/*=====================================================================*/
/* Multi-variable Data Mining models/predictions                       */
/*=====================================================================*/
/* Write a model that would predict the success (defined as grade>=90) */
/* of students based on their program and sex                          */
/* - First segment the raw data based on program and sex               */
/* - Next perform average on the various multi-variable combinations   */
/*=====================================================================*/
SELECT avg(Grad_M_seg)   as "Graduate M grade>90 prediction%", 
       avg(Grad_F_seg)   as "Graduate F grade>90 prediction%",
       avg(undGrad_M_seg)as "Undergrad M grade>90 rediction%",
       avg(undGrad_F_seg)as "Undergrad F grade>90 prediction%"
FROM (
    SELECT program, sex                           /*independent variables*/
        grade,                                    /*fact, or target variable*/
        case when program='graduate' and sex='M' then
            case when grade>=90 then 1 else 0 end 
        end as Grad_M_seg,  
        case when program='graduate' and sex='F' then
            case when grade>=90 then 1 else 0 end 
        end as Grad_F_seg,  
        case when program='undergraduate' and sex='M' then
            case when grade>=90 then 1 else 0 end 
        end as undGrad_M_seg,  
        case when program='undergraduate' and sex='F' then
            case when grade>=90 then 1 else 0 end 
        end as undGrad_F_seg 
    FROM grade natural join student
) raw_data
;

/*=================================================================*/
/* Write a model that would predict if there is instructor/student */
/* grade collusion based on sex                                    */
/*=================================================================*/
select round(avg(MM_seg),4) as "insM/stuM grade>90 prediction%",
       round(avg(MF_seg),4) as "insM/stuF grade>90 prediction%",
       round(avg(FM_seg),4) as "insF/stuM grade>90 prediction%",
       round(avg(FF_seg),4) as "insF/stuF grade>90 prediction%"
from (
    select i.sex as inst_sex, s.sex as stu_sex,       /*dimensions*/ 
           grade,                                     /*fact*/
           case when i.sex='M' and s.sex='M' then
               case when grade>90 then 1 else 0 end 
           end as MM_seg,  
           case when i.sex='M' and s.sex='F' then
               case when grade>90 then 1 else 0 end 
           end as MF_seg,  
           case when i.sex='F' and s.sex='M' then
               case when grade>90 then 1 else 0 end 
           end as FM_seg,  
           case when i.sex='F' and s.sex='F' then
               case when grade>90 then 1 else 0 end 
           end as FF_seg  
    from grade join student s    using(student_id)
               join instructor i using(instructor_id)
) raw_data
;

/*===============================================================*/
/* Write a model that would predict the probability of obtaining */
/* a grade >= 85 based on program and school_year                */
/*===============================================================*/
select round(avg(yr1),4) as "year1 grade>=85 prediction%", 
       round(avg(yr2),4) as "year2 grade>=85 prediction%",
       round(avg(yr3),4) as "year3 grade>=85 prediction%",
       round(avg(yr4),4) as "year4 grade>=85 prediction%",
       round(avg(yr5),4) as "yearG1 grade>=85 prediction%",
       round(avg(yr6),4) as "yearG2 grade>=85 prediction%"
from (
    select program, school_year, 
           grade,
           case when program='undergraduate' and school_year=1 then
               case when grade>=85 then 1 else 0 end 
           end as yr1,  
           case when program='undergraduate' and school_year=2 then
               case when grade>85 then 1 else 0 end 
           end as yr2,  
           case when program='undergraduate' and school_year=3 then
               case when grade>=85 then 1 else 0 end 
           end as yr3, 
           case when program='undergraduate' and school_year=4 then
               case when grade>85 then 1 else 0 end 
           end as yr4,  
           case when program='graduate' and school_year=1 then
               case when grade>=85 then 1 else 0 end 
           end as yr5,  
           case when program='graduate' and school_year=2 then
               case when grade>85 then 1 else 0 end 
           end as yr6  
    from grade natural join student 
) raw_data
;

/*===========================================================================*/
/* Write a model that would predict the probability of grade>=90 for student */
/* based on student sex and course department/concentration                  */
/*===========================================================================*/
select round(avg(IT_M), 4) as "M-IT  success prediction", 
       round(avg(IT_F), 4) as "F-IT  success prediction",
       round(avg(Web_M),4) as "M-Web success prediction",
       round(avg(Web_F),4) as "F-Web success prediction",
       round(avg(DB_M), 4) as "M-DB  success prediction",
       round(avg(DB_F), 4) as "F-DB  success prediction"
from (
    select department, sex 
           grade,
           case when department like 'Information%' and sex='M' then
               case when grade>=90 then 1 else 0 end 
           end as IT_M,  
           case when department like 'Information%' and sex='F' then
               case when grade>=90 then 1 else 0 end 
           end as IT_F,  
           case when department like 'Web%' and sex='M' then
               case when grade>=90 then 1 else 0 end 
           end as Web_M,  
           case when department like 'Web%' and sex='F' then
               case when grade>=90 then 1 else 0 end 
           end as Web_F,  
           case when department like 'Database%' and sex='M' then
               case when grade>=90 then 1 else 0 end 
           end as DB_M,  
           case when department like 'Database%' and sex='F' then
               case when grade>=90 then 1 else 0 end 
           end as DB_F  
    from grade natural join student
               natural join course  
) raw_data
;