"Mathematics is the queen of the sciences, and number theory is the queen of mathematics" - Carl Friedrich Gauss
_

S525-IE Project: Insurance Data Analysis

Description

S525-IE is a UMass course designation under the Integrative Experience (IE) requirement for mathematics and statistics students. STATISTC 525 is one of the courses students can take to fulfill the IE requirement as part of their degree program.

Contributor: Radha Dutta
Personal course project by Sam Castillo (Mathematics, with a minor in actuarial science, 2017).

SAS Code

Data Import and Preparation

/* Global title for the output that follows. */
title 'IE Master ';

/* Read the CSV file into WORK.insurance_raw.
   Note: this requires the updated insurance data file from GitHub. */
proc import
    out=insurance_raw
    datafile="/folders/myfolders/IE_project/by_zip.csv"
    dbms=csv
    replace;
    getnames=yes;   /* variable names come from the first row of the file */
run;

/* Print the imported rows for a visual check. */
proc print data=insurance_raw;
run;
    

Create Dummy Geographic Variables

/* Build WORK.insurance from insurance_raw:
   - add one 0/1 indicator variable per area code (fns, ns, nws, s, ws);
   - rebuild area as a left-aligned character variable of width 10. */
data insurance;
    set insurance_raw;

    /* A comparison evaluates to 1 (true) or 0 (false), so rows that do not
       match get 0 instead of a missing value. Missing indicators would cause
       regression procedures to drop those observations. */
    fns = (area = "fns");
    ns  = (area = "ns");
    nws = (area = "nws");
    s   = (area = "s");
    ws  = (area = "ws");

    /* area is character (it is compared with string literals above), so apply
       the character format $10. rather than the numeric format 10. */
    cat_area = put(area, $10. -L);
    drop area;
    rename cat_area = area;
run;
    

Data Transformation and Model Exploration

/* In summary, I find three candidate models:
   model 1: sqrt(invol) ~ log(fire) + income + race^(1/3) + area
   model 2: sqrt(invol) ~ log(fire) + income + race^(1/3) + vol + area
   model 3: sqrt(invol) ~ log(fire) + income + race^(1/3) + vol + theft + area
*/

/* Copy of insurance with the square root of invol added as root_invol. */
data insurance_invol_adj;
    set insurance;
    root_invol = sqrt(invol);
run;

/* Copy of insurance with log(fire) and sqrt(invol) added, followed by a
   simple regression of root_invol on log_fire. */
data insurance_invol_fire_adj;
    set insurance;

    /* log() is undefined for fire <= 0. Leave log_fire missing in that case
       instead of triggering an invalid-argument condition; the resulting
       value is the same (missing). */
    if fire > 0 then log_fire = log(fire);

    root_invol = sqrt(invol);
run;

title 'root_invol v log_fire';
proc reg data=insurance_invol_fire_adj;
    model root_invol = log_fire;
run;
quit;   /* PROC REG is interactive: RUN alone does not end the procedure */

/* Copy of insurance with sqrt(invol) added as root_invol. */
data insurance_income_adj;
    set insurance;
    root_invol = sqrt(invol);
run;

title 'root_invol v income';

/* Regress root_invol on income, plot the response against income, save the
   residuals as income_Resid in fireroot_invol, then draw residual plots
   against income, the predicted values, and normal quantiles. */
proc reg data=insurance_income_adj;
    model root_invol = income;
    plot root_invol*income;
    output out=fireroot_invol
           r=income_Resid;
    plot r.*income
         r.*p.
         r.*nqq.;
run;

/* Copy of insurance with sqrt(invol) and the cube root of race added. */
data insurance_race_adj;
    set insurance;
    root_invol = sqrt(invol);
    root_race = race**(1/3);
run;

title 'root_invol v cube_root_race';

/* Regress root_invol on root_race, plot the response against root_race, save
   the residuals as root_race_Resid in race_root_invol, then draw residual
   plots against root_race, the predicted values, and normal quantiles. */
proc reg data=insurance_race_adj;
    model root_invol = root_race;
    plot root_invol*root_race;
    output out=race_root_invol r=root_race_Resid;
    plot r.*root_race r.*p. r.*nqq.;
run;
quit;   /* PROC REG is interactive: without QUIT the procedure stays active */
    

Next Steps