#NO CHANGES NEEDED
set.seed(1)
library(knitr)
library(broom)
library(ggplot2)
library(plyr)
library(corrplot)
library(caret)
library(gridExtra)
library(scales)
library(Rmisc)
library(randomForest)
library(psych)
library(xgboost)
library(rpart)
library(rpart.plot)
library(randomForest)
library(tidyverse)
# Use the minimal ggplot2 theme for every plot in this document
theme_set(theme_minimal())
# Hide warnings and messages in the knitted chunk output
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# 1. Set RETICULATE_UV_ENABLED to "0" before reticulate is configured
#    (presumably switches off reticulate's uv-managed Python; verify against
#    the reticulate documentation)
Sys.setenv(RETICULATE_UV_ENABLED = "0")
# 2. Point reticulate at a specific local Python interpreter.
#    NOTE(review): this absolute path is machine-specific; with
#    required = TRUE the setup is expected to fail on machines where this
#    interpreter does not exist - confirm before sharing the script.
reticulate::use_python("C:/Users/casti/AppData/Local/Programs/Python/Python313/python.exe", required = TRUE)
# 3. If the interpreter binding does not take effect, restart the R session
#    inside RStudio (left commented out so the script never restarts itself)
#.rs.restartR()

library(reticulate)
# Import the Python `datasets` module through reticulate
datasets <- import("datasets")
# Load the dataset
ds <- datasets$load_dataset("supersam7/apartment_apps")
# Take the "train" split as a pandas data frame and store it as df
df <- ds["train"]$to_pandas()    # or ds$train$to_pandas() also works
# Drop the first column (presumably a row index; verify against the dataset)
df <- df[-1]
# Preview the first rows
head(df)
##   applicants sale_price num_units year_sold month_sold overall_qual
## 1          3     208500         4      2008          2            7
## 2          5     181500         5      2007          5            6
## 3          3     223500         1      2008          9            7
## 4          4     140000         3      2011          2            7
## 5          5     250000         5      2008         12            8
## 6          5     143000         3      2009         10            5
##   total_sq_feet gr_liv_area tot_bathrooms lot_area exter_qual full_bath
## 1      3586.886    2785.060             5 3898.256          4         4
## 2      3560.348    2315.267             3 4149.410          3         4
## 3      3672.314    2852.313             5 4461.603          4         4
## 4      3527.523    2791.378             3 4139.131          3         2
## 5      4012.276    3173.357             5 4928.292          4         4
## 6      3308.431    2433.181             3 4908.174          3         2
##   central_air garage_type_attchd garage_type_basment garage_type_builtIn
## 1         yes                  1                   0                   0
## 2         yes                  1                   0                   0
## 3         yes                  1                   0                   0
## 4         yes                  0                   0                   0
## 5         yes                  1                   0                   0
## 6         yes                  1                   0                   0
##   garage_type_detchd garage_type_no_garage NeighborhoodBrDale
## 1                  0                     0                  0
## 2                  0                     0                  0
## 3                  0                     0                  0
## 4                  1                     0                  0
## 5                  0                     0                  0
## 6                  0                     0                  0
##   neighborhood_brk_side neighborhood_clear_cr neighborhood_collg_cr
## 1                     0                     0                     1
## 2                     0                     0                     0
## 3                     0                     0                     1
## 4                     0                     0                     0
## 5                     0                     0                     0
## 6                     0                     0                     0
##   neighborhood_crawfor neighborhood_edwards neighborhood_gilbert
## 1                    0                    0                    0
## 2                    0                    0                    0
## 3                    0                    0                    0
## 4                    1                    0                    0
## 5                    0                    0                    0
## 6                    0                    0                    0
##   neighborhood_idottrr neighborhood_meadowv neighborhood_mitchel
## 1                    0                    0                    0
## 2                    0                    0                    0
## 3                    0                    0                    0
## 4                    0                    0                    0
## 5                    0                    0                    0
## 6                    0                    0                    1
##   neighborhood_n_ames neighborhood_n_ridge neighborhood_n_ridge_hghts
## 1                   0                    0                          0
## 2                   0                    0                          0
## 3                   0                    0                          0
## 4                   0                    0                          0
## 5                   0                    1                          0
## 6                   0                    0                          0
##   neighborhood_n_w_ames neighborhood_old_town neighborhood_sawyer
## 1                     0                     0                   0
## 2                     0                     0                   0
## 3                     0                     0                   0
## 4                     0                     0                   0
## 5                     0                     0                   0
## 6                     0                     0                   0
##   neighborhood_sawyer_w neighborhood_somerst neighborhood_stone_br
## 1                     0                    0                     0
## 2                     0                    0                     0
## 3                     0                    0                     0
## 4                     0                    0                     0
## 5                     0                    0                     0
## 6                     0                    0                     0
##   neighborhood_swisu neighborhood_timber neighborhood_veenker
## 1                  0                   0                    0
## 2                  0                   0                    1
## 3                  0                   0                    0
## 4                  0                   0                    0
## 5                  0                   0                    0
## 6                  0                   0                    0
##   neighborhood_saleprice
## 1               198517.7
## 2               245890.6
## 3               198517.7
## 4               213681.7
## 5               331835.3
## 6               157755.8

Set up

Your assistant has provided you with these code templates.

# Poisson log-likelihood of the observed number of applicants given predicted
# means, up to the additive constant -sum(log(factorial(targets))), which does
# not depend on the predictions.
#
# targets:          numeric vector of observed counts.
# predicted_values: numeric vector of predicted Poisson means, the same length
#                   as targets. A single value is applied to every observation.
#
# Returns a single number: sum(targets * log(mu)) - sum(mu).
# Non-positive predictions count as 0 in the sum of means and are replaced by
# 0.000001 inside the log, so log() is never taken of a non-positive value.
LLfunction <- function(targets, predicted_values) {
  # A scalar prediction must be expanded explicitly: implicit recycling would
  # apply it to every target in the first term but count it only once in the
  # second term.
  if (length(predicted_values) == 1L) {
    predicted_values <- rep_len(predicted_values, length(targets))
  }
  # Any other length mismatch would be recycled silently, so reject it.
  stopifnot(length(targets) == length(predicted_values))

  non_positive <- predicted_values <= 0
  p_v_zero <- ifelse(non_positive, 0, predicted_values)
  p_v_pos <- ifelse(non_positive, 0.000001, predicted_values)
  sum(targets * log(p_v_pos)) - sum(p_v_zero)
}
# Report the log-likelihood of `predictions` against the applicants in `test`
print("loglikelihood")
LLfunction(targets = test$applicants, predicted_values = predictions)

This code creates a box plot, a scatter plot, a histogram, and a bar plot.

# Box plot of VARIABLE within each applicant count
ggplot(data = df, mapping = aes(x = as.factor(applicants), y = VARIABLE)) +
  geom_boxplot()

# Scatter plot of VARIABLE against the applicant count
ggplot(data = df, mapping = aes(x = applicants, y = VARIABLE)) +
  geom_point()

# Histogram of VARIABLE
ggplot(data = df, mapping = aes(x = VARIABLE)) +
  geom_histogram()

# Bar plot of the number of rows at each value of VARIABLE
ggplot(data = df, mapping = aes(x = VARIABLE)) +
  geom_bar()

Shows the average number of applicants across factor levels.

# Unit-weighted average number of applicants at each level of VARIABLE
summarise(
  group_by(df, VARIABLE),
  average_num_applicants = sum(applicants * num_units) / sum(num_units)
)

Converts variables to factor, setting the base (reference) level to the value with the most observations, or to numeric.

# Convert to factor; fct_infreq() orders the levels by frequency, so the most
# common value becomes the first (base) level
df <- df %>% mutate(VARIABLE = fct_infreq(as.character(VARIABLE)))

# Using Base R
df$VARIABLE <- fct_infreq(as.character(df$VARIABLE))

# Convert to numeric
df <- df %>% mutate(VARIABLE = as.numeric(VARIABLE))

# Using Base R
df$VARIABLE <- as.numeric(df$VARIABLE)

Task 1 - Examine the target variable and number of units

# Inspect the column names, types, and leading values of df
glimpse(df)
## Rows: 1,430
## Columns: 41
## $ applicants                 <dbl> 3, 5, 3, 4, 5, 5, 5, 5, 2, 3, 1, 6, 1, 5, 3…
## $ sale_price                 <dbl> 208500, 181500, 223500, 140000, 250000, 143…
## $ num_units                  <dbl> 4, 5, 1, 3, 5, 3, 1, 3, 5, 4, 3, 1, 5, 2, 5…
## $ year_sold                  <dbl> 2008, 2007, 2008, 2011, 2008, 2009, 2007, 2…
## $ month_sold                 <dbl> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8…
## $ overall_qual               <dbl> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6…
## $ total_sq_feet              <dbl> 3586.886, 3560.348, 3672.314, 3527.523, 401…
## $ gr_liv_area                <dbl> 2785.060, 2315.267, 2852.313, 2791.378, 317…
## $ tot_bathrooms              <dbl> 5, 3, 5, 3, 5, 3, 4, 5, 3, 3, 3, 5, 3, 3, 3…
## $ lot_area                   <dbl> 3898.256, 4149.410, 4461.603, 4139.131, 492…
## $ exter_qual                 <dbl> 4, 3, 4, 3, 4, 3, 4, 3, 3, 3, 3, 5, 3, 4, 3…
## $ full_bath                  <dbl> 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 2, 6, 2, 4, 2…
## $ central_air                <chr> "yes", "yes", "yes", "yes", "yes", "yes", "…
## $ garage_type_attchd         <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1…
## $ garage_type_basment        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ garage_type_builtIn        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ garage_type_detchd         <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0…
## $ garage_type_no_garage      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ NeighborhoodBrDale         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_brk_side      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ neighborhood_clear_cr      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_collg_cr      <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ neighborhood_crawfor       <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_edwards       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_gilbert       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_idottrr       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_meadowv       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_mitchel       <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_n_ames        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ neighborhood_n_ridge       <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_n_ridge_hghts <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ neighborhood_n_w_ames      <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_old_town      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ neighborhood_sawyer        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0…
## $ neighborhood_sawyer_w      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_somerst       <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_stone_br      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_swisu         <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_timber        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_veenker       <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_saleprice     <dbl> 198517.7, 245890.6, 198517.7, 213681.7, 331…
# Per-column summary statistics of df
summary(df)
##    applicants      sale_price       num_units       year_sold   
##  Min.   :0.000   Min.   : 34900   Min.   :1.000   Min.   :2007  
##  1st Qu.:2.000   1st Qu.:129600   1st Qu.:1.000   1st Qu.:2008  
##  Median :4.000   Median :163000   Median :2.000   Median :2009  
##  Mean   :3.633   Mean   :181069   Mean   :2.685   Mean   :2009  
##  3rd Qu.:5.000   3rd Qu.:214375   3rd Qu.:4.000   3rd Qu.:2010  
##  Max.   :7.000   Max.   :755000   Max.   :5.000   Max.   :2011  
##    month_sold      overall_qual    total_sq_feet   gr_liv_area    
##  Min.   : 1.000   Min.   : 1.000   Min.   : 311   Min.   : 261.6  
##  1st Qu.: 5.000   1st Qu.: 5.000   1st Qu.:3193   1st Qu.:2139.6  
##  Median : 6.000   Median : 6.000   Median :3528   Median :2547.0  
##  Mean   : 6.329   Mean   : 6.082   Mean   :3510   Mean   :2511.2  
##  3rd Qu.: 8.000   3rd Qu.: 7.000   3rd Qu.:3852   3rd Qu.:2852.3  
##  Max.   :12.000   Max.   :10.000   Max.   :5171   Max.   :4273.5  
##  tot_bathrooms      lot_area         exter_qual     full_bath    
##  Min.   :1.000   Min.   :  465.9   Min.   :2.00   Min.   :0.000  
##  1st Qu.:3.000   1st Qu.: 3725.6   1st Qu.:3.00   1st Qu.:2.000  
##  Median :3.000   Median : 4138.8   Median :3.00   Median :4.000  
##  Mean   :2.952   Mean   : 4070.4   Mean   :3.39   Mean   :3.115  
##  3rd Qu.:3.000   3rd Qu.: 4529.0   3rd Qu.:4.00   3rd Qu.:4.000  
##  Max.   :8.000   Max.   :10271.5   Max.   :5.00   Max.   :6.000  
##  central_air        garage_type_attchd garage_type_basment garage_type_builtIn
##  Length:1430        Min.   :0.0000     Min.   :0.00000     Min.   :0.00000    
##  Class :character   1st Qu.:0.0000     1st Qu.:0.00000     1st Qu.:0.00000    
##  Mode  :character   Median :1.0000     Median :0.00000     Median :0.00000    
##                     Mean   :0.5888     Mean   :0.01329     Mean   :0.06084    
##                     3rd Qu.:1.0000     3rd Qu.:0.00000     3rd Qu.:0.00000    
##                     Max.   :1.0000     Max.   :1.00000     Max.   :1.00000    
##  garage_type_detchd garage_type_no_garage NeighborhoodBrDale
##  Min.   :0.0000     Min.   :0.00000       Min.   :0.00000   
##  1st Qu.:0.0000     1st Qu.:0.00000       1st Qu.:0.00000   
##  Median :0.0000     Median :0.00000       Median :0.00000   
##  Mean   :0.2699     Mean   :0.05664       Mean   :0.01119   
##  3rd Qu.:1.0000     3rd Qu.:0.00000       3rd Qu.:0.00000   
##  Max.   :1.0000     Max.   :1.00000       Max.   :1.00000   
##  neighborhood_brk_side neighborhood_clear_cr neighborhood_collg_cr
##  Min.   :0.00000       Min.   :0.00000       Min.   :0.0000       
##  1st Qu.:0.00000       1st Qu.:0.00000       1st Qu.:0.0000       
##  Median :0.00000       Median :0.00000       Median :0.0000       
##  Mean   :0.04056       Mean   :0.01958       Mean   :0.1049       
##  3rd Qu.:0.00000       3rd Qu.:0.00000       3rd Qu.:0.0000       
##  Max.   :1.00000       Max.   :1.00000       Max.   :1.0000       
##  neighborhood_crawfor neighborhood_edwards neighborhood_gilbert
##  Min.   :0.00000      Min.   :0.00000      Min.   :0.00000     
##  1st Qu.:0.00000      1st Qu.:0.00000      1st Qu.:0.00000     
##  Median :0.00000      Median :0.00000      Median :0.00000     
##  Mean   :0.03566      Mean   :0.06853      Mean   :0.05524     
##  3rd Qu.:0.00000      3rd Qu.:0.00000      3rd Qu.:0.00000     
##  Max.   :1.00000      Max.   :1.00000      Max.   :1.00000     
##  neighborhood_idottrr neighborhood_meadowv neighborhood_mitchel
##  Min.   :0.00000      Min.   :0.00000      Min.   :0.00000     
##  1st Qu.:0.00000      1st Qu.:0.00000      1st Qu.:0.00000     
##  Median :0.00000      Median :0.00000      Median :0.00000     
##  Mean   :0.02587      Mean   :0.01189      Mean   :0.03427     
##  3rd Qu.:0.00000      3rd Qu.:0.00000      3rd Qu.:0.00000     
##  Max.   :1.00000      Max.   :1.00000      Max.   :1.00000     
##  neighborhood_n_ames neighborhood_n_ridge neighborhood_n_ridge_hghts
##  Min.   :0.0000      Min.   :0.00000      Min.   :0.00000           
##  1st Qu.:0.0000      1st Qu.:0.00000      1st Qu.:0.00000           
##  Median :0.0000      Median :0.00000      Median :0.00000           
##  Mean   :0.1573      Mean   :0.02867      Mean   :0.05385           
##  3rd Qu.:0.0000      3rd Qu.:0.00000      3rd Qu.:0.00000           
##  Max.   :1.0000      Max.   :1.00000      Max.   :1.00000           
##  neighborhood_n_w_ames neighborhood_old_town neighborhood_sawyer
##  Min.   :0.00000       Min.   :0.00000       Min.   :0.00000    
##  1st Qu.:0.00000       1st Qu.:0.00000       1st Qu.:0.00000    
##  Median :0.00000       Median :0.00000       Median :0.00000    
##  Mean   :0.05105       Mean   :0.07902       Mean   :0.05175    
##  3rd Qu.:0.00000       3rd Qu.:0.00000       3rd Qu.:0.00000    
##  Max.   :1.00000       Max.   :1.00000       Max.   :1.00000    
##  neighborhood_sawyer_w neighborhood_somerst neighborhood_stone_br
##  Min.   :0.00000       Min.   :0.00000      Min.   :0.00000      
##  1st Qu.:0.00000       1st Qu.:0.00000      1st Qu.:0.00000      
##  Median :0.00000       Median :0.00000      Median :0.00000      
##  Mean   :0.04126       Mean   :0.06014      Mean   :0.01748      
##  3rd Qu.:0.00000       3rd Qu.:0.00000      3rd Qu.:0.00000      
##  Max.   :1.00000       Max.   :1.00000      Max.   :1.00000      
##  neighborhood_swisu neighborhood_timber neighborhood_veenker
##  Min.   :0.00000    Min.   :0.00000     Min.   :0.000000    
##  1st Qu.:0.00000    1st Qu.:0.00000     1st Qu.:0.000000    
##  Median :0.00000    Median :0.00000     Median :0.000000    
##  Mean   :0.01748    Mean   :0.02657     Mean   :0.007692    
##  3rd Qu.:0.00000    3rd Qu.:0.00000     3rd Qu.:0.000000    
##  Max.   :1.00000    Max.   :1.00000     Max.   :1.000000    
##  neighborhood_saleprice
##  Min.   :101389        
##  1st Qu.:136902        
##  Median :181841        
##  Mean   :180909        
##  3rd Qu.:198518        
##  Max.   :331835
# Total number of units across all rows
with(df, sum(num_units))
## [1] 3839
# Unit-weighted average number of applicants (base R)
with(df, sum(num_units * applicants) / sum(num_units))
## [1] 3.588695
# The same unit-weighted average computed with dplyr
summarise(df, avg_applicants = sum(applicants * num_units) / sum(num_units))
##   avg_applicants
## 1       3.588695
# NO CODE CHANGES NEEDED
# Total units at each observed applicant count
summarise(group_by(df, applicants), total_units = sum(num_units))
## # A tibble: 8 × 2
##   applicants total_units
##        <dbl>       <dbl>
## 1          0         152
## 2          1         360
## 3          2         532
## 4          3         898
## 5          4         487
## 6          5        1028
## 7          6         103
## 8          7         279
# Graph A: histogram of the applicant count, one observation per row
ggplot(data = df, mapping = aes(x = applicants)) +
  geom_histogram()

# Graph B: bar plot of total units at each applicant count
df %>%
  group_by(applicants = as.factor(applicants)) %>%
  summarise(total_units = sum(num_units)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = applicants, y = total_units)) +
  geom_bar(stat = "identity")

Task 2 - Explore the predictor variables

# Task 2 - Explore the predictor variables – FIXED VERSION
# Re-print the per-column summary statistics before exploring the predictors
summary(df)
##    applicants      sale_price       num_units       year_sold   
##  Min.   :0.000   Min.   : 34900   Min.   :1.000   Min.   :2007  
##  1st Qu.:2.000   1st Qu.:129600   1st Qu.:1.000   1st Qu.:2008  
##  Median :4.000   Median :163000   Median :2.000   Median :2009  
##  Mean   :3.633   Mean   :181069   Mean   :2.685   Mean   :2009  
##  3rd Qu.:5.000   3rd Qu.:214375   3rd Qu.:4.000   3rd Qu.:2010  
##  Max.   :7.000   Max.   :755000   Max.   :5.000   Max.   :2011  
##    month_sold      overall_qual    total_sq_feet   gr_liv_area    
##  Min.   : 1.000   Min.   : 1.000   Min.   : 311   Min.   : 261.6  
##  1st Qu.: 5.000   1st Qu.: 5.000   1st Qu.:3193   1st Qu.:2139.6  
##  Median : 6.000   Median : 6.000   Median :3528   Median :2547.0  
##  Mean   : 6.329   Mean   : 6.082   Mean   :3510   Mean   :2511.2  
##  3rd Qu.: 8.000   3rd Qu.: 7.000   3rd Qu.:3852   3rd Qu.:2852.3  
##  Max.   :12.000   Max.   :10.000   Max.   :5171   Max.   :4273.5  
##  tot_bathrooms      lot_area         exter_qual     full_bath    
##  Min.   :1.000   Min.   :  465.9   Min.   :2.00   Min.   :0.000  
##  1st Qu.:3.000   1st Qu.: 3725.6   1st Qu.:3.00   1st Qu.:2.000  
##  Median :3.000   Median : 4138.8   Median :3.00   Median :4.000  
##  Mean   :2.952   Mean   : 4070.4   Mean   :3.39   Mean   :3.115  
##  3rd Qu.:3.000   3rd Qu.: 4529.0   3rd Qu.:4.00   3rd Qu.:4.000  
##  Max.   :8.000   Max.   :10271.5   Max.   :5.00   Max.   :6.000  
##  central_air        garage_type_attchd garage_type_basment garage_type_builtIn
##  Length:1430        Min.   :0.0000     Min.   :0.00000     Min.   :0.00000    
##  Class :character   1st Qu.:0.0000     1st Qu.:0.00000     1st Qu.:0.00000    
##  Mode  :character   Median :1.0000     Median :0.00000     Median :0.00000    
##                     Mean   :0.5888     Mean   :0.01329     Mean   :0.06084    
##                     3rd Qu.:1.0000     3rd Qu.:0.00000     3rd Qu.:0.00000    
##                     Max.   :1.0000     Max.   :1.00000     Max.   :1.00000    
##  garage_type_detchd garage_type_no_garage NeighborhoodBrDale
##  Min.   :0.0000     Min.   :0.00000       Min.   :0.00000   
##  1st Qu.:0.0000     1st Qu.:0.00000       1st Qu.:0.00000   
##  Median :0.0000     Median :0.00000       Median :0.00000   
##  Mean   :0.2699     Mean   :0.05664       Mean   :0.01119   
##  3rd Qu.:1.0000     3rd Qu.:0.00000       3rd Qu.:0.00000   
##  Max.   :1.0000     Max.   :1.00000       Max.   :1.00000   
##  neighborhood_brk_side neighborhood_clear_cr neighborhood_collg_cr
##  Min.   :0.00000       Min.   :0.00000       Min.   :0.0000       
##  1st Qu.:0.00000       1st Qu.:0.00000       1st Qu.:0.0000       
##  Median :0.00000       Median :0.00000       Median :0.0000       
##  Mean   :0.04056       Mean   :0.01958       Mean   :0.1049       
##  3rd Qu.:0.00000       3rd Qu.:0.00000       3rd Qu.:0.0000       
##  Max.   :1.00000       Max.   :1.00000       Max.   :1.0000       
##  neighborhood_crawfor neighborhood_edwards neighborhood_gilbert
##  Min.   :0.00000      Min.   :0.00000      Min.   :0.00000     
##  1st Qu.:0.00000      1st Qu.:0.00000      1st Qu.:0.00000     
##  Median :0.00000      Median :0.00000      Median :0.00000     
##  Mean   :0.03566      Mean   :0.06853      Mean   :0.05524     
##  3rd Qu.:0.00000      3rd Qu.:0.00000      3rd Qu.:0.00000     
##  Max.   :1.00000      Max.   :1.00000      Max.   :1.00000     
##  neighborhood_idottrr neighborhood_meadowv neighborhood_mitchel
##  Min.   :0.00000      Min.   :0.00000      Min.   :0.00000     
##  1st Qu.:0.00000      1st Qu.:0.00000      1st Qu.:0.00000     
##  Median :0.00000      Median :0.00000      Median :0.00000     
##  Mean   :0.02587      Mean   :0.01189      Mean   :0.03427     
##  3rd Qu.:0.00000      3rd Qu.:0.00000      3rd Qu.:0.00000     
##  Max.   :1.00000      Max.   :1.00000      Max.   :1.00000     
##  neighborhood_n_ames neighborhood_n_ridge neighborhood_n_ridge_hghts
##  Min.   :0.0000      Min.   :0.00000      Min.   :0.00000           
##  1st Qu.:0.0000      1st Qu.:0.00000      1st Qu.:0.00000           
##  Median :0.0000      Median :0.00000      Median :0.00000           
##  Mean   :0.1573      Mean   :0.02867      Mean   :0.05385           
##  3rd Qu.:0.0000      3rd Qu.:0.00000      3rd Qu.:0.00000           
##  Max.   :1.0000      Max.   :1.00000      Max.   :1.00000           
##  neighborhood_n_w_ames neighborhood_old_town neighborhood_sawyer
##  Min.   :0.00000       Min.   :0.00000       Min.   :0.00000    
##  1st Qu.:0.00000       1st Qu.:0.00000       1st Qu.:0.00000    
##  Median :0.00000       Median :0.00000       Median :0.00000    
##  Mean   :0.05105       Mean   :0.07902       Mean   :0.05175    
##  3rd Qu.:0.00000       3rd Qu.:0.00000       3rd Qu.:0.00000    
##  Max.   :1.00000       Max.   :1.00000       Max.   :1.00000    
##  neighborhood_sawyer_w neighborhood_somerst neighborhood_stone_br
##  Min.   :0.00000       Min.   :0.00000      Min.   :0.00000      
##  1st Qu.:0.00000       1st Qu.:0.00000      1st Qu.:0.00000      
##  Median :0.00000       Median :0.00000      Median :0.00000      
##  Mean   :0.04126       Mean   :0.06014      Mean   :0.01748      
##  3rd Qu.:0.00000       3rd Qu.:0.00000      3rd Qu.:0.00000      
##  Max.   :1.00000       Max.   :1.00000      Max.   :1.00000      
##  neighborhood_swisu neighborhood_timber neighborhood_veenker
##  Min.   :0.00000    Min.   :0.00000     Min.   :0.000000    
##  1st Qu.:0.00000    1st Qu.:0.00000     1st Qu.:0.000000    
##  Median :0.00000    Median :0.00000     Median :0.000000    
##  Mean   :0.01748    Mean   :0.02657     Mean   :0.007692    
##  3rd Qu.:0.00000    3rd Qu.:0.00000     3rd Qu.:0.000000    
##  Max.   :1.00000    Max.   :1.00000     Max.   :1.000000    
##  neighborhood_saleprice
##  Min.   :101389        
##  1st Qu.:136902        
##  Median :181841        
##  Mean   :180909        
##  3rd Qu.:198518        
##  Max.   :331835
# Unit-weighted averages of sale price and overall quality, together with the
# total number of units that serves as the weight denominator.
# The first column is sum(num_units), a total rather than a mean, so it is
# named total_number_of_units.
df %>%
  summarise(
    total_number_of_units = sum(num_units),
    mean_sale_price = dollar(sum(num_units * sale_price) / sum(num_units)),
    mean_overall_qual = sum(num_units * overall_qual) / sum(num_units)
  )
##   total_number_of_units mean_sale_price mean_overall_qual
## 1                  3839        $180,146          6.060172
# ... your plots ...

# Append the log-scale price features. Every existing column is kept:
# do not drop sale_price or neighborhood_saleprice at this step.
df <- mutate(
  df,
  log_rel_price = log(sale_price / neighborhood_saleprice),
  log_sale_price = log(sale_price)
)
# overall_qual: number of rows at each quality rating
ggplot(data = df, mapping = aes(x = overall_qual)) +
  geom_bar()

# Unit-weighted average quality rating
summarise(df, avg = sum(num_units * overall_qual) / sum(num_units))
##        avg
## 1 6.060172
# Total units at each quality rating
df %>%
  group_by(overall_qual) %>%
  summarise(total_units = sum(num_units)) %>%
  ggplot(mapping = aes(x = overall_qual, y = total_units)) +
  geom_bar(stat = "identity")

# central_air: total units by central-air status, drawn as a bar plot
summarise(group_by(df, central_air), total_units = sum(num_units)) %>%
  ggplot(mapping = aes(x = central_air, y = total_units)) +
  geom_bar(stat = "identity")

# The same totals printed as a table
summarise(group_by(df, central_air), total_units = sum(num_units))
## # A tibble: 2 × 2
##   central_air total_units
##   <chr>             <dbl>
## 1 no                  269
## 2 yes                3570
# Number of rows at each quality rating
count(df, overall_qual)
##    overall_qual   n
## 1             1   2
## 2             2   3
## 3             3  20
## 4             4 116
## 5             5 397
## 6             6 363
## 7             7 305
## 8             8 165
## 9             9  43
## 10           10  16
# Check that the base level (the one that appears first) is the one with the most observations.
# central_air is still a character vector here, so summary() only reports its
# length, class and mode rather than per-value counts.
summary(df$central_air)
##    Length     Class      Mode 
##      1430 character character
# Total applicants and total units for each central_air value.
df %>%
  group_by(central_air) %>%
  summarise(
    applicants = sum(applicants),
    total_units = sum(num_units)
  )
## # A tibble: 2 × 3
##   central_air applicants total_units
##   <chr>            <dbl>       <dbl>
## 1 no                  54         269
## 2 yes               5141        3570
# Distribution of applicants for each central_air value.
ggplot(df, aes(x = as.factor(central_air), y = applicants)) +
  geom_boxplot()

# Turn central_air into a factor whose levels are ordered from most to least
# frequent, so the most common value comes first (the base level).
df <- df %>%
  mutate(central_air = fct_infreq(central_air))
summary(df$central_air)
##  yes   no 
## 1335   95

Task 3 - Engineer three additional features

Code is provided to calculate the average and create box plots of sale_price by year and month.

# Engineer three additional features.
df <- mutate(
  df,
  # exp(log_rel_price) equals sale_price / neighborhood_saleprice, so despite
  # its name this column is the neighborhood-relative price per square foot.
  sale_price_per_sqft = exp(log_rel_price) / total_sq_feet,
  # bathrooms per square foot
  bath_pr_sqft = tot_bathrooms / total_sq_feet,
  # 1 when month_sold is 7 or 8, otherwise 0
  student_apt = as.numeric(month_sold %in% c(7, 8))
)
count(df, student_apt)
##   student_apt    n
## 1           0 1079
## 2           1  351
# Column-by-column summary of the data, including the new features.
summary(df)
##    applicants      sale_price       num_units       year_sold   
##  Min.   :0.000   Min.   : 34900   Min.   :1.000   Min.   :2007  
##  1st Qu.:2.000   1st Qu.:129600   1st Qu.:1.000   1st Qu.:2008  
##  Median :4.000   Median :163000   Median :2.000   Median :2009  
##  Mean   :3.633   Mean   :181069   Mean   :2.685   Mean   :2009  
##  3rd Qu.:5.000   3rd Qu.:214375   3rd Qu.:4.000   3rd Qu.:2010  
##  Max.   :7.000   Max.   :755000   Max.   :5.000   Max.   :2011  
##    month_sold      overall_qual    total_sq_feet   gr_liv_area    
##  Min.   : 1.000   Min.   : 1.000   Min.   : 311   Min.   : 261.6  
##  1st Qu.: 5.000   1st Qu.: 5.000   1st Qu.:3193   1st Qu.:2139.6  
##  Median : 6.000   Median : 6.000   Median :3528   Median :2547.0  
##  Mean   : 6.329   Mean   : 6.082   Mean   :3510   Mean   :2511.2  
##  3rd Qu.: 8.000   3rd Qu.: 7.000   3rd Qu.:3852   3rd Qu.:2852.3  
##  Max.   :12.000   Max.   :10.000   Max.   :5171   Max.   :4273.5  
##  tot_bathrooms      lot_area         exter_qual     full_bath     central_air
##  Min.   :1.000   Min.   :  465.9   Min.   :2.00   Min.   :0.000   yes:1335   
##  1st Qu.:3.000   1st Qu.: 3725.6   1st Qu.:3.00   1st Qu.:2.000   no :  95   
##  Median :3.000   Median : 4138.8   Median :3.00   Median :4.000              
##  Mean   :2.952   Mean   : 4070.4   Mean   :3.39   Mean   :3.115              
##  3rd Qu.:3.000   3rd Qu.: 4529.0   3rd Qu.:4.00   3rd Qu.:4.000              
##  Max.   :8.000   Max.   :10271.5   Max.   :5.00   Max.   :6.000              
##  garage_type_attchd garage_type_basment garage_type_builtIn garage_type_detchd
##  Min.   :0.0000     Min.   :0.00000     Min.   :0.00000     Min.   :0.0000    
##  1st Qu.:0.0000     1st Qu.:0.00000     1st Qu.:0.00000     1st Qu.:0.0000    
##  Median :1.0000     Median :0.00000     Median :0.00000     Median :0.0000    
##  Mean   :0.5888     Mean   :0.01329     Mean   :0.06084     Mean   :0.2699    
##  3rd Qu.:1.0000     3rd Qu.:0.00000     3rd Qu.:0.00000     3rd Qu.:1.0000    
##  Max.   :1.0000     Max.   :1.00000     Max.   :1.00000     Max.   :1.0000    
##  garage_type_no_garage NeighborhoodBrDale neighborhood_brk_side
##  Min.   :0.00000       Min.   :0.00000    Min.   :0.00000      
##  1st Qu.:0.00000       1st Qu.:0.00000    1st Qu.:0.00000      
##  Median :0.00000       Median :0.00000    Median :0.00000      
##  Mean   :0.05664       Mean   :0.01119    Mean   :0.04056      
##  3rd Qu.:0.00000       3rd Qu.:0.00000    3rd Qu.:0.00000      
##  Max.   :1.00000       Max.   :1.00000    Max.   :1.00000      
##  neighborhood_clear_cr neighborhood_collg_cr neighborhood_crawfor
##  Min.   :0.00000       Min.   :0.0000        Min.   :0.00000     
##  1st Qu.:0.00000       1st Qu.:0.0000        1st Qu.:0.00000     
##  Median :0.00000       Median :0.0000        Median :0.00000     
##  Mean   :0.01958       Mean   :0.1049        Mean   :0.03566     
##  3rd Qu.:0.00000       3rd Qu.:0.0000        3rd Qu.:0.00000     
##  Max.   :1.00000       Max.   :1.0000        Max.   :1.00000     
##  neighborhood_edwards neighborhood_gilbert neighborhood_idottrr
##  Min.   :0.00000      Min.   :0.00000      Min.   :0.00000     
##  1st Qu.:0.00000      1st Qu.:0.00000      1st Qu.:0.00000     
##  Median :0.00000      Median :0.00000      Median :0.00000     
##  Mean   :0.06853      Mean   :0.05524      Mean   :0.02587     
##  3rd Qu.:0.00000      3rd Qu.:0.00000      3rd Qu.:0.00000     
##  Max.   :1.00000      Max.   :1.00000      Max.   :1.00000     
##  neighborhood_meadowv neighborhood_mitchel neighborhood_n_ames
##  Min.   :0.00000      Min.   :0.00000      Min.   :0.0000     
##  1st Qu.:0.00000      1st Qu.:0.00000      1st Qu.:0.0000     
##  Median :0.00000      Median :0.00000      Median :0.0000     
##  Mean   :0.01189      Mean   :0.03427      Mean   :0.1573     
##  3rd Qu.:0.00000      3rd Qu.:0.00000      3rd Qu.:0.0000     
##  Max.   :1.00000      Max.   :1.00000      Max.   :1.0000     
##  neighborhood_n_ridge neighborhood_n_ridge_hghts neighborhood_n_w_ames
##  Min.   :0.00000      Min.   :0.00000            Min.   :0.00000      
##  1st Qu.:0.00000      1st Qu.:0.00000            1st Qu.:0.00000      
##  Median :0.00000      Median :0.00000            Median :0.00000      
##  Mean   :0.02867      Mean   :0.05385            Mean   :0.05105      
##  3rd Qu.:0.00000      3rd Qu.:0.00000            3rd Qu.:0.00000      
##  Max.   :1.00000      Max.   :1.00000            Max.   :1.00000      
##  neighborhood_old_town neighborhood_sawyer neighborhood_sawyer_w
##  Min.   :0.00000       Min.   :0.00000     Min.   :0.00000      
##  1st Qu.:0.00000       1st Qu.:0.00000     1st Qu.:0.00000      
##  Median :0.00000       Median :0.00000     Median :0.00000      
##  Mean   :0.07902       Mean   :0.05175     Mean   :0.04126      
##  3rd Qu.:0.00000       3rd Qu.:0.00000     3rd Qu.:0.00000      
##  Max.   :1.00000       Max.   :1.00000     Max.   :1.00000      
##  neighborhood_somerst neighborhood_stone_br neighborhood_swisu
##  Min.   :0.00000      Min.   :0.00000       Min.   :0.00000   
##  1st Qu.:0.00000      1st Qu.:0.00000       1st Qu.:0.00000   
##  Median :0.00000      Median :0.00000       Median :0.00000   
##  Mean   :0.06014      Mean   :0.01748       Mean   :0.01748   
##  3rd Qu.:0.00000      3rd Qu.:0.00000       3rd Qu.:0.00000   
##  Max.   :1.00000      Max.   :1.00000       Max.   :1.00000   
##  neighborhood_timber neighborhood_veenker neighborhood_saleprice
##  Min.   :0.00000     Min.   :0.000000     Min.   :101389        
##  1st Qu.:0.00000     1st Qu.:0.000000     1st Qu.:136902        
##  Median :0.00000     Median :0.000000     Median :181841        
##  Mean   :0.02657     Mean   :0.007692     Mean   :180909        
##  3rd Qu.:0.00000     3rd Qu.:0.000000     3rd Qu.:198518        
##  Max.   :1.00000     Max.   :1.000000     Max.   :331835        
##  log_rel_price      log_sale_price  sale_price_per_sqft  bath_pr_sqft      
##  Min.   :-1.19109   Min.   :10.46   Min.   :0.0001087   Min.   :0.0002562  
##  1st Qu.:-0.18330   1st Qu.:11.77   1st Qu.:0.0002471   1st Qu.:0.0006980  
##  Median :-0.03704   Median :12.00   Median :0.0002804   Median :0.0008476  
##  Mean   :-0.03482   Mean   :12.02   Mean   :0.0002850   Mean   :0.0008319  
##  3rd Qu.: 0.11353   3rd Qu.:12.28   3rd Qu.:0.0003160   3rd Qu.:0.0010070  
##  Max.   : 1.33727   Max.   :13.53   Max.   :0.0009952   Max.   :0.0032157  
##   student_apt    
##  Min.   :0.0000  
##  1st Qu.:0.0000  
##  Median :0.0000  
##  Mean   :0.2455  
##  3rd Qu.:0.0000  
##  Max.   :1.0000

Task 4 - Inspect the garage_type variables

No code is provided.

# Show the first rows in which the five garage_type_* indicators add up to
# more than zero, i.e. at least one garage type is flagged.
df %>%
  filter(
    rowSums(across(c(
      garage_type_attchd, garage_type_basment, garage_type_detchd,
      garage_type_builtIn, garage_type_no_garage
    ))) > 0
  ) %>%
  head()
##   applicants sale_price num_units year_sold month_sold overall_qual
## 1          3     208500         4      2008          2            7
## 2          5     181500         5      2007          5            6
## 3          3     223500         1      2008          9            7
## 4          4     140000         3      2011          2            7
## 5          5     250000         5      2008         12            8
## 6          5     143000         3      2009         10            5
##   total_sq_feet gr_liv_area tot_bathrooms lot_area exter_qual full_bath
## 1      3586.886    2785.060             5 3898.256          4         4
## 2      3560.348    2315.267             3 4149.410          3         4
## 3      3672.314    2852.313             5 4461.603          4         4
## 4      3527.523    2791.378             3 4139.131          3         2
## 5      4012.276    3173.357             5 4928.292          4         4
## 6      3308.431    2433.181             3 4908.174          3         2
##   central_air garage_type_attchd garage_type_basment garage_type_builtIn
## 1         yes                  1                   0                   0
## 2         yes                  1                   0                   0
## 3         yes                  1                   0                   0
## 4         yes                  0                   0                   0
## 5         yes                  1                   0                   0
## 6         yes                  1                   0                   0
##   garage_type_detchd garage_type_no_garage NeighborhoodBrDale
## 1                  0                     0                  0
## 2                  0                     0                  0
## 3                  0                     0                  0
## 4                  1                     0                  0
## 5                  0                     0                  0
## 6                  0                     0                  0
##   neighborhood_brk_side neighborhood_clear_cr neighborhood_collg_cr
## 1                     0                     0                     1
## 2                     0                     0                     0
## 3                     0                     0                     1
## 4                     0                     0                     0
## 5                     0                     0                     0
## 6                     0                     0                     0
##   neighborhood_crawfor neighborhood_edwards neighborhood_gilbert
## 1                    0                    0                    0
## 2                    0                    0                    0
## 3                    0                    0                    0
## 4                    1                    0                    0
## 5                    0                    0                    0
## 6                    0                    0                    0
##   neighborhood_idottrr neighborhood_meadowv neighborhood_mitchel
## 1                    0                    0                    0
## 2                    0                    0                    0
## 3                    0                    0                    0
## 4                    0                    0                    0
## 5                    0                    0                    0
## 6                    0                    0                    1
##   neighborhood_n_ames neighborhood_n_ridge neighborhood_n_ridge_hghts
## 1                   0                    0                          0
## 2                   0                    0                          0
## 3                   0                    0                          0
## 4                   0                    0                          0
## 5                   0                    1                          0
## 6                   0                    0                          0
##   neighborhood_n_w_ames neighborhood_old_town neighborhood_sawyer
## 1                     0                     0                   0
## 2                     0                     0                   0
## 3                     0                     0                   0
## 4                     0                     0                   0
## 5                     0                     0                   0
## 6                     0                     0                   0
##   neighborhood_sawyer_w neighborhood_somerst neighborhood_stone_br
## 1                     0                    0                     0
## 2                     0                    0                     0
## 3                     0                    0                     0
## 4                     0                    0                     0
## 5                     0                    0                     0
## 6                     0                    0                     0
##   neighborhood_swisu neighborhood_timber neighborhood_veenker
## 1                  0                   0                    0
## 2                  0                   0                    1
## 3                  0                   0                    0
## 4                  0                   0                    0
## 5                  0                   0                    0
## 6                  0                   0                    0
##   neighborhood_saleprice log_rel_price log_sale_price sale_price_per_sqft
## 1               198517.7    0.04906067       12.24769        0.0002928122
## 2               245890.6   -0.30363117       12.10901        0.0002073205
## 3               198517.7    0.11853305       12.31717        0.0003065762
## 4               213681.7   -0.42284528       11.84940        0.0001857338
## 5               331835.3   -0.28317780       12.42922        0.0001877702
## 6               157755.8   -0.09820383       11.87060        0.0002739861
##   bath_pr_sqft student_apt
## 1 0.0013939666           0
## 2 0.0008426143           0
## 3 0.0013615394           0
## 4 0.0008504552           0
## 5 0.0012461753           0
## 6 0.0009067742           0
#rows_with_errors <- (df$garage_type_attchd==0) + (df$garage_type_detchd==0) + (df$garage_type_builtIn==0) + (df$garage_type_no_garage==0)

#df <- df[-rows_with_errors]

No changes are needed to the below code. It is included because the results may be helpful for subsequent tasks.

# NO CHANGES NEEDED
# Example: the 18th property has 0's for all garage type variables.
# Pick row 18 first, then keep only the garage columns.
df %>%
  dplyr::slice(18) %>%
  dplyr::select(contains("garage"))
##   garage_type_attchd garage_type_basment garage_type_builtIn garage_type_detchd
## 1                  0                   0                   0                  0
##   garage_type_no_garage
## 1                     0
# NO CHANGES NEEDED
# Calculate the number of units for each garage type: reshape the garage
# indicators to long form, then sum num_units over the rows where the
# indicator is 1.
df %>%
  dplyr::select(num_units, contains("garage")) %>%
  pivot_longer(-num_units, names_to = "feature", values_to = "value") %>%
  group_by(feature) %>%
  summarise(total_units = sum(num_units * value))
## # A tibble: 5 × 2
##   feature               total_units
##   <chr>                       <dbl>
## 1 garage_type_attchd           2227
## 2 garage_type_basment            55
## 3 garage_type_builtIn           235
## 4 garage_type_detchd           1052
## 5 garage_type_no_garage         226
# Calculate the number of units for each neighborhood.
# contains("neighborhood") also matches neighborhood_saleprice, which is a
# price rather than a 0/1 indicator; it is dropped so that every row of the
# result is a genuine unit count.
df %>%
  dplyr::select(num_units, contains("neighborhood"), -neighborhood_saleprice) %>%
  pivot_longer(-num_units, names_to = "feature", values_to = "value") %>%
  group_by(feature) %>%
  summarise(total_units = sum(num_units * value))
## # A tibble: 22 × 2
##    feature               total_units
##    <chr>                       <dbl>
##  1 NeighborhoodBrDale             44
##  2 neighborhood_brk_side         170
##  3 neighborhood_clear_cr          72
##  4 neighborhood_collg_cr         390
##  5 neighborhood_crawfor          137
##  6 neighborhood_edwards          259
##  7 neighborhood_gilbert          204
##  8 neighborhood_idottrr          112
##  9 neighborhood_meadowv           48
## 10 neighborhood_mitchel          141
## # ℹ 12 more rows

You do not need to check the neighborhood columns, as your assistant has already verified that these are correct for all properties.

Task 5 - Select GLM parameters

No code is provided

Task 6 - Fit a GLM

#NO CHANGES NEEDED
# Create training and test sets: draw row indices for an 80% training sample
# based on applicants; the remaining rows form the test set.
# list = FALSE is spelled out because `F` is an ordinary, reassignable variable.
index <- createDataPartition(y = df$applicants, p = 0.8, list = FALSE)

train <- df %>% dplyr::slice(index)
test <- df %>% dplyr::slice(-index)

# Predictors are every column except the first; the response is applicants.
train_x <- train[, -1]
train_y <- train$applicants

# standardize to be between 0 and 1 (each property's share of all training units)
train_number_of_units <- train$num_units / sum(train$num_units)

test_x <- test[, -1]
test_y <- test$applicants

# standardize to be between 0 and 1 (each property's share of all test units)
test_number_of_units <- test$num_units / sum(test$num_units)
# the base (reference) level for neighborhood is the one with the most observations
# the base (reference) level for garage is the one with the most observations

# Create the log_sale_price variable on both splits.
# The earlier mutate() on df already adds a column with this name and the
# same definition, so these assignments overwrite it in place.
train$log_sale_price <- log(train$sale_price)

test$log_sale_price <- log(test$sale_price)

# Fit a GLM: Poisson regression with a log link for the applicant count.
# offset(log(num_units)) enters the linear predictor with a fixed coefficient
# of 1, so the remaining terms describe applicants per unit.
# Do not use the offset or weight variables as predictors. Remove them from the formula (as the below is doing).
# Terms removed from `.`: neighborhood_n_ames and garage_type_attchd (left out
# so they act as the base levels), num_units (used as the offset) and
# log_sale_price.
# Type ?family into the R console to see options for FAMILY.
# NOTE(review): the fitted object is stored under the name `glm`. Later calls
# such as glm(...) still reach the fitting function, but the shared name is
# easy to misread.
glm <- glm(
  applicants ~ . + offset(log(num_units)) - neighborhood_n_ames - garage_type_attchd - num_units - log_sale_price, 
  data = train,
  family = poisson(link = "log")
)
AIC(glm)
## [1] 5228.818
summary(glm)
## 
## Call:
## glm(formula = applicants ~ . + offset(log(num_units)) - neighborhood_n_ames - 
##     garage_type_attchd - num_units - log_sale_price, family = poisson(link = "log"), 
##     data = train)
## 
## Coefficients: (1 not defined because of singularities)
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 4.612e+00  2.188e+01   0.211 0.833062    
## sale_price                  6.177e-07  8.633e-07   0.716 0.474284    
## year_sold                  -3.159e-03  1.088e-02  -0.290 0.771571    
## month_sold                 -7.745e-03  6.154e-03  -1.258 0.208227    
## overall_qual               -4.370e-02  2.487e-02  -1.758 0.078831 .  
## total_sq_feet               6.023e-04  1.234e-04   4.879 1.07e-06 ***
## gr_liv_area                -1.777e-05  7.970e-05  -0.223 0.823556    
## tot_bathrooms              -3.178e-01  1.119e-01  -2.841 0.004502 ** 
## lot_area                    3.411e-06  2.402e-05   0.142 0.887072    
## exter_qual                  1.621e-01  4.573e-02   3.544 0.000394 ***
## full_bath                   1.128e-02  2.489e-02   0.453 0.650537    
## central_airno              -1.780e+00  1.625e-01 -10.950  < 2e-16 ***
## garage_type_basment        -6.379e-01  1.790e-01  -3.563 0.000366 ***
## garage_type_builtIn        -2.797e-01  7.508e-02  -3.725 0.000195 ***
## garage_type_detchd         -3.346e-01  5.183e-02  -6.455 1.08e-10 ***
## garage_type_no_garage      -3.939e-01  9.798e-02  -4.021 5.80e-05 ***
## NeighborhoodBrDale         -4.755e-01  2.562e-01  -1.856 0.063429 .  
## neighborhood_brk_side      -1.957e-01  1.163e-01  -1.684 0.092249 .  
## neighborhood_clear_cr      -4.917e-02  1.275e-01  -0.386 0.699756    
## neighborhood_collg_cr      -8.473e-02  8.668e-02  -0.977 0.328356    
## neighborhood_crawfor       -1.207e-01  1.202e-01  -1.004 0.315313    
## neighborhood_edwards        7.295e-02  8.902e-02   0.819 0.412545    
## neighborhood_gilbert        1.496e-02  9.569e-02   0.156 0.875738    
## neighborhood_idottrr       -8.539e-01  2.115e-01  -4.037 5.41e-05 ***
## neighborhood_meadowv       -3.208e-01  2.008e-01  -1.597 0.110193    
## neighborhood_mitchel       -1.280e-01  9.581e-02  -1.337 0.181385    
## neighborhood_n_ridge       -3.254e-01  1.969e-01  -1.653 0.098379 .  
## neighborhood_n_ridge_hghts -3.719e-01  1.735e-01  -2.143 0.032097 *  
## neighborhood_n_w_ames      -8.980e-02  9.123e-02  -0.984 0.324963    
## neighborhood_old_town      -4.565e-02  8.864e-02  -0.515 0.606522    
## neighborhood_sawyer        -4.130e-01  9.983e-02  -4.137 3.52e-05 ***
## neighborhood_sawyer_w      -5.371e-02  9.290e-02  -0.578 0.563154    
## neighborhood_somerst       -3.092e-02  1.227e-01  -0.252 0.801017    
## neighborhood_stone_br      -1.747e-01  1.993e-01  -0.877 0.380701    
## neighborhood_swisu          2.543e-01  1.504e-01   1.691 0.090895 .  
## neighborhood_timber        -1.799e-01  1.442e-01  -1.248 0.212172    
## neighborhood_veenker       -3.291e-01  2.109e-01  -1.560 0.118722    
## neighborhood_saleprice             NA         NA      NA       NA    
## log_rel_price              -7.583e-01  3.327e-01  -2.280 0.022635 *  
## sale_price_per_sqft        -1.534e+03  1.210e+03  -1.268 0.204712    
## bath_pr_sqft                1.234e+03  3.836e+02   3.217 0.001294 ** 
## student_apt                 4.999e-01  3.452e-02  14.482  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2784.1  on 1144  degrees of freedom
## Residual deviance: 1722.6  on 1104  degrees of freedom
## AIC: 5228.8
## 
## Number of Fisher Scoring iterations: 5
# Alternative answer
# Models application frequency (average applicants per unit), weighting each
# property by its number of units, instead of using an offset.
#
# The terms to drop must be removed with a binary `-`. The previous
# `. + - neighborhood_n_ames` used a unary minus, which removes nothing:
# neighborhood_n_ames stayed in the model and neighborhood_veenker was aliased
# instead, so the base level differed from `glm`.
# NOTE: the summary printed below was knitted before this fix. After
# re-knitting, neighborhood_n_ames should be the base level and the neighborhood
# coefficients should line up with those of `glm`.
#
# AIC is reported as Inf because the response applicants / num_units is not
# integer-valued, so the Poisson log-likelihood is not finite. Compare models
# using `glm` instead.
glm2 <- glm(
  applicants / num_units ~ . - neighborhood_n_ames - garage_type_attchd -
    log_sale_price - num_units,
  data = train,
  weights = num_units,
  family = poisson(link = "log")
)
AIC(glm2)
## [1] Inf
summary(glm2)
## 
## Call:
## glm(formula = applicants/num_units ~ . + -neighborhood_n_ames - 
##     garage_type_attchd - log_sale_price - num_units, family = poisson(link = "log"), 
##     data = train, weights = num_units)
## 
## Coefficients: (2 not defined because of singularities)
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 4.283e+00  2.187e+01   0.196 0.844719    
## sale_price                  6.177e-07  8.633e-07   0.716 0.474284    
## year_sold                  -3.159e-03  1.088e-02  -0.290 0.771571    
## month_sold                 -7.745e-03  6.154e-03  -1.258 0.208227    
## overall_qual               -4.370e-02  2.487e-02  -1.758 0.078831 .  
## total_sq_feet               6.023e-04  1.234e-04   4.879 1.07e-06 ***
## gr_liv_area                -1.777e-05  7.970e-05  -0.223 0.823556    
## tot_bathrooms              -3.178e-01  1.119e-01  -2.841 0.004502 ** 
## lot_area                    3.411e-06  2.402e-05   0.142 0.887072    
## exter_qual                  1.621e-01  4.573e-02   3.544 0.000394 ***
## full_bath                   1.128e-02  2.489e-02   0.453 0.650538    
## central_airno              -1.780e+00  1.625e-01 -10.950  < 2e-16 ***
## garage_type_basment        -6.379e-01  1.790e-01  -3.563 0.000366 ***
## garage_type_builtIn        -2.797e-01  7.508e-02  -3.725 0.000195 ***
## garage_type_detchd         -3.346e-01  5.183e-02  -6.455 1.08e-10 ***
## garage_type_no_garage      -3.939e-01  9.798e-02  -4.021 5.80e-05 ***
## NeighborhoodBrDale         -1.464e-01  3.439e-01  -0.426 0.670320    
## neighborhood_brk_side       1.333e-01  2.485e-01   0.537 0.591566    
## neighborhood_clear_cr       2.799e-01  2.124e-01   1.318 0.187653    
## neighborhood_collg_cr       2.443e-01  1.925e-01   1.269 0.204319    
## neighborhood_crawfor        2.084e-01  2.065e-01   1.009 0.313056    
## neighborhood_edwards        4.020e-01  2.354e-01   1.708 0.087652 .  
## neighborhood_gilbert        3.440e-01  1.990e-01   1.729 0.083767 .  
## neighborhood_idottrr       -5.248e-01  3.120e-01  -1.682 0.092607 .  
## neighborhood_meadowv        8.288e-03  3.029e-01   0.027 0.978168    
## neighborhood_mitchel        2.010e-01  2.183e-01   0.921 0.357056    
## neighborhood_n_ames         3.291e-01  2.109e-01   1.560 0.118722    
## neighborhood_n_ridge        3.637e-03  2.102e-01   0.017 0.986197    
## neighborhood_n_ridge_hghts -4.286e-02  2.010e-01  -0.213 0.831107    
## neighborhood_n_w_ames       2.393e-01  1.982e-01   1.207 0.227450    
## neighborhood_old_town       2.834e-01  2.310e-01   1.227 0.219763    
## neighborhood_sawyer        -8.389e-02  2.315e-01  -0.362 0.717115    
## neighborhood_sawyer_w       2.754e-01  2.029e-01   1.357 0.174676    
## neighborhood_somerst        2.981e-01  1.954e-01   1.526 0.127104    
## neighborhood_stone_br       1.544e-01  2.213e-01   0.698 0.485302    
## neighborhood_swisu          5.834e-01  2.565e-01   2.274 0.022956 *  
## neighborhood_timber         1.492e-01  2.034e-01   0.734 0.463238    
## neighborhood_veenker               NA         NA      NA       NA    
## neighborhood_saleprice             NA         NA      NA       NA    
## log_rel_price              -7.583e-01  3.327e-01  -2.280 0.022635 *  
## sale_price_per_sqft        -1.534e+03  1.210e+03  -1.268 0.204713    
## bath_pr_sqft                1.234e+03  3.836e+02   3.217 0.001294 ** 
## student_apt                 4.999e-01  3.452e-02  14.482  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2784.1  on 1144  degrees of freedom
## Residual deviance: 1722.6  on 1104  degrees of freedom
## AIC: Inf
## 
## Number of Fisher Scoring iterations: 5
# You can verify that both models return the same predictions.

# Predicted applicant counts on the test set from the offset model.
app_count <- predict(glm, newdata = test, type = "response")

# Predicted applicants per unit from the weighted model, multiplied by the
# number of units to put them back on the count scale.
app_frequency <- predict(glm2, newdata = test, type = "response") * test$num_units

head(app_count)
##         1         2         3         4         5         6 
##  7.466909  1.381043  1.193778  3.390522  3.733967 10.568552
head(app_frequency)
##         1         2         3         4         5         6 
##  7.466909  1.381043  1.193778  3.390522  3.733967 10.568552

Task 7 - Use AIC to select features

# NO CODE CHANGES NEEDED
# The following code will use the same formula and family from the `glm` object to perform stepwise selection.
# No `scope` or `direction` argument is passed, so stepAIC() runs with its
# defaults, starting from the full model stored in `glm`.
# NOTE(review): attaching MASS at this point presumably masks dplyr's select(),
# which would explain the explicit dplyr::select() calls above. Confirm
# before adding unqualified select() calls after this line.

library(MASS)
stepwise_result <- stepAIC(glm)
## Start:  AIC=5228.82
## applicants ~ sale_price + num_units + year_sold + month_sold + 
##     overall_qual + total_sq_feet + gr_liv_area + tot_bathrooms + 
##     lot_area + exter_qual + full_bath + central_air + garage_type_attchd + 
##     garage_type_basment + garage_type_builtIn + garage_type_detchd + 
##     garage_type_no_garage + NeighborhoodBrDale + neighborhood_brk_side + 
##     neighborhood_clear_cr + neighborhood_collg_cr + neighborhood_crawfor + 
##     neighborhood_edwards + neighborhood_gilbert + neighborhood_idottrr + 
##     neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ames + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames + 
##     neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w + 
##     neighborhood_somerst + neighborhood_stone_br + neighborhood_swisu + 
##     neighborhood_timber + neighborhood_veenker + neighborhood_saleprice + 
##     log_rel_price + log_sale_price + sale_price_per_sqft + bath_pr_sqft + 
##     student_apt + offset(log(num_units)) - neighborhood_n_ames - 
##     garage_type_attchd - num_units - log_sale_price
## 
## 
## Step:  AIC=5228.82
## applicants ~ sale_price + year_sold + month_sold + overall_qual + 
##     total_sq_feet + gr_liv_area + tot_bathrooms + lot_area + 
##     exter_qual + full_bath + central_air + garage_type_basment + 
##     garage_type_builtIn + garage_type_detchd + garage_type_no_garage + 
##     NeighborhoodBrDale + neighborhood_brk_side + neighborhood_clear_cr + 
##     neighborhood_collg_cr + neighborhood_crawfor + neighborhood_edwards + 
##     neighborhood_gilbert + neighborhood_idottrr + neighborhood_meadowv + 
##     neighborhood_mitchel + neighborhood_n_ridge + neighborhood_n_ridge_hghts + 
##     neighborhood_n_w_ames + neighborhood_old_town + neighborhood_sawyer + 
##     neighborhood_sawyer_w + neighborhood_somerst + neighborhood_stone_br + 
##     neighborhood_swisu + neighborhood_timber + neighborhood_veenker + 
##     log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt + 
##     offset(log(num_units))
## 
##                              Df Deviance    AIC
## - lot_area                    1   1722.6 5226.8
## - neighborhood_gilbert        1   1722.6 5226.8
## - gr_liv_area                 1   1722.7 5226.9
## - neighborhood_somerst        1   1722.7 5226.9
## - year_sold                   1   1722.7 5226.9
## - neighborhood_clear_cr       1   1722.8 5227.0
## - full_bath                   1   1722.8 5227.0
## - neighborhood_old_town       1   1722.9 5227.1
## - neighborhood_sawyer_w       1   1722.9 5227.2
## - sale_price                  1   1723.1 5227.3
## - neighborhood_edwards        1   1723.3 5227.5
## - neighborhood_stone_br       1   1723.4 5227.6
## - neighborhood_collg_cr       1   1723.6 5227.8
## - neighborhood_n_w_ames       1   1723.6 5227.8
## - neighborhood_crawfor        1   1723.6 5227.8
## - neighborhood_timber         1   1724.2 5228.4
## - month_sold                  1   1724.2 5228.4
## - sale_price_per_sqft         1   1724.3 5228.5
## - neighborhood_mitchel        1   1724.4 5228.6
## <none>                            1722.6 5228.8
## - neighborhood_veenker        1   1725.2 5229.4
## - neighborhood_swisu          1   1725.3 5229.5
## - neighborhood_n_ridge        1   1725.3 5229.5
## - neighborhood_meadowv        1   1725.3 5229.5
## - neighborhood_brk_side       1   1725.5 5229.7
## - overall_qual                1   1725.7 5229.9
## - NeighborhoodBrDale          1   1726.5 5230.7
## - neighborhood_n_ridge_hghts  1   1727.1 5231.3
## - log_rel_price               1   1727.5 5231.7
## - tot_bathrooms               1   1730.6 5234.8
## - bath_pr_sqft                1   1732.8 5237.0
## - exter_qual                  1   1735.1 5239.3
## - garage_type_builtIn         1   1737.3 5241.5
## - garage_type_basment         1   1737.9 5242.1
## - garage_type_no_garage       1   1739.8 5244.0
## - neighborhood_sawyer         1   1741.2 5245.4
## - total_sq_feet               1   1741.5 5245.7
## - neighborhood_idottrr        1   1742.7 5246.9
## - garage_type_detchd          1   1765.4 5269.6
## - central_air                 1   1916.0 5420.2
## - student_apt                 1   1923.6 5427.8
## 
## Step:  AIC=5226.84
## applicants ~ sale_price + year_sold + month_sold + overall_qual + 
##     total_sq_feet + gr_liv_area + tot_bathrooms + exter_qual + 
##     full_bath + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_clear_cr + neighborhood_collg_cr + 
##     neighborhood_crawfor + neighborhood_edwards + neighborhood_gilbert + 
##     neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames + 
##     neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w + 
##     neighborhood_somerst + neighborhood_stone_br + neighborhood_swisu + 
##     neighborhood_timber + neighborhood_veenker + log_rel_price + 
##     sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_gilbert        1   1722.7 5224.9
## - gr_liv_area                 1   1722.7 5224.9
## - neighborhood_somerst        1   1722.7 5224.9
## - year_sold                   1   1722.7 5224.9
## - neighborhood_clear_cr       1   1722.8 5225.0
## - full_bath                   1   1722.8 5225.0
## - neighborhood_old_town       1   1722.9 5225.1
## - neighborhood_sawyer_w       1   1723.0 5225.2
## - sale_price                  1   1723.2 5225.4
## - neighborhood_edwards        1   1723.3 5225.5
## - neighborhood_stone_br       1   1723.4 5225.6
## - neighborhood_n_w_ames       1   1723.6 5225.8
## - neighborhood_collg_cr       1   1723.6 5225.8
## - neighborhood_crawfor        1   1723.6 5225.9
## - neighborhood_timber         1   1724.2 5226.4
## - month_sold                  1   1724.2 5226.4
## - sale_price_per_sqft         1   1724.3 5226.5
## - neighborhood_mitchel        1   1724.4 5226.7
## <none>                            1722.6 5226.8
## - neighborhood_veenker        1   1725.2 5227.4
## - neighborhood_swisu          1   1725.3 5227.5
## - neighborhood_n_ridge        1   1725.3 5227.6
## - neighborhood_brk_side       1   1725.6 5227.8
## - overall_qual                1   1725.9 5228.1
## - neighborhood_meadowv        1   1725.9 5228.1
## - NeighborhoodBrDale          1   1727.0 5229.2
## - neighborhood_n_ridge_hghts  1   1727.2 5229.5
## - log_rel_price               1   1727.5 5229.7
## - tot_bathrooms               1   1730.7 5232.9
## - bath_pr_sqft                1   1732.9 5235.1
## - exter_qual                  1   1735.1 5237.3
## - garage_type_builtIn         1   1737.4 5239.6
## - garage_type_basment         1   1737.9 5240.1
## - garage_type_no_garage       1   1740.0 5242.2
## - neighborhood_sawyer         1   1741.2 5243.4
## - total_sq_feet               1   1741.7 5243.9
## - neighborhood_idottrr        1   1742.7 5244.9
## - garage_type_detchd          1   1766.2 5268.5
## - central_air                 1   1916.1 5418.3
## - student_apt                 1   1923.7 5425.9
## 
## Step:  AIC=5224.86
## applicants ~ sale_price + year_sold + month_sold + overall_qual + 
##     total_sq_feet + gr_liv_area + tot_bathrooms + exter_qual + 
##     full_bath + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_clear_cr + neighborhood_collg_cr + 
##     neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr + 
##     neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge + 
##     neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_old_town + 
##     neighborhood_sawyer + neighborhood_sawyer_w + neighborhood_somerst + 
##     neighborhood_stone_br + neighborhood_swisu + neighborhood_timber + 
##     neighborhood_veenker + log_rel_price + sale_price_per_sqft + 
##     bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - gr_liv_area                 1   1722.7 5222.9
## - year_sold                   1   1722.7 5223.0
## - neighborhood_clear_cr       1   1722.9 5223.1
## - full_bath                   1   1722.9 5223.1
## - neighborhood_somerst        1   1722.9 5223.1
## - neighborhood_old_town       1   1723.0 5223.2
## - neighborhood_sawyer_w       1   1723.3 5223.5
## - neighborhood_edwards        1   1723.3 5223.5
## - sale_price                  1   1723.5 5223.7
## - neighborhood_stone_br       1   1724.1 5224.3
## - neighborhood_crawfor        1   1724.1 5224.3
## - month_sold                  1   1724.3 5224.5
## - neighborhood_n_w_ames       1   1724.3 5224.5
## - sale_price_per_sqft         1   1724.6 5224.8
## <none>                            1722.7 5224.9
## - neighborhood_collg_cr       1   1724.8 5225.0
## - neighborhood_mitchel        1   1724.8 5225.0
## - neighborhood_timber         1   1725.2 5225.5
## - neighborhood_swisu          1   1725.3 5225.5
## - neighborhood_brk_side       1   1725.7 5225.9
## - neighborhood_veenker        1   1725.8 5226.1
## - neighborhood_meadowv        1   1725.9 5226.1
## - overall_qual                1   1725.9 5226.1
## - NeighborhoodBrDale          1   1727.0 5227.2
## - neighborhood_n_ridge        1   1727.3 5227.5
## - log_rel_price               1   1727.5 5227.8
## - neighborhood_n_ridge_hghts  1   1731.1 5231.3
## - tot_bathrooms               1   1731.3 5231.5
## - bath_pr_sqft                1   1733.8 5234.0
## - exter_qual                  1   1735.6 5235.8
## - garage_type_builtIn         1   1737.4 5237.6
## - garage_type_basment         1   1737.9 5238.1
## - garage_type_no_garage       1   1740.4 5240.7
## - total_sq_feet               1   1741.8 5242.0
## - neighborhood_sawyer         1   1741.8 5242.0
## - neighborhood_idottrr        1   1742.8 5243.0
## - garage_type_detchd          1   1767.4 5267.7
## - central_air                 1   1919.3 5419.6
## - student_apt                 1   1923.8 5424.0
## 
## Step:  AIC=5222.91
## applicants ~ sale_price + year_sold + month_sold + overall_qual + 
##     total_sq_feet + tot_bathrooms + exter_qual + full_bath + 
##     central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_clear_cr + neighborhood_collg_cr + 
##     neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr + 
##     neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge + 
##     neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_old_town + 
##     neighborhood_sawyer + neighborhood_sawyer_w + neighborhood_somerst + 
##     neighborhood_stone_br + neighborhood_swisu + neighborhood_timber + 
##     neighborhood_veenker + log_rel_price + sale_price_per_sqft + 
##     bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - year_sold                   1   1722.8 5221.0
## - full_bath                   1   1722.9 5221.1
## - neighborhood_somerst        1   1722.9 5221.2
## - neighborhood_clear_cr       1   1722.9 5221.2
## - neighborhood_old_town       1   1723.0 5221.2
## - neighborhood_sawyer_w       1   1723.3 5221.5
## - neighborhood_edwards        1   1723.3 5221.6
## - sale_price                  1   1723.6 5221.8
## - neighborhood_stone_br       1   1724.1 5222.3
## - neighborhood_crawfor        1   1724.2 5222.5
## - month_sold                  1   1724.3 5222.5
## - neighborhood_n_w_ames       1   1724.4 5222.6
## - sale_price_per_sqft         1   1724.7 5222.9
## <none>                            1722.7 5222.9
## - neighborhood_collg_cr       1   1724.8 5223.0
## - neighborhood_mitchel        1   1724.8 5223.0
## - neighborhood_timber         1   1725.3 5223.5
## - neighborhood_swisu          1   1725.3 5223.5
## - neighborhood_brk_side       1   1725.7 5224.0
## - neighborhood_veenker        1   1725.9 5224.1
## - neighborhood_meadowv        1   1726.0 5224.2
## - overall_qual                1   1726.0 5224.2
## - NeighborhoodBrDale          1   1727.1 5225.3
## - neighborhood_n_ridge        1   1727.5 5225.7
## - log_rel_price               1   1727.6 5225.8
## - neighborhood_n_ridge_hghts  1   1731.1 5229.3
## - tot_bathrooms               1   1731.3 5229.6
## - bath_pr_sqft                1   1733.8 5232.0
## - exter_qual                  1   1735.8 5234.0
## - garage_type_basment         1   1738.2 5236.4
## - garage_type_builtIn         1   1739.3 5237.5
## - garage_type_no_garage       1   1740.5 5238.7
## - neighborhood_sawyer         1   1741.8 5240.1
## - neighborhood_idottrr        1   1742.9 5241.1
## - total_sq_feet               1   1743.7 5241.9
## - garage_type_detchd          1   1767.7 5265.9
## - central_air                 1   1921.7 5419.9
## - student_apt                 1   1923.9 5422.1
## 
## Step:  AIC=5221
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet + 
##     tot_bathrooms + exter_qual + full_bath + central_air + garage_type_basment + 
##     garage_type_builtIn + garage_type_detchd + garage_type_no_garage + 
##     NeighborhoodBrDale + neighborhood_brk_side + neighborhood_clear_cr + 
##     neighborhood_collg_cr + neighborhood_crawfor + neighborhood_edwards + 
##     neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames + 
##     neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w + 
##     neighborhood_somerst + neighborhood_stone_br + neighborhood_swisu + 
##     neighborhood_timber + neighborhood_veenker + log_rel_price + 
##     sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - full_bath                   1   1723.0 5219.2
## - neighborhood_clear_cr       1   1723.0 5219.2
## - neighborhood_somerst        1   1723.0 5219.2
## - neighborhood_old_town       1   1723.1 5219.3
## - neighborhood_sawyer_w       1   1723.4 5219.6
## - neighborhood_edwards        1   1723.4 5219.6
## - sale_price                  1   1723.7 5219.9
## - neighborhood_stone_br       1   1724.2 5220.4
## - month_sold                  1   1724.3 5220.5
## - neighborhood_crawfor        1   1724.3 5220.6
## - neighborhood_n_w_ames       1   1724.5 5220.7
## - sale_price_per_sqft         1   1724.8 5221.0
## <none>                            1722.8 5221.0
## - neighborhood_collg_cr       1   1724.8 5221.1
## - neighborhood_mitchel        1   1724.9 5221.1
## - neighborhood_timber         1   1725.3 5221.5
## - neighborhood_swisu          1   1725.4 5221.6
## - neighborhood_brk_side       1   1725.9 5222.1
## - neighborhood_veenker        1   1726.0 5222.2
## - neighborhood_meadowv        1   1726.1 5222.3
## - overall_qual                1   1726.2 5222.4
## - NeighborhoodBrDale          1   1727.2 5223.4
## - neighborhood_n_ridge        1   1727.6 5223.8
## - log_rel_price               1   1727.7 5223.9
## - neighborhood_n_ridge_hghts  1   1731.1 5227.4
## - tot_bathrooms               1   1731.5 5227.7
## - bath_pr_sqft                1   1734.0 5230.2
## - exter_qual                  1   1735.9 5232.1
## - garage_type_basment         1   1738.2 5234.5
## - garage_type_builtIn         1   1739.5 5235.7
## - garage_type_no_garage       1   1740.5 5236.7
## - neighborhood_sawyer         1   1741.9 5238.1
## - neighborhood_idottrr        1   1742.9 5239.1
## - total_sq_feet               1   1743.8 5240.0
## - garage_type_detchd          1   1767.7 5263.9
## - central_air                 1   1922.6 5418.8
## - student_apt                 1   1924.5 5420.7
## 
## Step:  AIC=5219.22
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet + 
##     tot_bathrooms + exter_qual + central_air + garage_type_basment + 
##     garage_type_builtIn + garage_type_detchd + garage_type_no_garage + 
##     NeighborhoodBrDale + neighborhood_brk_side + neighborhood_clear_cr + 
##     neighborhood_collg_cr + neighborhood_crawfor + neighborhood_edwards + 
##     neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames + 
##     neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w + 
##     neighborhood_somerst + neighborhood_stone_br + neighborhood_swisu + 
##     neighborhood_timber + neighborhood_veenker + log_rel_price + 
##     sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_somerst        1   1723.2 5217.5
## - neighborhood_clear_cr       1   1723.3 5217.5
## - neighborhood_old_town       1   1723.3 5217.5
## - neighborhood_sawyer_w       1   1723.6 5217.8
## - neighborhood_edwards        1   1723.7 5217.9
## - sale_price                  1   1724.0 5218.2
## - month_sold                  1   1724.5 5218.7
## - neighborhood_stone_br       1   1724.6 5218.8
## - neighborhood_n_w_ames       1   1724.7 5218.9
## - neighborhood_crawfor        1   1724.7 5218.9
## - sale_price_per_sqft         1   1725.0 5219.2
## <none>                            1723.0 5219.2
## - neighborhood_collg_cr       1   1725.1 5219.3
## - neighborhood_mitchel        1   1725.1 5219.3
## - neighborhood_timber         1   1725.7 5219.9
## - neighborhood_swisu          1   1725.7 5219.9
## - neighborhood_brk_side       1   1726.1 5220.3
## - overall_qual                1   1726.2 5220.4
## - neighborhood_meadowv        1   1726.3 5220.5
## - neighborhood_veenker        1   1726.5 5220.7
## - NeighborhoodBrDale          1   1727.3 5221.6
## - log_rel_price               1   1728.2 5222.4
## - neighborhood_n_ridge        1   1728.3 5222.5
## - tot_bathrooms               1   1731.6 5225.8
## - neighborhood_n_ridge_hghts  1   1731.8 5226.0
## - bath_pr_sqft                1   1734.3 5228.5
## - exter_qual                  1   1736.4 5230.6
## - garage_type_basment         1   1738.5 5232.7
## - garage_type_builtIn         1   1739.5 5233.7
## - garage_type_no_garage       1   1740.7 5234.9
## - neighborhood_sawyer         1   1742.0 5236.3
## - neighborhood_idottrr        1   1743.0 5237.2
## - total_sq_feet               1   1745.2 5239.4
## - garage_type_detchd          1   1768.5 5262.7
## - central_air                 1   1922.6 5416.8
## - student_apt                 1   1926.9 5421.1
## 
## Step:  AIC=5217.45
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet + 
##     tot_bathrooms + exter_qual + central_air + garage_type_basment + 
##     garage_type_builtIn + garage_type_detchd + garage_type_no_garage + 
##     NeighborhoodBrDale + neighborhood_brk_side + neighborhood_clear_cr + 
##     neighborhood_collg_cr + neighborhood_crawfor + neighborhood_edwards + 
##     neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames + 
##     neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w + 
##     neighborhood_stone_br + neighborhood_swisu + neighborhood_timber + 
##     neighborhood_veenker + log_rel_price + sale_price_per_sqft + 
##     bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_clear_cr       1   1723.4 5215.6
## - neighborhood_old_town       1   1723.5 5215.7
## - neighborhood_sawyer_w       1   1723.7 5215.9
## - neighborhood_edwards        1   1723.8 5216.1
## - sale_price                  1   1724.0 5216.2
## - neighborhood_stone_br       1   1724.6 5216.8
## - neighborhood_n_w_ames       1   1724.7 5216.9
## - neighborhood_crawfor        1   1724.7 5216.9
## - month_sold                  1   1724.7 5216.9
## - sale_price_per_sqft         1   1725.0 5217.2
## - neighborhood_mitchel        1   1725.2 5217.4
## <none>                            1723.2 5217.5
## - neighborhood_collg_cr       1   1725.2 5217.5
## - neighborhood_timber         1   1725.8 5218.0
## - neighborhood_swisu          1   1726.0 5218.2
## - neighborhood_veenker        1   1726.5 5218.7
## - neighborhood_brk_side       1   1726.5 5218.7
## - neighborhood_meadowv        1   1726.8 5219.0
## - overall_qual                1   1727.0 5219.3
## - NeighborhoodBrDale          1   1727.8 5220.0
## - log_rel_price               1   1728.2 5220.4
## - neighborhood_n_ridge        1   1729.7 5221.9
## - tot_bathrooms               1   1731.6 5223.8
## - bath_pr_sqft                1   1734.3 5226.5
## - neighborhood_n_ridge_hghts  1   1735.9 5228.2
## - exter_qual                  1   1736.5 5228.7
## - garage_type_basment         1   1738.7 5230.9
## - garage_type_builtIn         1   1739.5 5231.7
## - garage_type_no_garage       1   1740.8 5233.0
## - neighborhood_sawyer         1   1742.4 5234.6
## - neighborhood_idottrr        1   1744.0 5236.2
## - total_sq_feet               1   1745.2 5237.5
## - garage_type_detchd          1   1769.8 5262.0
## - central_air                 1   1923.6 5415.8
## - student_apt                 1   1929.5 5421.7
## 
## Step:  AIC=5215.6
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet + 
##     tot_bathrooms + exter_qual + central_air + garage_type_basment + 
##     garage_type_builtIn + garage_type_detchd + garage_type_no_garage + 
##     NeighborhoodBrDale + neighborhood_brk_side + neighborhood_collg_cr + 
##     neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr + 
##     neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge + 
##     neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_old_town + 
##     neighborhood_sawyer + neighborhood_sawyer_w + neighborhood_stone_br + 
##     neighborhood_swisu + neighborhood_timber + neighborhood_veenker + 
##     log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt + 
##     offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_old_town       1   1723.7 5213.9
## - neighborhood_sawyer_w       1   1723.8 5214.0
## - neighborhood_edwards        1   1724.0 5214.2
## - sale_price                  1   1724.0 5214.2
## - neighborhood_n_w_ames       1   1724.7 5214.9
## - neighborhood_stone_br       1   1724.7 5214.9
## - neighborhood_crawfor        1   1724.7 5215.0
## - month_sold                  1   1724.9 5215.1
## - sale_price_per_sqft         1   1725.1 5215.3
## - neighborhood_collg_cr       1   1725.3 5215.5
## - neighborhood_mitchel        1   1725.3 5215.5
## <none>                            1723.4 5215.6
## - neighborhood_timber         1   1725.8 5216.0
## - neighborhood_swisu          1   1726.2 5216.4
## - neighborhood_veenker        1   1726.5 5216.7
## - neighborhood_brk_side       1   1726.6 5216.8
## - neighborhood_meadowv        1   1726.9 5217.1
## - overall_qual                1   1727.1 5217.3
## - NeighborhoodBrDale          1   1728.0 5218.2
## - log_rel_price               1   1728.3 5218.5
## - neighborhood_n_ridge        1   1729.7 5219.9
## - tot_bathrooms               1   1731.7 5221.9
## - bath_pr_sqft                1   1734.3 5224.5
## - neighborhood_n_ridge_hghts  1   1736.1 5226.3
## - exter_qual                  1   1737.2 5227.5
## - garage_type_basment         1   1738.8 5229.0
## - garage_type_builtIn         1   1739.6 5229.8
## - garage_type_no_garage       1   1740.8 5231.0
## - neighborhood_sawyer         1   1742.5 5232.7
## - neighborhood_idottrr        1   1744.3 5234.5
## - total_sq_feet               1   1745.2 5235.5
## - garage_type_detchd          1   1769.9 5260.1
## - central_air                 1   1923.8 5414.0
## - student_apt                 1   1930.2 5420.4
## 
## Step:  AIC=5213.87
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet + 
##     tot_bathrooms + exter_qual + central_air + garage_type_basment + 
##     garage_type_builtIn + garage_type_detchd + garage_type_no_garage + 
##     NeighborhoodBrDale + neighborhood_brk_side + neighborhood_collg_cr + 
##     neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr + 
##     neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge + 
##     neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_sawyer + 
##     neighborhood_sawyer_w + neighborhood_stone_br + neighborhood_swisu + 
##     neighborhood_timber + neighborhood_veenker + log_rel_price + 
##     sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_sawyer_w       1   1724.0 5212.2
## - neighborhood_edwards        1   1724.7 5212.9
## - sale_price                  1   1724.7 5212.9
## - neighborhood_n_w_ames       1   1724.9 5213.1
## - neighborhood_crawfor        1   1725.0 5213.2
## - month_sold                  1   1725.1 5213.4
## - neighborhood_stone_br       1   1725.2 5213.4
## - neighborhood_mitchel        1   1725.4 5213.6
## - neighborhood_collg_cr       1   1725.5 5213.7
## <none>                            1723.7 5213.9
## - sale_price_per_sqft         1   1725.7 5213.9
## - neighborhood_timber         1   1726.2 5214.4
## - neighborhood_brk_side       1   1726.6 5214.8
## - neighborhood_swisu          1   1726.8 5215.1
## - neighborhood_veenker        1   1726.9 5215.1
## - neighborhood_meadowv        1   1727.0 5215.2
## - overall_qual                1   1727.4 5215.6
## - NeighborhoodBrDale          1   1728.0 5216.2
## - log_rel_price               1   1728.5 5216.8
## - neighborhood_n_ridge        1   1730.8 5219.0
## - tot_bathrooms               1   1732.4 5220.6
## - bath_pr_sqft                1   1735.2 5223.4
## - exter_qual                  1   1737.4 5225.6
## - neighborhood_n_ridge_hghts  1   1737.6 5225.8
## - garage_type_basment         1   1739.1 5227.3
## - garage_type_builtIn         1   1739.8 5228.0
## - garage_type_no_garage       1   1742.4 5230.6
## - neighborhood_sawyer         1   1742.7 5230.9
## - neighborhood_idottrr        1   1744.8 5233.0
## - total_sq_feet               1   1745.4 5233.7
## - garage_type_detchd          1   1778.8 5267.0
## - student_apt                 1   1930.5 5418.7
## - central_air                 1   1936.7 5424.9
## 
## Step:  AIC=5212.18
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet + 
##     tot_bathrooms + exter_qual + central_air + garage_type_basment + 
##     garage_type_builtIn + garage_type_detchd + garage_type_no_garage + 
##     NeighborhoodBrDale + neighborhood_brk_side + neighborhood_collg_cr + 
##     neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr + 
##     neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge + 
##     neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_sawyer + 
##     neighborhood_stone_br + neighborhood_swisu + neighborhood_timber + 
##     neighborhood_veenker + log_rel_price + sale_price_per_sqft + 
##     bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - sale_price                  1   1724.9 5211.1
## - neighborhood_n_w_ames       1   1725.0 5211.3
## - neighborhood_edwards        1   1725.0 5211.3
## - neighborhood_crawfor        1   1725.2 5211.4
## - neighborhood_stone_br       1   1725.3 5211.5
## - month_sold                  1   1725.5 5211.7
## - neighborhood_collg_cr       1   1725.5 5211.7
## - neighborhood_mitchel        1   1725.6 5211.8
## <none>                            1724.0 5212.2
## - sale_price_per_sqft         1   1726.0 5212.2
## - neighborhood_timber         1   1726.3 5212.6
## - neighborhood_brk_side       1   1726.9 5213.1
## - neighborhood_veenker        1   1727.0 5213.3
## - neighborhood_swisu          1   1727.2 5213.4
## - neighborhood_meadowv        1   1727.2 5213.4
## - overall_qual                1   1727.7 5213.9
## - NeighborhoodBrDale          1   1728.3 5214.5
## - log_rel_price               1   1728.8 5215.0
## - neighborhood_n_ridge        1   1730.8 5217.0
## - tot_bathrooms               1   1732.8 5219.0
## - bath_pr_sqft                1   1735.6 5221.8
## - neighborhood_n_ridge_hghts  1   1737.6 5223.8
## - exter_qual                  1   1737.6 5223.8
## - garage_type_basment         1   1739.4 5225.6
## - garage_type_builtIn         1   1740.0 5226.2
## - garage_type_no_garage       1   1742.5 5228.7
## - neighborhood_sawyer         1   1742.8 5229.0
## - neighborhood_idottrr        1   1745.0 5231.2
## - total_sq_feet               1   1745.7 5231.9
## - garage_type_detchd          1   1778.9 5265.1
## - student_apt                 1   1932.2 5418.4
## - central_air                 1   1936.7 5422.9
## 
## Step:  AIC=5211.14
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms + 
##     exter_qual + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_collg_cr + neighborhood_crawfor + 
##     neighborhood_edwards + neighborhood_idottrr + neighborhood_meadowv + 
##     neighborhood_mitchel + neighborhood_n_ridge + neighborhood_n_ridge_hghts + 
##     neighborhood_n_w_ames + neighborhood_sawyer + neighborhood_stone_br + 
##     neighborhood_swisu + neighborhood_timber + neighborhood_veenker + 
##     log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt + 
##     offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_edwards        1   1725.6 5209.8
## - neighborhood_stone_br       1   1725.6 5209.9
## - neighborhood_crawfor        1   1725.7 5209.9
## - neighborhood_n_w_ames       1   1725.8 5210.0
## - neighborhood_collg_cr       1   1726.2 5210.5
## - sale_price_per_sqft         1   1726.5 5210.7
## - month_sold                  1   1726.5 5210.7
## - neighborhood_timber         1   1726.7 5210.9
## - neighborhood_mitchel        1   1726.7 5210.9
## <none>                            1724.9 5211.1
## - neighborhood_veenker        1   1727.5 5211.7
## - overall_qual                1   1727.8 5212.0
## - neighborhood_swisu          1   1727.9 5212.1
## - neighborhood_brk_side       1   1728.5 5212.7
## - neighborhood_meadowv        1   1729.0 5213.2
## - log_rel_price               1   1729.5 5213.7
## - NeighborhoodBrDale          1   1730.1 5214.3
## - neighborhood_n_ridge        1   1731.2 5215.4
## - tot_bathrooms               1   1733.0 5217.2
## - bath_pr_sqft                1   1735.8 5220.0
## - neighborhood_n_ridge_hghts  1   1738.9 5223.1
## - exter_qual                  1   1740.5 5224.7
## - garage_type_builtIn         1   1740.7 5224.9
## - garage_type_basment         1   1740.7 5224.9
## - garage_type_no_garage       1   1742.9 5227.1
## - neighborhood_sawyer         1   1745.4 5229.6
## - total_sq_feet               1   1747.2 5231.4
## - neighborhood_idottrr        1   1748.0 5232.2
## - garage_type_detchd          1   1780.5 5264.7
## - student_apt                 1   1932.8 5417.0
## - central_air                 1   1937.1 5421.3
## 
## Step:  AIC=5209.79
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms + 
##     exter_qual + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_collg_cr + neighborhood_crawfor + 
##     neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames + 
##     neighborhood_sawyer + neighborhood_stone_br + neighborhood_swisu + 
##     neighborhood_timber + neighborhood_veenker + log_rel_price + 
##     sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_stone_br       1   1726.2 5208.4
## - neighborhood_crawfor        1   1726.4 5208.6
## - neighborhood_n_w_ames       1   1726.5 5208.7
## - neighborhood_collg_cr       1   1727.0 5209.2
## - sale_price_per_sqft         1   1727.1 5209.3
## - month_sold                  1   1727.2 5209.4
## - neighborhood_timber         1   1727.3 5209.5
## <none>                            1725.6 5209.8
## - neighborhood_mitchel        1   1727.7 5209.9
## - neighborhood_veenker        1   1728.1 5210.3
## - neighborhood_swisu          1   1728.3 5210.5
## - overall_qual                1   1728.9 5211.1
## - neighborhood_brk_side       1   1729.9 5212.1
## - log_rel_price               1   1729.9 5212.1
## - neighborhood_meadowv        1   1730.2 5212.4
## - NeighborhoodBrDale          1   1731.2 5213.4
## - neighborhood_n_ridge        1   1731.5 5213.7
## - tot_bathrooms               1   1733.7 5215.9
## - bath_pr_sqft                1   1736.5 5218.7
## - neighborhood_n_ridge_hghts  1   1739.0 5221.3
## - garage_type_basment         1   1740.8 5223.0
## - exter_qual                  1   1741.1 5223.3
## - garage_type_builtIn         1   1741.3 5223.6
## - garage_type_no_garage       1   1743.1 5225.3
## - neighborhood_sawyer         1   1747.7 5229.9
## - total_sq_feet               1   1747.7 5229.9
## - neighborhood_idottrr        1   1750.2 5232.4
## - garage_type_detchd          1   1780.6 5262.8
## - student_apt                 1   1933.2 5415.4
## - central_air                 1   1937.5 5419.7
## 
## Step:  AIC=5208.41
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms + 
##     exter_qual + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_collg_cr + neighborhood_crawfor + 
##     neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames + 
##     neighborhood_sawyer + neighborhood_swisu + neighborhood_timber + 
##     neighborhood_veenker + log_rel_price + sale_price_per_sqft + 
##     bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_crawfor        1   1726.9 5207.1
## - neighborhood_n_w_ames       1   1727.0 5207.2
## - neighborhood_collg_cr       1   1727.3 5207.5
## - neighborhood_timber         1   1727.6 5207.8
## - month_sold                  1   1728.0 5208.2
## - sale_price_per_sqft         1   1728.1 5208.3
## <none>                            1726.2 5208.4
## - neighborhood_mitchel        1   1728.3 5208.5
## - neighborhood_veenker        1   1728.5 5208.7
## - neighborhood_swisu          1   1728.9 5209.1
## - log_rel_price               1   1730.0 5210.2
## - overall_qual                1   1730.3 5210.5
## - neighborhood_brk_side       1   1730.7 5210.9
## - neighborhood_meadowv        1   1731.0 5211.2
## - neighborhood_n_ridge        1   1731.5 5211.7
## - NeighborhoodBrDale          1   1731.9 5212.1
## - tot_bathrooms               1   1735.1 5215.3
## - bath_pr_sqft                1   1738.0 5218.2
## - neighborhood_n_ridge_hghts  1   1739.3 5219.5
## - garage_type_basment         1   1741.3 5221.5
## - exter_qual                  1   1741.4 5221.6
## - garage_type_builtIn         1   1742.3 5222.5
## - garage_type_no_garage       1   1743.6 5223.8
## - total_sq_feet               1   1747.8 5228.0
## - neighborhood_sawyer         1   1748.6 5228.8
## - neighborhood_idottrr        1   1751.2 5231.4
## - garage_type_detchd          1   1780.8 5261.0
## - student_apt                 1   1935.2 5415.4
## - central_air                 1   1937.5 5417.7
## 
## Step:  AIC=5207.14
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms + 
##     exter_qual + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_collg_cr + neighborhood_idottrr + 
##     neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge + 
##     neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_sawyer + 
##     neighborhood_swisu + neighborhood_timber + neighborhood_veenker + 
##     log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt + 
##     offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_n_w_ames       1   1727.5 5205.8
## - neighborhood_collg_cr       1   1727.9 5206.1
## - neighborhood_timber         1   1728.2 5206.4
## - month_sold                  1   1728.8 5207.0
## - sale_price_per_sqft         1   1728.8 5207.0
## - neighborhood_mitchel        1   1728.9 5207.1
## <none>                            1726.9 5207.1
## - neighborhood_veenker        1   1729.2 5207.4
## - neighborhood_swisu          1   1729.8 5208.0
## - log_rel_price               1   1730.7 5208.9
## - neighborhood_brk_side       1   1731.2 5209.4
## - overall_qual                1   1731.4 5209.6
## - neighborhood_meadowv        1   1731.7 5209.9
## - neighborhood_n_ridge        1   1731.9 5210.1
## - NeighborhoodBrDale          1   1732.5 5210.8
## - tot_bathrooms               1   1735.6 5213.8
## - bath_pr_sqft                1   1738.5 5216.7
## - neighborhood_n_ridge_hghts  1   1739.5 5217.7
## - garage_type_basment         1   1742.4 5220.6
## - garage_type_builtIn         1   1742.9 5221.1
## - exter_qual                  1   1743.0 5221.2
## - garage_type_no_garage       1   1744.2 5222.5
## - total_sq_feet               1   1748.2 5226.4
## - neighborhood_sawyer         1   1749.0 5227.2
## - neighborhood_idottrr        1   1751.7 5229.9
## - garage_type_detchd          1   1782.6 5260.8
## - student_apt                 1   1935.2 5413.5
## - central_air                 1   1938.9 5417.1
## 
## Step:  AIC=5205.75
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms + 
##     exter_qual + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_collg_cr + neighborhood_idottrr + 
##     neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge + 
##     neighborhood_n_ridge_hghts + neighborhood_sawyer + neighborhood_swisu + 
##     neighborhood_timber + neighborhood_veenker + log_rel_price + 
##     sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_collg_cr       1   1728.3 5204.5
## - neighborhood_timber         1   1728.6 5204.8
## - neighborhood_mitchel        1   1729.3 5205.5
## - month_sold                  1   1729.3 5205.5
## - sale_price_per_sqft         1   1729.4 5205.6
## <none>                            1727.5 5205.8
## - neighborhood_veenker        1   1729.6 5205.8
## - neighborhood_swisu          1   1730.5 5206.7
## - log_rel_price               1   1731.2 5207.4
## - neighborhood_brk_side       1   1731.9 5208.1
## - neighborhood_n_ridge        1   1732.1 5208.3
## - neighborhood_meadowv        1   1732.3 5208.5
## - overall_qual                1   1732.3 5208.5
## - NeighborhoodBrDale          1   1733.2 5209.4
## - tot_bathrooms               1   1736.4 5212.6
## - bath_pr_sqft                1   1739.2 5215.4
## - neighborhood_n_ridge_hghts  1   1739.6 5215.8
## - garage_type_basment         1   1742.7 5218.9
## - garage_type_builtIn         1   1743.4 5219.6
## - garage_type_no_garage       1   1744.5 5220.7
## - exter_qual                  1   1745.8 5222.0
## - total_sq_feet               1   1748.6 5224.8
## - neighborhood_sawyer         1   1749.2 5225.4
## - neighborhood_idottrr        1   1752.3 5228.5
## - garage_type_detchd          1   1782.7 5258.9
## - student_apt                 1   1936.7 5412.9
## - central_air                 1   1938.9 5415.1
## 
## Step:  AIC=5204.5
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms + 
##     exter_qual + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv + 
##     neighborhood_mitchel + neighborhood_n_ridge + neighborhood_n_ridge_hghts + 
##     neighborhood_sawyer + neighborhood_swisu + neighborhood_timber + 
##     neighborhood_veenker + log_rel_price + sale_price_per_sqft + 
##     bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_timber         1   1729.2 5203.4
## - neighborhood_mitchel        1   1729.9 5204.1
## - month_sold                  1   1730.0 5204.2
## - sale_price_per_sqft         1   1730.1 5204.3
## - neighborhood_veenker        1   1730.2 5204.4
## <none>                            1728.3 5204.5
## - neighborhood_swisu          1   1731.3 5205.5
## - log_rel_price               1   1732.0 5206.3
## - neighborhood_n_ridge        1   1732.3 5206.5
## - neighborhood_brk_side       1   1732.6 5206.8
## - neighborhood_meadowv        1   1732.9 5207.1
## - overall_qual                1   1733.3 5207.5
## - NeighborhoodBrDale          1   1733.8 5208.1
## - tot_bathrooms               1   1737.1 5211.3
## - neighborhood_n_ridge_hghts  1   1739.6 5213.8
## - bath_pr_sqft                1   1739.8 5214.1
## - garage_type_basment         1   1743.3 5217.5
## - garage_type_builtIn         1   1744.0 5218.2
## - garage_type_no_garage       1   1745.1 5219.3
## - exter_qual                  1   1745.8 5220.0
## - neighborhood_sawyer         1   1749.6 5223.8
## - total_sq_feet               1   1749.7 5223.9
## - neighborhood_idottrr        1   1753.1 5227.3
## - garage_type_detchd          1   1782.7 5256.9
## - student_apt                 1   1937.0 5411.2
## - central_air                 1   1939.1 5413.3
## 
## Step:  AIC=5203.37
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms + 
##     exter_qual + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv + 
##     neighborhood_mitchel + neighborhood_n_ridge + neighborhood_n_ridge_hghts + 
##     neighborhood_sawyer + neighborhood_swisu + neighborhood_veenker + 
##     log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt + 
##     offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_mitchel        1   1730.7 5202.9
## - month_sold                  1   1730.7 5202.9
## - neighborhood_veenker        1   1731.0 5203.2
## - sale_price_per_sqft         1   1731.0 5203.2
## <none>                            1729.2 5203.4
## - neighborhood_swisu          1   1732.3 5204.5
## - log_rel_price               1   1732.7 5204.9
## - neighborhood_n_ridge        1   1732.7 5204.9
## - neighborhood_brk_side       1   1733.5 5205.7
## - neighborhood_meadowv        1   1733.9 5206.1
## - overall_qual                1   1734.6 5206.8
## - NeighborhoodBrDale          1   1734.8 5207.0
## - tot_bathrooms               1   1738.1 5210.3
## - neighborhood_n_ridge_hghts  1   1739.7 5211.9
## - bath_pr_sqft                1   1740.9 5213.1
## - garage_type_basment         1   1744.9 5217.1
## - garage_type_builtIn         1   1745.0 5217.2
## - garage_type_no_garage       1   1745.9 5218.1
## - exter_qual                  1   1746.5 5218.7
## - total_sq_feet               1   1750.2 5222.4
## - neighborhood_sawyer         1   1750.5 5222.8
## - neighborhood_idottrr        1   1754.2 5226.4
## - garage_type_detchd          1   1783.2 5255.4
## - student_apt                 1   1937.8 5410.0
## - central_air                 1   1939.6 5411.9
## 
## Step:  AIC=5202.89
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms + 
##     exter_qual + central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_sawyer + 
##     neighborhood_swisu + neighborhood_veenker + log_rel_price + 
##     sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - month_sold                  1   1732.2 5202.4
## - sale_price_per_sqft         1   1732.4 5202.6
## - neighborhood_veenker        1   1732.4 5202.7
## <none>                            1730.7 5202.9
## - neighborhood_swisu          1   1733.9 5204.1
## - neighborhood_n_ridge        1   1734.3 5204.5
## - log_rel_price               1   1734.4 5204.7
## - neighborhood_brk_side       1   1734.8 5205.0
## - neighborhood_meadowv        1   1735.1 5205.3
## - overall_qual                1   1735.8 5206.0
## - NeighborhoodBrDale          1   1736.2 5206.4
## - tot_bathrooms               1   1739.5 5209.7
## - neighborhood_n_ridge_hghts  1   1741.4 5211.6
## - bath_pr_sqft                1   1742.1 5212.4
## - garage_type_basment         1   1746.1 5216.3
## - garage_type_builtIn         1   1746.4 5216.6
## - garage_type_no_garage       1   1748.1 5218.3
## - exter_qual                  1   1748.8 5219.0
## - neighborhood_sawyer         1   1751.3 5221.5
## - total_sq_feet               1   1752.0 5222.2
## - neighborhood_idottrr        1   1755.4 5225.6
## - garage_type_detchd          1   1783.6 5253.8
## - central_air                 1   1940.0 5410.2
## - student_apt                 1   1940.8 5411.0
## 
## Step:  AIC=5202.41
## applicants ~ overall_qual + total_sq_feet + tot_bathrooms + exter_qual + 
##     central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_sawyer + 
##     neighborhood_swisu + neighborhood_veenker + log_rel_price + 
##     sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - sale_price_per_sqft         1   1733.9 5202.1
## - neighborhood_veenker        1   1734.0 5202.2
## <none>                            1732.2 5202.4
## - neighborhood_swisu          1   1735.5 5203.7
## - neighborhood_n_ridge        1   1735.8 5204.0
## - log_rel_price               1   1735.9 5204.1
## - neighborhood_brk_side       1   1736.2 5204.4
## - neighborhood_meadowv        1   1736.6 5204.8
## - overall_qual                1   1737.5 5205.7
## - NeighborhoodBrDale          1   1737.6 5205.8
## - tot_bathrooms               1   1741.2 5209.4
## - neighborhood_n_ridge_hghts  1   1742.9 5211.1
## - bath_pr_sqft                1   1744.0 5212.2
## - garage_type_basment         1   1747.6 5215.8
## - garage_type_builtIn         1   1748.2 5216.4
## - garage_type_no_garage       1   1749.7 5217.9
## - exter_qual                  1   1749.9 5218.1
## - neighborhood_sawyer         1   1752.7 5220.9
## - total_sq_feet               1   1753.6 5221.8
## - neighborhood_idottrr        1   1756.6 5224.8
## - garage_type_detchd          1   1786.1 5254.3
## - central_air                 1   1942.9 5411.1
## - student_apt                 1   1943.5 5411.8
## 
## Step:  AIC=5202.14
## applicants ~ overall_qual + total_sq_feet + tot_bathrooms + exter_qual + 
##     central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_sawyer + 
##     neighborhood_swisu + neighborhood_veenker + log_rel_price + 
##     bath_pr_sqft + student_apt + offset(log(num_units))
## 
##                              Df Deviance    AIC
## - neighborhood_veenker        1   1735.9 5202.1
## <none>                            1733.9 5202.1
## - neighborhood_swisu          1   1737.5 5203.7
## - neighborhood_meadowv        1   1738.2 5204.5
## - neighborhood_brk_side       1   1738.5 5204.8
## - neighborhood_n_ridge        1   1739.2 5205.4
## - overall_qual                1   1739.2 5205.4
## - NeighborhoodBrDale          1   1739.4 5205.6
## - tot_bathrooms               1   1741.2 5207.4
## - bath_pr_sqft                1   1744.0 5210.3
## - neighborhood_n_ridge_hghts  1   1746.1 5212.3
## - garage_type_basment         1   1750.5 5216.7
## - garage_type_builtIn         1   1750.8 5217.0
## - exter_qual                  1   1751.7 5217.9
## - garage_type_no_garage       1   1753.4 5219.6
## - neighborhood_sawyer         1   1754.4 5220.7
## - neighborhood_idottrr        1   1759.3 5225.5
## - total_sq_feet               1   1771.5 5237.7
## - garage_type_detchd          1   1789.6 5255.8
## - log_rel_price               1   1867.2 5333.5
## - student_apt                 1   1944.6 5410.8
## - central_air                 1   1958.5 5424.8
## 
## Step:  AIC=5202.13
## applicants ~ overall_qual + total_sq_feet + tot_bathrooms + exter_qual + 
##     central_air + garage_type_basment + garage_type_builtIn + 
##     garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale + 
##     neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv + 
##     neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_sawyer + 
##     neighborhood_swisu + log_rel_price + bath_pr_sqft + student_apt + 
##     offset(log(num_units))
## 
##                              Df Deviance    AIC
## <none>                            1735.9 5202.1
## - neighborhood_swisu          1   1739.5 5203.7
## - neighborhood_meadowv        1   1740.2 5204.5
## - neighborhood_brk_side       1   1740.6 5204.8
## - neighborhood_n_ridge        1   1740.8 5205.0
## - overall_qual                1   1741.3 5205.5
## - NeighborhoodBrDale          1   1741.4 5205.6
## - tot_bathrooms               1   1743.3 5207.5
## - bath_pr_sqft                1   1746.2 5210.4
## - neighborhood_n_ridge_hghts  1   1747.5 5211.7
## - garage_type_basment         1   1752.3 5216.6
## - garage_type_builtIn         1   1752.4 5216.6
## - exter_qual                  1   1753.5 5217.7
## - garage_type_no_garage       1   1755.2 5219.4
## - neighborhood_sawyer         1   1756.4 5220.6
## - neighborhood_idottrr        1   1761.4 5225.6
## - total_sq_feet               1   1773.3 5237.5
## - garage_type_detchd          1   1790.9 5255.1
## - log_rel_price               1   1868.0 5332.2
## - student_apt                 1   1946.3 5410.5
## - central_air                 1   1960.3 5424.5
# dplyr supplies the %>% pipe and sample_frac() used in the bagging section below.
library(dplyr)

# Refit the model chosen by the stepwise search on the training data, using the
# formula stored on `stepwise_result`.
# NOTE(review): `glm$family` reads the `family` component of an object named `glm`
# that is created outside this chunk; the call still works because R skips
# non-function objects when resolving the function `glm()`. Renaming that object
# would make this much easier to read — confirm against its definition.
final_glm <- glm(
  stepwise_result$formula, 
  data = train,
  family = glm$family
)

# AIC of the refit; it matches the last "Step: AIC=" value printed by the stepwise search above.
AIC(final_glm)
## [1] 5202.127
# Coefficient table, deviances and dispersion for the refit model.
summary(final_glm)
## 
## Call:
## glm(formula = stepwise_result$formula, family = glm$family, data = train)
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -2.183e+00  3.149e-01  -6.930 4.20e-12 ***
## overall_qual               -4.870e-02  2.096e-02  -2.324 0.020140 *  
## total_sq_feet               6.071e-04  9.058e-05   6.702 2.05e-11 ***
## tot_bathrooms              -2.298e-01  7.537e-02  -3.049 0.002295 ** 
## exter_qual                  1.716e-01  4.078e-02   4.207 2.58e-05 ***
## central_airno              -1.798e+00  1.587e-01 -11.332  < 2e-16 ***
## garage_type_basment        -6.425e-01  1.755e-01  -3.661 0.000251 ***
## garage_type_builtIn        -2.754e-01  7.026e-02  -3.920 8.87e-05 ***
## garage_type_detchd         -3.378e-01  4.655e-02  -7.256 4.00e-13 ***
## garage_type_no_garage      -3.825e-01  9.100e-02  -4.204 2.62e-05 ***
## NeighborhoodBrDale         -5.172e-01  2.396e-01  -2.158 0.030906 *  
## neighborhood_brk_side      -2.225e-01  1.056e-01  -2.107 0.035078 *  
## neighborhood_idottrr       -8.875e-01  2.015e-01  -4.404 1.06e-05 ***
## neighborhood_meadowv       -3.599e-01  1.824e-01  -1.973 0.048472 *  
## neighborhood_n_ridge       -1.858e-01  8.568e-02  -2.169 0.030100 *  
## neighborhood_n_ridge_hghts -2.378e-01  7.099e-02  -3.350 0.000808 ***
## neighborhood_sawyer        -4.018e-01  9.394e-02  -4.277 1.89e-05 ***
## neighborhood_swisu          2.794e-01  1.426e-01   1.960 0.050047 .  
## log_rel_price              -1.001e+00  8.661e-02 -11.553  < 2e-16 ***
## bath_pr_sqft                9.349e+02  2.516e+02   3.715 0.000203 ***
## student_apt                 4.941e-01  3.311e-02  14.922  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 2784.1  on 1144  degrees of freedom
## Residual deviance: 1735.9  on 1124  degrees of freedom
## AIC: 5202.1
## 
## Number of Fisher Scoring iterations: 5
# Draw the default diagnostic plots for the fitted GLM.
plot(final_glm)

Task 8 - Fit a LASSO

#THE ONLY CHANGES NEEDED
#1. Replace "WEIGHT_VARIABLE" with the weight variable that you used for the GLM
#2. In the  "lasso_formula" below, use your formula from task 6
#3. Use the same FAMILY from task 6.  Type ?glmnet into the console to read the possible options

# Formula shared by the LASSO design matrices and the bagged trees further down.
# Predictors are every column except neighborhood_n_ames, garage_type_attchd
# (presumably the reference dummy levels), num_units and log_sale_price.
# NOTE(review): model.matrix() does not carry offset() terms into the design
# matrix, so the log(num_units) exposure used by the GLM is NOT part of the
# LASSO fit below — confirm whether that is intended.
lasso_formula <- applicants ~ . + offset(log(num_units)) - neighborhood_n_ames - garage_type_attchd - num_units - log_sale_price

# Build the design matrices and drop the constant "(Intercept)" column.
# glmnet fits its own intercept, so the extra column adds nothing to the fit,
# but it duplicates the "(Intercept)" coefficient name, which breaks the
# name/position alignment of the coefficient table used below.
lasso_x_train <- model.matrix(lasso_formula, data = train)
lasso_x_train <- lasso_x_train[, colnames(lasso_x_train) != "(Intercept)", drop = FALSE]
lasso_x_test <- model.matrix(lasso_formula, data = test)
lasso_x_test <- lasso_x_test[, colnames(lasso_x_test) != "(Intercept)", drop = FALSE]

# 15-fold cross-validation over a grid of penalties; alpha = 1 is the pure LASSO.
control <- trainControl(method = "cv", number = 15)
lassoGrid <- expand.grid(alpha = 1, lambda = seq(0.0001, 0.01, by = 0.0005))

# Fit a LASSO and inspect the variables which have zero coefficients.
# With family = "poisson" glmnet uses the log link, matching the GLM above.
lasso <- train(x = lasso_x_train,
               y = train_y,
               method = "glmnet",
               family = "poisson",
               trControl = control,
               tuneGrid = lassoGrid)

# Absolute coefficients at the selected lambda (unscaled importance).
lasso_results <- varImp(lasso, scale = FALSE)
lasso_coefficients <- lasso_results$importance

# Show the coefficients which are zero.
# Names are read from the importance table itself rather than by position in
# lasso_x_train, so a label can never be attached to the wrong coefficient.
variables_with_zeros <- rownames(lasso_coefficients)[lasso_coefficients$Overall == 0]
print("Variables with Coefficients of Zero:")
## [1] "Variables with Coefficients of Zero:"
# NOTE: the listing rendered below was produced before the intercept column was
# dropped; its labels are likely shifted by one column. Re-knit to refresh it.
cat( paste( variables_with_zeros, collapse='\n' ) )
## sale_price
## year_sold
## month_sold
## tot_bathrooms
## lot_area
## neighborhood_gilbert
## neighborhood_n_w_ames
## log_rel_price

Task 9 - Create a bagged tree model

The following code creates eight bootstrap samples of the training data. Each sample draws 20% of the training records at random, with replacement.

# Eight bootstrap samples, each 20% of the training rows drawn with replacement.
# The draws are made one after another so the random-number stream (and hence
# every sample) is the same as before. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
sample1 <- train %>% sample_frac(0.2, replace = TRUE)
sample2 <- train %>% sample_frac(0.2, replace = TRUE)
sample3 <- train %>% sample_frac(0.2, replace = TRUE)
sample4 <- train %>% sample_frac(0.2, replace = TRUE)
sample5 <- train %>% sample_frac(0.2, replace = TRUE)
sample6 <- train %>% sample_frac(0.2, replace = TRUE)
sample7 <- train %>% sample_frac(0.2, replace = TRUE)
sample8 <- train %>% sample_frac(0.2, replace = TRUE)

This is the setting for the decision tree parameters. Make your adjustments here to test out different parameters.

# Tree-growing settings shared by all eight bagged trees. Only one `ctrl`
# assignment should be active at a time; the commented lines are alternatives
# that were tried.
#   minbucket - minimum number of observations in a terminal node
#   cp        - complexity parameter; a split must improve the fit by at least this factor
#   maxdepth  - maximum depth of any node of the tree
#ctrl <- rpart.control(minbucket = 5, cp = 0.01, maxdepth = 5)
ctrl <- rpart.control(minbucket = 10, cp = 0.001, maxdepth = 5)
#ctrl <- rpart.control(minbucket = 10, cp = 0.01, maxdepth = 7)

The following code sets up a decision tree using all the variables in the dataframe. Each tree uses only 20% of the records. The number of units are used as the weights.

# Fit one regression tree per bootstrap sample, all with the same formula
# (`lasso_formula`) and the same control settings (`ctrl`). Each row is weighted
# by its own num_units.
# NOTE(review): `lasso_formula` contains offset(log(num_units)). With rpart's
# default method for a numeric response the offset is presumably subtracted from
# the response before fitting and not added back by predict() — confirm this is
# the intended treatment of exposure for these trees.
tree1 <- rpart(data = sample1, lasso_formula, weights = sample1$num_units, control = ctrl)
tree2 <- rpart(data = sample2, lasso_formula, weights = sample2$num_units, control = ctrl)
tree3 <- rpart(data = sample3, lasso_formula, weights = sample3$num_units, control = ctrl)
tree4 <- rpart(data = sample4, lasso_formula, weights = sample4$num_units, control = ctrl)
tree5 <- rpart(data = sample5, lasso_formula, weights = sample5$num_units, control = ctrl)
tree6 <- rpart(data = sample6, lasso_formula, weights = sample6$num_units, control = ctrl)
tree7 <- rpart(data = sample7, lasso_formula, weights = sample7$num_units, control = ctrl)
tree8 <- rpart(data = sample8, lasso_formula, weights = sample8$num_units, control = ctrl)

# Draw each of the eight bagged trees in turn (tree1 first, tree8 last),
# using a reduced text size so the node labels fit.
for (bagged_tree in list(tree1, tree2, tree3, tree4, tree5, tree6, tree7, tree8)) {
  rpart.plot(bagged_tree, cex = .7)
}

# Score every bagged tree on the hold-out data.
tree1_pred <- predict(tree1, test, type = "vector")
tree2_pred <- predict(tree2, test, type = "vector")
tree3_pred <- predict(tree3, test, type = "vector")
tree4_pred <- predict(tree4, test, type = "vector")
tree5_pred <- predict(tree5, test, type = "vector")
tree6_pred <- predict(tree6, test, type = "vector")
tree7_pred <- predict(tree7, test, type = "vector")
tree8_pred <- predict(tree8, test, type = "vector")

# Bagging step: the ensemble prediction is the equal-weight average of the
# eight per-tree predictions (added left to right, then divided by 8).
per_tree_preds <- list(
  tree1_pred, tree2_pred, tree3_pred, tree4_pred,
  tree5_pred, tree6_pred, tree7_pred, tree8_pred
)
trees_pred <- Reduce(`+`, per_tree_preds) / 8

# Poisson log-likelihood of the predictions, up to an additive constant:
#   sum(targets * log(mu)) - sum(mu)
# The log(targets!) term is omitted because it does not depend on the
# predictions, so values are comparable across models scored on the same data.
#
# "targets" is a vector containing the actual values for the target variable
# "predicted_values" is a vector containing the predicted values for the target variable
#
# Non-positive predictions contribute 0 to sum(mu) and are replaced by 0.000001
# inside the log so the result stays finite.
# Stops with an error if the two inputs differ in length, instead of silently
# recycling the shorter one.
LLfunction <- function(targets, predicted_values) {
  stopifnot(length(targets) == length(predicted_values))
  non_positive <- predicted_values <= 0
  p_v_zero <- ifelse(non_positive, 0, predicted_values)
  p_v_pos <- ifelse(non_positive, 0.000001, predicted_values)
  sum(targets * log(p_v_pos)) - sum(p_v_zero)
}

# Hold-out log-likelihood of the bagged ensemble (average of the eight trees) ...
LLfunction(test$applicants, trees_pred)
## [1] 343.9751
# ... compared with a single tree (tree1) on the same data; higher is better.
LLfunction(test$applicants, tree1_pred)
## [1] 264.3733

Task 10 - Measure the variable importance with a Random Forest

#NO CHANGES NEEDED
# Random forest of 400 trees on the training data, with permutation importance enabled.
# NOTE(review): `+ neighborhood_n_ames` has no effect because `.` already includes
# every column; the LASSO formula removes this variable with `-` instead, so the
# two models do not use the same predictor set — confirm which is intended.
# NOTE(review): `weights` does not appear to be a documented randomForest()
# argument, so train_number_of_units may be silently ignored here — confirm.
RF <- randomForest( applicants ~ . + neighborhood_n_ames - garage_type_attchd - num_units - log_sale_price,
                   data = train,
                   weights = train_number_of_units, 
                   ntree=400,
                   importance=TRUE)

# Importance table: the first column is taken as the permutation-based
# % increase in MSE (per the axis label used in the plot below).
imp_RF <- importance(RF)
imp_DF <- data.frame(Variables = row.names(imp_RF), MSE = imp_RF[,1])
# Sort so the most important variables come first.
imp_DF <- imp_DF[order(imp_DF$MSE, decreasing = TRUE),]

# Horizontal bar chart of the 30 most important variables, largest at the top.
ggplot(imp_DF[1:30,], aes(x=reorder(Variables, MSE), y=MSE, fill=MSE)) + geom_bar(stat = 'identity') + labs(x = 'Variables', y= '% increase MSE if variable is randomly permuted') + coord_flip() + theme(legend.position="none")

Task 11 - Compare model performance

# This function calculates the log-likelihood, based on a Poisson model, for the
# number of applicants, up to an additive constant:
#   sum(targets * log(mu)) - sum(mu)
# The log(targets!) term is omitted because it does not depend on the
# predictions, so values are comparable across models scored on the same data.
#
# "targets" is a vector containing the actual values for the target variable
# "predicted_values" is a vector containing the predicted values for the target variable
#
# Non-positive predictions contribute 0 to sum(mu) and are replaced by 0.000001
# inside the log so the result stays finite.
# Stops with an error if the two inputs differ in length, instead of silently
# recycling the shorter one.
LLfunction <- function(targets, predicted_values) {
  stopifnot(length(targets) == length(predicted_values))
  non_positive <- predicted_values <= 0
  p_v_zero <- ifelse(non_positive, 0, predicted_values)
  p_v_pos <- ifelse(non_positive, 0.000001, predicted_values)
  sum(targets * log(p_v_pos)) - sum(p_v_zero)
}
print("loglikelihood")
## [1] "loglikelihood"
# Hold-out predictions on the response (expected count) scale for each model.
glm_pred <- predict(final_glm, type = "response", newdata = test)
# Predict straight from the final glmnet fit at the cross-validated lambda and
# ask for the response scale explicitly. The previous code wrapped the caret
# prediction in exp(); those predictions are most likely already on the response
# scale, so they were exponentiated twice, which explains the extreme LASSO
# log-likelihood in the table rendered below.
lasso_pred <- as.numeric(predict(lasso$finalModel,
                                 newx = lasso_x_test,
                                 s = lasso$bestTune$lambda,
                                 type = "response"))
RF_pred <- predict(RF, type = "response", newdata = test)

# One row per model; a higher log-likelihood means a better fit to the test data.
# NOTE: the LASSO row in the table rendered below predates the prediction fix
# above; re-knit to refresh it.
tibble(Model = c("GLM", "LASSO", "Bagged Trees", "Random Forest"),
       LogLikelihood = c(LLfunction(test$applicants, glm_pred),
                         LLfunction(test$applicants, lasso_pred),
                         LLfunction(test$applicants, trees_pred),
                         LLfunction(test$applicants, RF_pred)))
## # A tibble: 4 × 2
##   Model         LogLikelihood
##   <chr>                 <dbl>
## 1 GLM                    237.
## 2 LASSO               -54465.
## 3 Bagged Trees           344.
## 4 Random Forest          444.

Task 12 - Executive Summary