#NO CHANGES NEEDED
set.seed(1)
library(knitr)
library(broom)
library(ggplot2)
library(plyr)
library(corrplot)
library(caret)
library(gridExtra)
library(scales)
library(Rmisc)
library(randomForest)
library(psych)
library(xgboost)
library(rpart)
library(rpart.plot)
library(randomForest)
library(tidyverse)
theme_set(theme_minimal())
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# Python / reticulate setup and data load.
# 1. Set RETICULATE_UV_ENABLED to "0" before reticulate is used
#    (intended to stop reticulate from using its uv-managed temporary Python).
Sys.setenv(RETICULATE_UV_ENABLED = "0")
# 2. Point reticulate at a specific Python interpreter. required = TRUE makes
#    this an error instead of a silent fallback if that interpreter is unusable.
#    NOTE(review): this is a machine-specific absolute path; it must be edited
#    to run on any other computer.
reticulate::use_python("C:/Users/casti/AppData/Local/Programs/Python/Python313/python.exe", required = TRUE)
# 3. Restart R session inside RStudio (left commented out; run manually if needed)
#.rs.restartR()
library(reticulate)
# Import the Python "datasets" module through reticulate
datasets <- import("datasets")
# Load the dataset
ds <- datasets$load_dataset("supersam7/apartment_apps")
df <- ds["train"]$to_pandas() # take the "train" split; ds$train$to_pandas() also works
# Drop the first column (presumably an index column carried over from the
# source data; confirm). After this, applicants is the first column.
df <- df[-1]
head(df)
## applicants sale_price num_units year_sold month_sold overall_qual
## 1 3 208500 4 2008 2 7
## 2 5 181500 5 2007 5 6
## 3 3 223500 1 2008 9 7
## 4 4 140000 3 2011 2 7
## 5 5 250000 5 2008 12 8
## 6 5 143000 3 2009 10 5
## total_sq_feet gr_liv_area tot_bathrooms lot_area exter_qual full_bath
## 1 3586.886 2785.060 5 3898.256 4 4
## 2 3560.348 2315.267 3 4149.410 3 4
## 3 3672.314 2852.313 5 4461.603 4 4
## 4 3527.523 2791.378 3 4139.131 3 2
## 5 4012.276 3173.357 5 4928.292 4 4
## 6 3308.431 2433.181 3 4908.174 3 2
## central_air garage_type_attchd garage_type_basment garage_type_builtIn
## 1 yes 1 0 0
## 2 yes 1 0 0
## 3 yes 1 0 0
## 4 yes 0 0 0
## 5 yes 1 0 0
## 6 yes 1 0 0
## garage_type_detchd garage_type_no_garage NeighborhoodBrDale
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 1 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_brk_side neighborhood_clear_cr neighborhood_collg_cr
## 1 0 0 1
## 2 0 0 0
## 3 0 0 1
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_crawfor neighborhood_edwards neighborhood_gilbert
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 1 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_idottrr neighborhood_meadowv neighborhood_mitchel
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 1
## neighborhood_n_ames neighborhood_n_ridge neighborhood_n_ridge_hghts
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 1 0
## 6 0 0 0
## neighborhood_n_w_ames neighborhood_old_town neighborhood_sawyer
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_sawyer_w neighborhood_somerst neighborhood_stone_br
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_swisu neighborhood_timber neighborhood_veenker
## 1 0 0 0
## 2 0 0 1
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_saleprice
## 1 198517.7
## 2 245890.6
## 3 198517.7
## 4 213681.7
## 5 331835.3
## 6 157755.8
Your assistant has provided you with these code templates.
# Poisson log-likelihood of the number of applicants, omitting the term that
# does not depend on the predictions.
#
# targets:          observed applicant counts.
# predicted_values: predicted Poisson means.
#
# A prediction <= 0 contributes 0 to the sum of means and is replaced by
# 0.000001 inside the logarithm so that log() stays finite.
# Returns one number: sum(targets * log(mean)) - sum(mean).
LLfunction <- function(targets, predicted_values) {
  non_positive <- predicted_values <= 0
  mean_term <- ifelse(non_positive, 0, predicted_values)
  log_input <- ifelse(non_positive, 0.000001, predicted_values)
  sum(targets * log(log_input)) - sum(mean_term)
}
# Template: report the Poisson log-likelihood of `predictions` against the
# observed applicant counts in `test`. Neither object exists at this point in
# the file; both must be created before this is run.
print("loglikelihood")
LLfunction(test$applicants,predictions)
This code creates a box plot, a scatter plot, a histogram, and a bar plot.
# Plot templates: replace VARIABLE with the column to explore.

# Box plot of VARIABLE for each observed number of applicants
ggplot(data = df, mapping = aes(x = as.factor(applicants), y = VARIABLE)) +
  geom_boxplot()

# Scatter plot of VARIABLE against the number of applicants
ggplot(data = df, mapping = aes(x = applicants, y = VARIABLE)) +
  geom_point()

# Histogram of VARIABLE
ggplot(data = df, mapping = aes(x = VARIABLE)) +
  geom_histogram()

# Bar plot of VARIABLE (bar height = number of rows with that value)
ggplot(data = df, mapping = aes(x = VARIABLE)) +
  geom_bar(stat = "count")
Shows the average number of applicants across factor levels.
# Template: unit-weighted average number of applicants for each level of
# VARIABLE (replace VARIABLE with the grouping column).
summarise(
  group_by(df, VARIABLE),
  average_num_applicants = sum(applicants * num_units) / sum(num_units)
)
Converts variables to numeric or factor while also setting the base reference level to the value which has the most observations.
# Type-conversion templates: replace VARIABLE with the column to convert.

# Convert to factor. fct_infreq() orders the levels by descending frequency,
# so the most common value becomes the first (reference) level.
df <- df %>% mutate(VARIABLE = fct_infreq(as.character(VARIABLE)))
# Same conversion using base R assignment
df$VARIABLE <- fct_infreq(as.character(df$VARIABLE))

# Convert to numeric
df <- df %>% mutate(VARIABLE = as.numeric(VARIABLE))
# Same conversion using base R assignment
df$VARIABLE <- as.numeric(df$VARIABLE)
glimpse(df)
## Rows: 1,430
## Columns: 41
## $ applicants <dbl> 3, 5, 3, 4, 5, 5, 5, 5, 2, 3, 1, 6, 1, 5, 3…
## $ sale_price <dbl> 208500, 181500, 223500, 140000, 250000, 143…
## $ num_units <dbl> 4, 5, 1, 3, 5, 3, 1, 3, 5, 4, 3, 1, 5, 2, 5…
## $ year_sold <dbl> 2008, 2007, 2008, 2011, 2008, 2009, 2007, 2…
## $ month_sold <dbl> 2, 5, 9, 2, 12, 10, 8, 11, 4, 1, 2, 7, 9, 8…
## $ overall_qual <dbl> 7, 6, 7, 7, 8, 5, 8, 7, 7, 5, 5, 9, 5, 7, 6…
## $ total_sq_feet <dbl> 3586.886, 3560.348, 3672.314, 3527.523, 401…
## $ gr_liv_area <dbl> 2785.060, 2315.267, 2852.313, 2791.378, 317…
## $ tot_bathrooms <dbl> 5, 3, 5, 3, 5, 3, 4, 5, 3, 3, 3, 5, 3, 3, 3…
## $ lot_area <dbl> 3898.256, 4149.410, 4461.603, 4139.131, 492…
## $ exter_qual <dbl> 4, 3, 4, 3, 4, 3, 4, 3, 3, 3, 3, 5, 3, 4, 3…
## $ full_bath <dbl> 4, 4, 4, 2, 4, 2, 4, 4, 4, 2, 2, 6, 2, 4, 2…
## $ central_air <chr> "yes", "yes", "yes", "yes", "yes", "yes", "…
## $ garage_type_attchd <dbl> 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1…
## $ garage_type_basment <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ garage_type_builtIn <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ garage_type_detchd <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0…
## $ garage_type_no_garage <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ NeighborhoodBrDale <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_brk_side <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0…
## $ neighborhood_clear_cr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_collg_cr <dbl> 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0…
## $ neighborhood_crawfor <dbl> 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_edwards <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_gilbert <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_idottrr <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_meadowv <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_mitchel <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_n_ames <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1…
## $ neighborhood_n_ridge <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_n_ridge_hghts <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ neighborhood_n_w_ames <dbl> 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_old_town <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0…
## $ neighborhood_sawyer <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0…
## $ neighborhood_sawyer_w <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_somerst <dbl> 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_stone_br <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_swisu <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_timber <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_veenker <dbl> 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ neighborhood_saleprice <dbl> 198517.7, 245890.6, 198517.7, 213681.7, 331…
summary(df)
## applicants sale_price num_units year_sold
## Min. :0.000 Min. : 34900 Min. :1.000 Min. :2007
## 1st Qu.:2.000 1st Qu.:129600 1st Qu.:1.000 1st Qu.:2008
## Median :4.000 Median :163000 Median :2.000 Median :2009
## Mean :3.633 Mean :181069 Mean :2.685 Mean :2009
## 3rd Qu.:5.000 3rd Qu.:214375 3rd Qu.:4.000 3rd Qu.:2010
## Max. :7.000 Max. :755000 Max. :5.000 Max. :2011
## month_sold overall_qual total_sq_feet gr_liv_area
## Min. : 1.000 Min. : 1.000 Min. : 311 Min. : 261.6
## 1st Qu.: 5.000 1st Qu.: 5.000 1st Qu.:3193 1st Qu.:2139.6
## Median : 6.000 Median : 6.000 Median :3528 Median :2547.0
## Mean : 6.329 Mean : 6.082 Mean :3510 Mean :2511.2
## 3rd Qu.: 8.000 3rd Qu.: 7.000 3rd Qu.:3852 3rd Qu.:2852.3
## Max. :12.000 Max. :10.000 Max. :5171 Max. :4273.5
## tot_bathrooms lot_area exter_qual full_bath
## Min. :1.000 Min. : 465.9 Min. :2.00 Min. :0.000
## 1st Qu.:3.000 1st Qu.: 3725.6 1st Qu.:3.00 1st Qu.:2.000
## Median :3.000 Median : 4138.8 Median :3.00 Median :4.000
## Mean :2.952 Mean : 4070.4 Mean :3.39 Mean :3.115
## 3rd Qu.:3.000 3rd Qu.: 4529.0 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :8.000 Max. :10271.5 Max. :5.00 Max. :6.000
## central_air garage_type_attchd garage_type_basment garage_type_builtIn
## Length:1430 Min. :0.0000 Min. :0.00000 Min. :0.00000
## Class :character 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Mode :character Median :1.0000 Median :0.00000 Median :0.00000
## Mean :0.5888 Mean :0.01329 Mean :0.06084
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000
## garage_type_detchd garage_type_no_garage NeighborhoodBrDale
## Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.2699 Mean :0.05664 Mean :0.01119
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000
## neighborhood_brk_side neighborhood_clear_cr neighborhood_collg_cr
## Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.04056 Mean :0.01958 Mean :0.1049
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000
## neighborhood_crawfor neighborhood_edwards neighborhood_gilbert
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.03566 Mean :0.06853 Mean :0.05524
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_idottrr neighborhood_meadowv neighborhood_mitchel
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02587 Mean :0.01189 Mean :0.03427
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_n_ames neighborhood_n_ridge neighborhood_n_ridge_hghts
## Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1573 Mean :0.02867 Mean :0.05385
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000
## neighborhood_n_w_ames neighborhood_old_town neighborhood_sawyer
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05105 Mean :0.07902 Mean :0.05175
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_sawyer_w neighborhood_somerst neighborhood_stone_br
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.04126 Mean :0.06014 Mean :0.01748
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_swisu neighborhood_timber neighborhood_veenker
## Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.01748 Mean :0.02657 Mean :0.007692
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.00000 Max. :1.000000
## neighborhood_saleprice
## Min. :101389
## 1st Qu.:136902
## Median :181841
## Mean :180909
## 3rd Qu.:198518
## Max. :331835
sum(df$num_units)
## [1] 3839
sum(df$num_units*df$applicants)/sum(df$num_units)
## [1] 3.588695
df %>% summarise(avg_applicants = sum(applicants*num_units)/sum(num_units))
## avg_applicants
## 1 3.588695
#NO CODE CHANGES NEEDED
df %>%
group_by(applicants) %>%
summarise(total_units = sum(num_units))
## # A tibble: 8 × 2
## applicants total_units
## <dbl> <dbl>
## 1 0 152
## 2 1 360
## 3 2 532
## 4 3 898
## 5 4 487
## 6 5 1028
## 7 6 103
## 8 7 279
# Graph A: histogram of applicants, counting each row of df once
ggplot(df, aes(x = applicants)) +
  geom_histogram()

# Graph B: bar plot of the total number of units at each applicant count
# (applicants is treated as a discrete category)
df %>%
  group_by(applicants = as.factor(applicants)) %>%
  summarise(total_units = sum(num_units)) %>%
  ungroup() %>%
  ggplot(aes(x = applicants, y = total_units)) +
  geom_col()
# Task 2 - Explore the predictor variables – FIXED VERSION
summary(df)
## applicants sale_price num_units year_sold
## Min. :0.000 Min. : 34900 Min. :1.000 Min. :2007
## 1st Qu.:2.000 1st Qu.:129600 1st Qu.:1.000 1st Qu.:2008
## Median :4.000 Median :163000 Median :2.000 Median :2009
## Mean :3.633 Mean :181069 Mean :2.685 Mean :2009
## 3rd Qu.:5.000 3rd Qu.:214375 3rd Qu.:4.000 3rd Qu.:2010
## Max. :7.000 Max. :755000 Max. :5.000 Max. :2011
## month_sold overall_qual total_sq_feet gr_liv_area
## Min. : 1.000 Min. : 1.000 Min. : 311 Min. : 261.6
## 1st Qu.: 5.000 1st Qu.: 5.000 1st Qu.:3193 1st Qu.:2139.6
## Median : 6.000 Median : 6.000 Median :3528 Median :2547.0
## Mean : 6.329 Mean : 6.082 Mean :3510 Mean :2511.2
## 3rd Qu.: 8.000 3rd Qu.: 7.000 3rd Qu.:3852 3rd Qu.:2852.3
## Max. :12.000 Max. :10.000 Max. :5171 Max. :4273.5
## tot_bathrooms lot_area exter_qual full_bath
## Min. :1.000 Min. : 465.9 Min. :2.00 Min. :0.000
## 1st Qu.:3.000 1st Qu.: 3725.6 1st Qu.:3.00 1st Qu.:2.000
## Median :3.000 Median : 4138.8 Median :3.00 Median :4.000
## Mean :2.952 Mean : 4070.4 Mean :3.39 Mean :3.115
## 3rd Qu.:3.000 3rd Qu.: 4529.0 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :8.000 Max. :10271.5 Max. :5.00 Max. :6.000
## central_air garage_type_attchd garage_type_basment garage_type_builtIn
## Length:1430 Min. :0.0000 Min. :0.00000 Min. :0.00000
## Class :character 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Mode :character Median :1.0000 Median :0.00000 Median :0.00000
## Mean :0.5888 Mean :0.01329 Mean :0.06084
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000
## garage_type_detchd garage_type_no_garage NeighborhoodBrDale
## Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.2699 Mean :0.05664 Mean :0.01119
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000
## neighborhood_brk_side neighborhood_clear_cr neighborhood_collg_cr
## Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.04056 Mean :0.01958 Mean :0.1049
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000
## neighborhood_crawfor neighborhood_edwards neighborhood_gilbert
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.03566 Mean :0.06853 Mean :0.05524
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_idottrr neighborhood_meadowv neighborhood_mitchel
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02587 Mean :0.01189 Mean :0.03427
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_n_ames neighborhood_n_ridge neighborhood_n_ridge_hghts
## Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.1573 Mean :0.02867 Mean :0.05385
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000
## neighborhood_n_w_ames neighborhood_old_town neighborhood_sawyer
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05105 Mean :0.07902 Mean :0.05175
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_sawyer_w neighborhood_somerst neighborhood_stone_br
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.04126 Mean :0.06014 Mean :0.01748
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_swisu neighborhood_timber neighborhood_veenker
## Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.01748 Mean :0.02657 Mean :0.007692
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.00000 Max. :1.000000
## neighborhood_saleprice
## Min. :101389
## 1st Qu.:136902
## Median :181841
## Mean :180909
## 3rd Qu.:198518
## Max. :331835
# Portfolio-level summary in which each property is weighted by its number of
# units.
df %>%
  summarise(
    # Total units across all properties. This is a sum, not a mean, so the
    # column is named accordingly.
    total_number_of_units = sum(num_units),
    # Unit-weighted mean sale price, formatted as a dollar string
    mean_sale_price = dollar(sum(num_units * sale_price) / sum(num_units)),
    # Unit-weighted mean overall quality
    mean_overall_qual = sum(num_units * overall_qual) / sum(num_units)
  )
## total_number_of_units mean_sale_price mean_overall_qual
## 1 3839 $180,146 6.060172
# ... your plots ...
# Add log-scale price features. The original sale_price and
# neighborhood_saleprice columns are kept; only new columns are appended.
# log_rel_price: log of the sale price relative to the neighborhood sale price
df$log_rel_price <- log(df$sale_price / df$neighborhood_saleprice)
# log_sale_price: log of the sale price itself
df$log_sale_price <- log(df$sale_price)
# overall_qual: number of properties at each quality score
ggplot(df, aes(x = overall_qual)) +
  geom_bar()
# overall_qual: unit-weighted average score
df %>%
  summarise(avg = sum(overall_qual * num_units) / sum(num_units))
## avg
## 1 6.060172
# overall_qual: total number of units at each quality score
df %>%
  group_by(overall_qual) %>%
  summarise(total_units = sum(num_units)) %>%
  ggplot(aes(x = overall_qual, y = total_units)) +
  geom_col()
# central_air: total number of units per level, as a bar plot
df %>%
  group_by(central_air) %>%
  summarise(total_units = sum(num_units)) %>%
  ggplot(aes(x = central_air, y = total_units)) +
  geom_col()
# central_air: the same totals as a table
summarise(
  group_by(df, central_air),
  total_units = sum(num_units)
)
## # A tibble: 2 × 2
## central_air total_units
## <chr> <dbl>
## 1 no 269
## 2 yes 3570
df %>% count(overall_qual)
## overall_qual n
## 1 1 2
## 2 2 3
## 3 3 20
## 4 4 116
## 5 5 397
## 6 6 363
## 7 7 305
## 8 8 165
## 9 9 43
## 10 10 16
#check that base level (the one that appears first) is the one with the most observations
summary(df$central_air)
## Length Class Mode
## 1430 character character
df %>% group_by(central_air) %>%
summarise(applicants=sum(applicants),
total_units = sum(num_units))
## # A tibble: 2 × 3
## central_air applicants total_units
## <chr> <dbl> <dbl>
## 1 no 54 269
## 2 yes 5141 3570
# Distribution of applicants per property, split by central-air status
ggplot(df, aes(x = as.factor(central_air), y = applicants)) +
  geom_boxplot()
# Convert central_air to a factor whose levels are ordered by descending row
# frequency, so the most common level comes first (the reference level)
df <- df %>% mutate(central_air = fct_infreq(central_air))
summary(df$central_air)
## yes no
## 1335 95
Code is provided to calculate the average and create box plots of sale_price by year and month.
# Engineered features added to df.
df <- df %>%
  mutate(
    # NOTE(review): exp(log_rel_price) equals sale_price / neighborhood_saleprice,
    # so this is the *relative* price per square foot, not
    # sale_price / total_sq_feet as the column name suggests. Confirm intent.
    sale_price_per_sqft = exp(log_rel_price) / total_sq_feet,
    # Bathrooms per square foot
    bath_pr_sqft = tot_bathrooms / total_sq_feet,
    # 1 when the property was sold in July or August, otherwise 0
    student_apt = as.numeric(month_sold %in% c(7, 8))
  )
df %>% count(student_apt)
## student_apt n
## 1 0 1079
## 2 1 351
df %>% summary()
## applicants sale_price num_units year_sold
## Min. :0.000 Min. : 34900 Min. :1.000 Min. :2007
## 1st Qu.:2.000 1st Qu.:129600 1st Qu.:1.000 1st Qu.:2008
## Median :4.000 Median :163000 Median :2.000 Median :2009
## Mean :3.633 Mean :181069 Mean :2.685 Mean :2009
## 3rd Qu.:5.000 3rd Qu.:214375 3rd Qu.:4.000 3rd Qu.:2010
## Max. :7.000 Max. :755000 Max. :5.000 Max. :2011
## month_sold overall_qual total_sq_feet gr_liv_area
## Min. : 1.000 Min. : 1.000 Min. : 311 Min. : 261.6
## 1st Qu.: 5.000 1st Qu.: 5.000 1st Qu.:3193 1st Qu.:2139.6
## Median : 6.000 Median : 6.000 Median :3528 Median :2547.0
## Mean : 6.329 Mean : 6.082 Mean :3510 Mean :2511.2
## 3rd Qu.: 8.000 3rd Qu.: 7.000 3rd Qu.:3852 3rd Qu.:2852.3
## Max. :12.000 Max. :10.000 Max. :5171 Max. :4273.5
## tot_bathrooms lot_area exter_qual full_bath central_air
## Min. :1.000 Min. : 465.9 Min. :2.00 Min. :0.000 yes:1335
## 1st Qu.:3.000 1st Qu.: 3725.6 1st Qu.:3.00 1st Qu.:2.000 no : 95
## Median :3.000 Median : 4138.8 Median :3.00 Median :4.000
## Mean :2.952 Mean : 4070.4 Mean :3.39 Mean :3.115
## 3rd Qu.:3.000 3rd Qu.: 4529.0 3rd Qu.:4.00 3rd Qu.:4.000
## Max. :8.000 Max. :10271.5 Max. :5.00 Max. :6.000
## garage_type_attchd garage_type_basment garage_type_builtIn garage_type_detchd
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :1.0000 Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.5888 Mean :0.01329 Mean :0.06084 Mean :0.2699
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.0000
## garage_type_no_garage NeighborhoodBrDale neighborhood_brk_side
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.05664 Mean :0.01119 Mean :0.04056
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_clear_cr neighborhood_collg_cr neighborhood_crawfor
## Min. :0.00000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00000
## Mean :0.01958 Mean :0.1049 Mean :0.03566
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00000
## neighborhood_edwards neighborhood_gilbert neighborhood_idottrr
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06853 Mean :0.05524 Mean :0.02587
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_meadowv neighborhood_mitchel neighborhood_n_ames
## Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.01189 Mean :0.03427 Mean :0.1573
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000
## neighborhood_n_ridge neighborhood_n_ridge_hghts neighborhood_n_w_ames
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.02867 Mean :0.05385 Mean :0.05105
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_old_town neighborhood_sawyer neighborhood_sawyer_w
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.07902 Mean :0.05175 Mean :0.04126
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_somerst neighborhood_stone_br neighborhood_swisu
## Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.06014 Mean :0.01748 Mean :0.01748
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.00000 Max. :1.00000
## neighborhood_timber neighborhood_veenker neighborhood_saleprice
## Min. :0.00000 Min. :0.000000 Min. :101389
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:136902
## Median :0.00000 Median :0.000000 Median :181841
## Mean :0.02657 Mean :0.007692 Mean :180909
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:198518
## Max. :1.00000 Max. :1.000000 Max. :331835
## log_rel_price log_sale_price sale_price_per_sqft bath_pr_sqft
## Min. :-1.19109 Min. :10.46 Min. :0.0001087 Min. :0.0002562
## 1st Qu.:-0.18330 1st Qu.:11.77 1st Qu.:0.0002471 1st Qu.:0.0006980
## Median :-0.03704 Median :12.00 Median :0.0002804 Median :0.0008476
## Mean :-0.03482 Mean :12.02 Mean :0.0002850 Mean :0.0008319
## 3rd Qu.: 0.11353 3rd Qu.:12.28 3rd Qu.:0.0003160 3rd Qu.:0.0010070
## Max. : 1.33727 Max. :13.53 Max. :0.0009952 Max. :0.0032157
## student_apt
## Min. :0.0000
## 1st Qu.:0.0000
## Median :0.0000
## Mean :0.2455
## 3rd Qu.:0.0000
## Max. :1.0000
No code is provided.
# Preview the first rows in which at least one garage-type indicator is set.
# NOTE(review): the result is only printed and df is not modified, so rows
# whose garage indicators are all 0 (for example row 18) remain in df.
# Confirm that this is intended.
df %>%
  filter(
    garage_type_attchd + garage_type_basment + garage_type_detchd +
      garage_type_builtIn + garage_type_no_garage > 0
  ) %>%
  head(n = 6)
## applicants sale_price num_units year_sold month_sold overall_qual
## 1 3 208500 4 2008 2 7
## 2 5 181500 5 2007 5 6
## 3 3 223500 1 2008 9 7
## 4 4 140000 3 2011 2 7
## 5 5 250000 5 2008 12 8
## 6 5 143000 3 2009 10 5
## total_sq_feet gr_liv_area tot_bathrooms lot_area exter_qual full_bath
## 1 3586.886 2785.060 5 3898.256 4 4
## 2 3560.348 2315.267 3 4149.410 3 4
## 3 3672.314 2852.313 5 4461.603 4 4
## 4 3527.523 2791.378 3 4139.131 3 2
## 5 4012.276 3173.357 5 4928.292 4 4
## 6 3308.431 2433.181 3 4908.174 3 2
## central_air garage_type_attchd garage_type_basment garage_type_builtIn
## 1 yes 1 0 0
## 2 yes 1 0 0
## 3 yes 1 0 0
## 4 yes 0 0 0
## 5 yes 1 0 0
## 6 yes 1 0 0
## garage_type_detchd garage_type_no_garage NeighborhoodBrDale
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 1 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_brk_side neighborhood_clear_cr neighborhood_collg_cr
## 1 0 0 1
## 2 0 0 0
## 3 0 0 1
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_crawfor neighborhood_edwards neighborhood_gilbert
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 1 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_idottrr neighborhood_meadowv neighborhood_mitchel
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 1
## neighborhood_n_ames neighborhood_n_ridge neighborhood_n_ridge_hghts
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 1 0
## 6 0 0 0
## neighborhood_n_w_ames neighborhood_old_town neighborhood_sawyer
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_sawyer_w neighborhood_somerst neighborhood_stone_br
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_swisu neighborhood_timber neighborhood_veenker
## 1 0 0 0
## 2 0 0 1
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## neighborhood_saleprice log_rel_price log_sale_price sale_price_per_sqft
## 1 198517.7 0.04906067 12.24769 0.0002928122
## 2 245890.6 -0.30363117 12.10901 0.0002073205
## 3 198517.7 0.11853305 12.31717 0.0003065762
## 4 213681.7 -0.42284528 11.84940 0.0001857338
## 5 331835.3 -0.28317780 12.42922 0.0001877702
## 6 157755.8 -0.09820383 11.87060 0.0002739861
## bath_pr_sqft student_apt
## 1 0.0013939666 0
## 2 0.0008426143 0
## 3 0.0013615394 0
## 4 0.0008504552 0
## 5 0.0012461753 0
## 6 0.0009067742 0
#rows_with_errors <- (df$garage_type_attchd==0) + (df$garage_type_detchd==0) + (df$garage_type_builtIn==0) + (df$garage_type_no_garage==0)
#df <- df[-rows_with_errors]
No changes are needed to the below code. It is included because the results may be helpful for subsequent tasks.
#NO CHANGES NEEDED
# Example: the 18th property has 0 in every garage-type indicator column
df %>%
  dplyr::slice(18) %>%
  dplyr::select(contains("garage"))
## garage_type_attchd garage_type_basment garage_type_builtIn garage_type_detchd
## 1 0 0 0 0
## garage_type_no_garage
## 1 0
#NO CHANGES NEEDED
# Total number of units for each garage type: reshape the indicator columns to
# long form (one row per property and indicator), then sum num_units over the
# rows where the indicator is 1. pivot_longer() replaces the superseded gather().
df %>%
  dplyr::select(num_units, contains("garage")) %>%
  pivot_longer(-num_units, names_to = "feature", values_to = "value") %>%
  group_by(feature) %>%
  summarise(total_units = sum(num_units * value))
## # A tibble: 5 × 2
## feature total_units
## <chr> <dbl>
## 1 garage_type_attchd 2227
## 2 garage_type_basment 55
## 3 garage_type_builtIn 235
## 4 garage_type_detchd 1052
## 5 garage_type_no_garage 226
#NO CHANGES NEEDED
# Total number of units for each column whose name contains "neighborhood":
# reshape those columns to long form, then sum num_units * value per column.
# pivot_longer() replaces the superseded gather().
df %>%
  dplyr::select(num_units, contains("neighborhood")) %>%
  pivot_longer(-num_units, names_to = "feature", values_to = "value") %>%
  group_by(feature) %>%
  summarise(total_units = sum(num_units * value))
## # A tibble: 23 × 2
## feature total_units
## <chr> <dbl>
## 1 NeighborhoodBrDale 44
## 2 neighborhood_brk_side 170
## 3 neighborhood_clear_cr 72
## 4 neighborhood_collg_cr 390
## 5 neighborhood_crawfor 137
## 6 neighborhood_edwards 259
## 7 neighborhood_gilbert 204
## 8 neighborhood_idottrr 112
## 9 neighborhood_meadowv 48
## 10 neighborhood_mitchel 141
## # ℹ 13 more rows
You do not need to check the neighborhood columns, as your assistant has already verified that these are correct for all properties.
No code is provided
#NO CHANGES NEEDED
# Create training and test sets: createDataPartition() picks the row indices
# for the training set (p = 0.8); the remaining rows form the test set.
index <- createDataPartition(y = df$applicants, p = 0.8, list = FALSE)
train <- dplyr::slice(df, index)
test <- dplyr::slice(df, -index)

# Training predictors (every column except the first, applicants) and response
train_x <- train[-1]
train_y <- train[["applicants"]]
# Each training property's share of the total training units (shares sum to 1)
train_number_of_units <- train$num_units / sum(train$num_units)

# Test predictors and response, built the same way
test_x <- test[-1]
test_y <- test[["applicants"]]
# Each test property's share of the total test units (shares sum to 1)
test_number_of_units <- test$num_units / sum(test$num_units)
# Poisson GLM for the number of applicants with log(num_units) as an offset.
#the base (reference) level for neighborhood is the one with the most observations
#(neighborhood_n_ames, removed from the formula below)
#the base (reference) level for garage is the one with the most observations
#(garage_type_attchd, removed from the formula below)
# Create the log_sale_price variable
# NOTE(review): df already has log_sale_price before the split, so these two
# mutate() calls recompute an existing column; the formula then excludes it.
train <- train %>%
mutate(log_sale_price = log(sale_price))
test <- test %>%
mutate(log_sale_price = log(sale_price))
#Fit a GLM
#Do not use the offset or weight variables as predictors. Remove them from the formula (as the below is doing)
#Type ?family into the R console to see options for FAMILY
# "." expands to every other column of train; the "- term" entries remove the
# two reference-level indicators, num_units (used only in the offset) and
# log_sale_price from the predictors.
# NOTE(review): the recorded summary reports neighborhood_saleprice as NA
# ("1 not defined because of singularities"); consider removing it as well.
# NOTE(review): the fitted object is named glm, the same name as the fitting
# function; a distinct name would be clearer.
glm <- glm(
applicants ~ . + offset(log(num_units)) - neighborhood_n_ames - garage_type_attchd - num_units - log_sale_price,
data = train,
family = poisson(link = "log")
)
AIC(glm)
## [1] 5228.818
summary(glm)
##
## Call:
## glm(formula = applicants ~ . + offset(log(num_units)) - neighborhood_n_ames -
## garage_type_attchd - num_units - log_sale_price, family = poisson(link = "log"),
## data = train)
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.612e+00 2.188e+01 0.211 0.833062
## sale_price 6.177e-07 8.633e-07 0.716 0.474284
## year_sold -3.159e-03 1.088e-02 -0.290 0.771571
## month_sold -7.745e-03 6.154e-03 -1.258 0.208227
## overall_qual -4.370e-02 2.487e-02 -1.758 0.078831 .
## total_sq_feet 6.023e-04 1.234e-04 4.879 1.07e-06 ***
## gr_liv_area -1.777e-05 7.970e-05 -0.223 0.823556
## tot_bathrooms -3.178e-01 1.119e-01 -2.841 0.004502 **
## lot_area 3.411e-06 2.402e-05 0.142 0.887072
## exter_qual 1.621e-01 4.573e-02 3.544 0.000394 ***
## full_bath 1.128e-02 2.489e-02 0.453 0.650537
## central_airno -1.780e+00 1.625e-01 -10.950 < 2e-16 ***
## garage_type_basment -6.379e-01 1.790e-01 -3.563 0.000366 ***
## garage_type_builtIn -2.797e-01 7.508e-02 -3.725 0.000195 ***
## garage_type_detchd -3.346e-01 5.183e-02 -6.455 1.08e-10 ***
## garage_type_no_garage -3.939e-01 9.798e-02 -4.021 5.80e-05 ***
## NeighborhoodBrDale -4.755e-01 2.562e-01 -1.856 0.063429 .
## neighborhood_brk_side -1.957e-01 1.163e-01 -1.684 0.092249 .
## neighborhood_clear_cr -4.917e-02 1.275e-01 -0.386 0.699756
## neighborhood_collg_cr -8.473e-02 8.668e-02 -0.977 0.328356
## neighborhood_crawfor -1.207e-01 1.202e-01 -1.004 0.315313
## neighborhood_edwards 7.295e-02 8.902e-02 0.819 0.412545
## neighborhood_gilbert 1.496e-02 9.569e-02 0.156 0.875738
## neighborhood_idottrr -8.539e-01 2.115e-01 -4.037 5.41e-05 ***
## neighborhood_meadowv -3.208e-01 2.008e-01 -1.597 0.110193
## neighborhood_mitchel -1.280e-01 9.581e-02 -1.337 0.181385
## neighborhood_n_ridge -3.254e-01 1.969e-01 -1.653 0.098379 .
## neighborhood_n_ridge_hghts -3.719e-01 1.735e-01 -2.143 0.032097 *
## neighborhood_n_w_ames -8.980e-02 9.123e-02 -0.984 0.324963
## neighborhood_old_town -4.565e-02 8.864e-02 -0.515 0.606522
## neighborhood_sawyer -4.130e-01 9.983e-02 -4.137 3.52e-05 ***
## neighborhood_sawyer_w -5.371e-02 9.290e-02 -0.578 0.563154
## neighborhood_somerst -3.092e-02 1.227e-01 -0.252 0.801017
## neighborhood_stone_br -1.747e-01 1.993e-01 -0.877 0.380701
## neighborhood_swisu 2.543e-01 1.504e-01 1.691 0.090895 .
## neighborhood_timber -1.799e-01 1.442e-01 -1.248 0.212172
## neighborhood_veenker -3.291e-01 2.109e-01 -1.560 0.118722
## neighborhood_saleprice NA NA NA NA
## log_rel_price -7.583e-01 3.327e-01 -2.280 0.022635 *
## sale_price_per_sqft -1.534e+03 1.210e+03 -1.268 0.204712
## bath_pr_sqft 1.234e+03 3.836e+02 3.217 0.001294 **
## student_apt 4.999e-01 3.452e-02 14.482 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2784.1 on 1144 degrees of freedom
## Residual deviance: 1722.6 on 1104 degrees of freedom
## AIC: 5228.8
##
## Number of Fisher Scoring iterations: 5
# Alternative answer
# Models application frequency (average applicants per unit): the response is
# applicants / num_units and every row is weighted by num_units, instead of
# modelling the raw count with an offset(log(num_units)) term.
#
# Every excluded column is removed from `.` with a binary `-`. The formula used
# to read `. + - neighborhood_n_ames`; a unary minus after `+` removes nothing,
# so neighborhood_n_ames (presumably the intended reference level) stayed in
# the model and neighborhood_veenker was reported as aliased (NA) instead.
glm2 <- glm(
  applicants / num_units ~ . - neighborhood_n_ames - garage_type_attchd -
    log_sale_price - num_units,
  data = train,
  weights = num_units,
  family = poisson(link = "log")
)
# AIC comes back as Inf: the Poisson family's AIC evaluates dpois() on the
# response, and applicants / num_units is generally not an integer.
AIC(glm2)
## [1] Inf
summary(glm2)
##
## Call:
## glm(formula = applicants/num_units ~ . + -neighborhood_n_ames -
## garage_type_attchd - log_sale_price - num_units, family = poisson(link = "log"),
## data = train, weights = num_units)
##
## Coefficients: (2 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.283e+00 2.187e+01 0.196 0.844719
## sale_price 6.177e-07 8.633e-07 0.716 0.474284
## year_sold -3.159e-03 1.088e-02 -0.290 0.771571
## month_sold -7.745e-03 6.154e-03 -1.258 0.208227
## overall_qual -4.370e-02 2.487e-02 -1.758 0.078831 .
## total_sq_feet 6.023e-04 1.234e-04 4.879 1.07e-06 ***
## gr_liv_area -1.777e-05 7.970e-05 -0.223 0.823556
## tot_bathrooms -3.178e-01 1.119e-01 -2.841 0.004502 **
## lot_area 3.411e-06 2.402e-05 0.142 0.887072
## exter_qual 1.621e-01 4.573e-02 3.544 0.000394 ***
## full_bath 1.128e-02 2.489e-02 0.453 0.650538
## central_airno -1.780e+00 1.625e-01 -10.950 < 2e-16 ***
## garage_type_basment -6.379e-01 1.790e-01 -3.563 0.000366 ***
## garage_type_builtIn -2.797e-01 7.508e-02 -3.725 0.000195 ***
## garage_type_detchd -3.346e-01 5.183e-02 -6.455 1.08e-10 ***
## garage_type_no_garage -3.939e-01 9.798e-02 -4.021 5.80e-05 ***
## NeighborhoodBrDale -1.464e-01 3.439e-01 -0.426 0.670320
## neighborhood_brk_side 1.333e-01 2.485e-01 0.537 0.591566
## neighborhood_clear_cr 2.799e-01 2.124e-01 1.318 0.187653
## neighborhood_collg_cr 2.443e-01 1.925e-01 1.269 0.204319
## neighborhood_crawfor 2.084e-01 2.065e-01 1.009 0.313056
## neighborhood_edwards 4.020e-01 2.354e-01 1.708 0.087652 .
## neighborhood_gilbert 3.440e-01 1.990e-01 1.729 0.083767 .
## neighborhood_idottrr -5.248e-01 3.120e-01 -1.682 0.092607 .
## neighborhood_meadowv 8.288e-03 3.029e-01 0.027 0.978168
## neighborhood_mitchel 2.010e-01 2.183e-01 0.921 0.357056
## neighborhood_n_ames 3.291e-01 2.109e-01 1.560 0.118722
## neighborhood_n_ridge 3.637e-03 2.102e-01 0.017 0.986197
## neighborhood_n_ridge_hghts -4.286e-02 2.010e-01 -0.213 0.831107
## neighborhood_n_w_ames 2.393e-01 1.982e-01 1.207 0.227450
## neighborhood_old_town 2.834e-01 2.310e-01 1.227 0.219763
## neighborhood_sawyer -8.389e-02 2.315e-01 -0.362 0.717115
## neighborhood_sawyer_w 2.754e-01 2.029e-01 1.357 0.174676
## neighborhood_somerst 2.981e-01 1.954e-01 1.526 0.127104
## neighborhood_stone_br 1.544e-01 2.213e-01 0.698 0.485302
## neighborhood_swisu 5.834e-01 2.565e-01 2.274 0.022956 *
## neighborhood_timber 1.492e-01 2.034e-01 0.734 0.463238
## neighborhood_veenker NA NA NA NA
## neighborhood_saleprice NA NA NA NA
## log_rel_price -7.583e-01 3.327e-01 -2.280 0.022635 *
## sale_price_per_sqft -1.534e+03 1.210e+03 -1.268 0.204713
## bath_pr_sqft 1.234e+03 3.836e+02 3.217 0.001294 **
## student_apt 4.999e-01 3.452e-02 14.482 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2784.1 on 1144 degrees of freedom
## Residual deviance: 1722.6 on 1104 degrees of freedom
## AIC: Inf
##
## Number of Fisher Scoring iterations: 5
# Sanity check: the count model (`glm`, with an offset) and the frequency
# model (`glm2`, with weights) should predict the same number of applicants.
# Expected applicants per listing, straight from the count model
app_count <- predict(
  object = glm,
  newdata = test,
  type = "response"
)
# The frequency model predicts applicants per unit, so scale each prediction
# back up by that listing's unit count
app_frequency <- predict(
  object = glm2,
  newdata = test,
  type = "response"
) * test[["num_units"]]
head(app_count)
## 1 2 3 4 5 6
## 7.466909 1.381043 1.193778 3.390522 3.733967 10.568552
head(app_frequency)
## 1 2 3 4 5 6
## 7.466909 1.381043 1.193778 3.390522 3.733967 10.568552
# NO CODE CHANGES NEEDED
# Stepwise selection by AIC, starting from the fitted `glm` object and reusing
# its formula and family. No `scope` is supplied, so terms can only be dropped.
# NOTE: attaching MASS after tidyverse masks dplyr::select() with MASS::select().
library(MASS)
stepwise_result <- stepAIC(object = glm)
## Start: AIC=5228.82
## applicants ~ sale_price + num_units + year_sold + month_sold +
## overall_qual + total_sq_feet + gr_liv_area + tot_bathrooms +
## lot_area + exter_qual + full_bath + central_air + garage_type_attchd +
## garage_type_basment + garage_type_builtIn + garage_type_detchd +
## garage_type_no_garage + NeighborhoodBrDale + neighborhood_brk_side +
## neighborhood_clear_cr + neighborhood_collg_cr + neighborhood_crawfor +
## neighborhood_edwards + neighborhood_gilbert + neighborhood_idottrr +
## neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ames +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames +
## neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w +
## neighborhood_somerst + neighborhood_stone_br + neighborhood_swisu +
## neighborhood_timber + neighborhood_veenker + neighborhood_saleprice +
## log_rel_price + log_sale_price + sale_price_per_sqft + bath_pr_sqft +
## student_apt + offset(log(num_units)) - neighborhood_n_ames -
## garage_type_attchd - num_units - log_sale_price
##
##
## Step: AIC=5228.82
## applicants ~ sale_price + year_sold + month_sold + overall_qual +
## total_sq_feet + gr_liv_area + tot_bathrooms + lot_area +
## exter_qual + full_bath + central_air + garage_type_basment +
## garage_type_builtIn + garage_type_detchd + garage_type_no_garage +
## NeighborhoodBrDale + neighborhood_brk_side + neighborhood_clear_cr +
## neighborhood_collg_cr + neighborhood_crawfor + neighborhood_edwards +
## neighborhood_gilbert + neighborhood_idottrr + neighborhood_meadowv +
## neighborhood_mitchel + neighborhood_n_ridge + neighborhood_n_ridge_hghts +
## neighborhood_n_w_ames + neighborhood_old_town + neighborhood_sawyer +
## neighborhood_sawyer_w + neighborhood_somerst + neighborhood_stone_br +
## neighborhood_swisu + neighborhood_timber + neighborhood_veenker +
## log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt +
## offset(log(num_units))
##
## Df Deviance AIC
## - lot_area 1 1722.6 5226.8
## - neighborhood_gilbert 1 1722.6 5226.8
## - gr_liv_area 1 1722.7 5226.9
## - neighborhood_somerst 1 1722.7 5226.9
## - year_sold 1 1722.7 5226.9
## - neighborhood_clear_cr 1 1722.8 5227.0
## - full_bath 1 1722.8 5227.0
## - neighborhood_old_town 1 1722.9 5227.1
## - neighborhood_sawyer_w 1 1722.9 5227.2
## - sale_price 1 1723.1 5227.3
## - neighborhood_edwards 1 1723.3 5227.5
## - neighborhood_stone_br 1 1723.4 5227.6
## - neighborhood_collg_cr 1 1723.6 5227.8
## - neighborhood_n_w_ames 1 1723.6 5227.8
## - neighborhood_crawfor 1 1723.6 5227.8
## - neighborhood_timber 1 1724.2 5228.4
## - month_sold 1 1724.2 5228.4
## - sale_price_per_sqft 1 1724.3 5228.5
## - neighborhood_mitchel 1 1724.4 5228.6
## <none> 1722.6 5228.8
## - neighborhood_veenker 1 1725.2 5229.4
## - neighborhood_swisu 1 1725.3 5229.5
## - neighborhood_n_ridge 1 1725.3 5229.5
## - neighborhood_meadowv 1 1725.3 5229.5
## - neighborhood_brk_side 1 1725.5 5229.7
## - overall_qual 1 1725.7 5229.9
## - NeighborhoodBrDale 1 1726.5 5230.7
## - neighborhood_n_ridge_hghts 1 1727.1 5231.3
## - log_rel_price 1 1727.5 5231.7
## - tot_bathrooms 1 1730.6 5234.8
## - bath_pr_sqft 1 1732.8 5237.0
## - exter_qual 1 1735.1 5239.3
## - garage_type_builtIn 1 1737.3 5241.5
## - garage_type_basment 1 1737.9 5242.1
## - garage_type_no_garage 1 1739.8 5244.0
## - neighborhood_sawyer 1 1741.2 5245.4
## - total_sq_feet 1 1741.5 5245.7
## - neighborhood_idottrr 1 1742.7 5246.9
## - garage_type_detchd 1 1765.4 5269.6
## - central_air 1 1916.0 5420.2
## - student_apt 1 1923.6 5427.8
##
## Step: AIC=5226.84
## applicants ~ sale_price + year_sold + month_sold + overall_qual +
## total_sq_feet + gr_liv_area + tot_bathrooms + exter_qual +
## full_bath + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_clear_cr + neighborhood_collg_cr +
## neighborhood_crawfor + neighborhood_edwards + neighborhood_gilbert +
## neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames +
## neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w +
## neighborhood_somerst + neighborhood_stone_br + neighborhood_swisu +
## neighborhood_timber + neighborhood_veenker + log_rel_price +
## sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_gilbert 1 1722.7 5224.9
## - gr_liv_area 1 1722.7 5224.9
## - neighborhood_somerst 1 1722.7 5224.9
## - year_sold 1 1722.7 5224.9
## - neighborhood_clear_cr 1 1722.8 5225.0
## - full_bath 1 1722.8 5225.0
## - neighborhood_old_town 1 1722.9 5225.1
## - neighborhood_sawyer_w 1 1723.0 5225.2
## - sale_price 1 1723.2 5225.4
## - neighborhood_edwards 1 1723.3 5225.5
## - neighborhood_stone_br 1 1723.4 5225.6
## - neighborhood_n_w_ames 1 1723.6 5225.8
## - neighborhood_collg_cr 1 1723.6 5225.8
## - neighborhood_crawfor 1 1723.6 5225.9
## - neighborhood_timber 1 1724.2 5226.4
## - month_sold 1 1724.2 5226.4
## - sale_price_per_sqft 1 1724.3 5226.5
## - neighborhood_mitchel 1 1724.4 5226.7
## <none> 1722.6 5226.8
## - neighborhood_veenker 1 1725.2 5227.4
## - neighborhood_swisu 1 1725.3 5227.5
## - neighborhood_n_ridge 1 1725.3 5227.6
## - neighborhood_brk_side 1 1725.6 5227.8
## - overall_qual 1 1725.9 5228.1
## - neighborhood_meadowv 1 1725.9 5228.1
## - NeighborhoodBrDale 1 1727.0 5229.2
## - neighborhood_n_ridge_hghts 1 1727.2 5229.5
## - log_rel_price 1 1727.5 5229.7
## - tot_bathrooms 1 1730.7 5232.9
## - bath_pr_sqft 1 1732.9 5235.1
## - exter_qual 1 1735.1 5237.3
## - garage_type_builtIn 1 1737.4 5239.6
## - garage_type_basment 1 1737.9 5240.1
## - garage_type_no_garage 1 1740.0 5242.2
## - neighborhood_sawyer 1 1741.2 5243.4
## - total_sq_feet 1 1741.7 5243.9
## - neighborhood_idottrr 1 1742.7 5244.9
## - garage_type_detchd 1 1766.2 5268.5
## - central_air 1 1916.1 5418.3
## - student_apt 1 1923.7 5425.9
##
## Step: AIC=5224.86
## applicants ~ sale_price + year_sold + month_sold + overall_qual +
## total_sq_feet + gr_liv_area + tot_bathrooms + exter_qual +
## full_bath + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_clear_cr + neighborhood_collg_cr +
## neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr +
## neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge +
## neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_old_town +
## neighborhood_sawyer + neighborhood_sawyer_w + neighborhood_somerst +
## neighborhood_stone_br + neighborhood_swisu + neighborhood_timber +
## neighborhood_veenker + log_rel_price + sale_price_per_sqft +
## bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - gr_liv_area 1 1722.7 5222.9
## - year_sold 1 1722.7 5223.0
## - neighborhood_clear_cr 1 1722.9 5223.1
## - full_bath 1 1722.9 5223.1
## - neighborhood_somerst 1 1722.9 5223.1
## - neighborhood_old_town 1 1723.0 5223.2
## - neighborhood_sawyer_w 1 1723.3 5223.5
## - neighborhood_edwards 1 1723.3 5223.5
## - sale_price 1 1723.5 5223.7
## - neighborhood_stone_br 1 1724.1 5224.3
## - neighborhood_crawfor 1 1724.1 5224.3
## - month_sold 1 1724.3 5224.5
## - neighborhood_n_w_ames 1 1724.3 5224.5
## - sale_price_per_sqft 1 1724.6 5224.8
## <none> 1722.7 5224.9
## - neighborhood_collg_cr 1 1724.8 5225.0
## - neighborhood_mitchel 1 1724.8 5225.0
## - neighborhood_timber 1 1725.2 5225.5
## - neighborhood_swisu 1 1725.3 5225.5
## - neighborhood_brk_side 1 1725.7 5225.9
## - neighborhood_veenker 1 1725.8 5226.1
## - neighborhood_meadowv 1 1725.9 5226.1
## - overall_qual 1 1725.9 5226.1
## - NeighborhoodBrDale 1 1727.0 5227.2
## - neighborhood_n_ridge 1 1727.3 5227.5
## - log_rel_price 1 1727.5 5227.8
## - neighborhood_n_ridge_hghts 1 1731.1 5231.3
## - tot_bathrooms 1 1731.3 5231.5
## - bath_pr_sqft 1 1733.8 5234.0
## - exter_qual 1 1735.6 5235.8
## - garage_type_builtIn 1 1737.4 5237.6
## - garage_type_basment 1 1737.9 5238.1
## - garage_type_no_garage 1 1740.4 5240.7
## - total_sq_feet 1 1741.8 5242.0
## - neighborhood_sawyer 1 1741.8 5242.0
## - neighborhood_idottrr 1 1742.8 5243.0
## - garage_type_detchd 1 1767.4 5267.7
## - central_air 1 1919.3 5419.6
## - student_apt 1 1923.8 5424.0
##
## Step: AIC=5222.91
## applicants ~ sale_price + year_sold + month_sold + overall_qual +
## total_sq_feet + tot_bathrooms + exter_qual + full_bath +
## central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_clear_cr + neighborhood_collg_cr +
## neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr +
## neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge +
## neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_old_town +
## neighborhood_sawyer + neighborhood_sawyer_w + neighborhood_somerst +
## neighborhood_stone_br + neighborhood_swisu + neighborhood_timber +
## neighborhood_veenker + log_rel_price + sale_price_per_sqft +
## bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - year_sold 1 1722.8 5221.0
## - full_bath 1 1722.9 5221.1
## - neighborhood_somerst 1 1722.9 5221.2
## - neighborhood_clear_cr 1 1722.9 5221.2
## - neighborhood_old_town 1 1723.0 5221.2
## - neighborhood_sawyer_w 1 1723.3 5221.5
## - neighborhood_edwards 1 1723.3 5221.6
## - sale_price 1 1723.6 5221.8
## - neighborhood_stone_br 1 1724.1 5222.3
## - neighborhood_crawfor 1 1724.2 5222.5
## - month_sold 1 1724.3 5222.5
## - neighborhood_n_w_ames 1 1724.4 5222.6
## - sale_price_per_sqft 1 1724.7 5222.9
## <none> 1722.7 5222.9
## - neighborhood_collg_cr 1 1724.8 5223.0
## - neighborhood_mitchel 1 1724.8 5223.0
## - neighborhood_timber 1 1725.3 5223.5
## - neighborhood_swisu 1 1725.3 5223.5
## - neighborhood_brk_side 1 1725.7 5224.0
## - neighborhood_veenker 1 1725.9 5224.1
## - neighborhood_meadowv 1 1726.0 5224.2
## - overall_qual 1 1726.0 5224.2
## - NeighborhoodBrDale 1 1727.1 5225.3
## - neighborhood_n_ridge 1 1727.5 5225.7
## - log_rel_price 1 1727.6 5225.8
## - neighborhood_n_ridge_hghts 1 1731.1 5229.3
## - tot_bathrooms 1 1731.3 5229.6
## - bath_pr_sqft 1 1733.8 5232.0
## - exter_qual 1 1735.8 5234.0
## - garage_type_basment 1 1738.2 5236.4
## - garage_type_builtIn 1 1739.3 5237.5
## - garage_type_no_garage 1 1740.5 5238.7
## - neighborhood_sawyer 1 1741.8 5240.1
## - neighborhood_idottrr 1 1742.9 5241.1
## - total_sq_feet 1 1743.7 5241.9
## - garage_type_detchd 1 1767.7 5265.9
## - central_air 1 1921.7 5419.9
## - student_apt 1 1923.9 5422.1
##
## Step: AIC=5221
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet +
## tot_bathrooms + exter_qual + full_bath + central_air + garage_type_basment +
## garage_type_builtIn + garage_type_detchd + garage_type_no_garage +
## NeighborhoodBrDale + neighborhood_brk_side + neighborhood_clear_cr +
## neighborhood_collg_cr + neighborhood_crawfor + neighborhood_edwards +
## neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames +
## neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w +
## neighborhood_somerst + neighborhood_stone_br + neighborhood_swisu +
## neighborhood_timber + neighborhood_veenker + log_rel_price +
## sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - full_bath 1 1723.0 5219.2
## - neighborhood_clear_cr 1 1723.0 5219.2
## - neighborhood_somerst 1 1723.0 5219.2
## - neighborhood_old_town 1 1723.1 5219.3
## - neighborhood_sawyer_w 1 1723.4 5219.6
## - neighborhood_edwards 1 1723.4 5219.6
## - sale_price 1 1723.7 5219.9
## - neighborhood_stone_br 1 1724.2 5220.4
## - month_sold 1 1724.3 5220.5
## - neighborhood_crawfor 1 1724.3 5220.6
## - neighborhood_n_w_ames 1 1724.5 5220.7
## - sale_price_per_sqft 1 1724.8 5221.0
## <none> 1722.8 5221.0
## - neighborhood_collg_cr 1 1724.8 5221.1
## - neighborhood_mitchel 1 1724.9 5221.1
## - neighborhood_timber 1 1725.3 5221.5
## - neighborhood_swisu 1 1725.4 5221.6
## - neighborhood_brk_side 1 1725.9 5222.1
## - neighborhood_veenker 1 1726.0 5222.2
## - neighborhood_meadowv 1 1726.1 5222.3
## - overall_qual 1 1726.2 5222.4
## - NeighborhoodBrDale 1 1727.2 5223.4
## - neighborhood_n_ridge 1 1727.6 5223.8
## - log_rel_price 1 1727.7 5223.9
## - neighborhood_n_ridge_hghts 1 1731.1 5227.4
## - tot_bathrooms 1 1731.5 5227.7
## - bath_pr_sqft 1 1734.0 5230.2
## - exter_qual 1 1735.9 5232.1
## - garage_type_basment 1 1738.2 5234.5
## - garage_type_builtIn 1 1739.5 5235.7
## - garage_type_no_garage 1 1740.5 5236.7
## - neighborhood_sawyer 1 1741.9 5238.1
## - neighborhood_idottrr 1 1742.9 5239.1
## - total_sq_feet 1 1743.8 5240.0
## - garage_type_detchd 1 1767.7 5263.9
## - central_air 1 1922.6 5418.8
## - student_apt 1 1924.5 5420.7
##
## Step: AIC=5219.22
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet +
## tot_bathrooms + exter_qual + central_air + garage_type_basment +
## garage_type_builtIn + garage_type_detchd + garage_type_no_garage +
## NeighborhoodBrDale + neighborhood_brk_side + neighborhood_clear_cr +
## neighborhood_collg_cr + neighborhood_crawfor + neighborhood_edwards +
## neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames +
## neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w +
## neighborhood_somerst + neighborhood_stone_br + neighborhood_swisu +
## neighborhood_timber + neighborhood_veenker + log_rel_price +
## sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_somerst 1 1723.2 5217.5
## - neighborhood_clear_cr 1 1723.3 5217.5
## - neighborhood_old_town 1 1723.3 5217.5
## - neighborhood_sawyer_w 1 1723.6 5217.8
## - neighborhood_edwards 1 1723.7 5217.9
## - sale_price 1 1724.0 5218.2
## - month_sold 1 1724.5 5218.7
## - neighborhood_stone_br 1 1724.6 5218.8
## - neighborhood_n_w_ames 1 1724.7 5218.9
## - neighborhood_crawfor 1 1724.7 5218.9
## - sale_price_per_sqft 1 1725.0 5219.2
## <none> 1723.0 5219.2
## - neighborhood_collg_cr 1 1725.1 5219.3
## - neighborhood_mitchel 1 1725.1 5219.3
## - neighborhood_timber 1 1725.7 5219.9
## - neighborhood_swisu 1 1725.7 5219.9
## - neighborhood_brk_side 1 1726.1 5220.3
## - overall_qual 1 1726.2 5220.4
## - neighborhood_meadowv 1 1726.3 5220.5
## - neighborhood_veenker 1 1726.5 5220.7
## - NeighborhoodBrDale 1 1727.3 5221.6
## - log_rel_price 1 1728.2 5222.4
## - neighborhood_n_ridge 1 1728.3 5222.5
## - tot_bathrooms 1 1731.6 5225.8
## - neighborhood_n_ridge_hghts 1 1731.8 5226.0
## - bath_pr_sqft 1 1734.3 5228.5
## - exter_qual 1 1736.4 5230.6
## - garage_type_basment 1 1738.5 5232.7
## - garage_type_builtIn 1 1739.5 5233.7
## - garage_type_no_garage 1 1740.7 5234.9
## - neighborhood_sawyer 1 1742.0 5236.3
## - neighborhood_idottrr 1 1743.0 5237.2
## - total_sq_feet 1 1745.2 5239.4
## - garage_type_detchd 1 1768.5 5262.7
## - central_air 1 1922.6 5416.8
## - student_apt 1 1926.9 5421.1
##
## Step: AIC=5217.45
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet +
## tot_bathrooms + exter_qual + central_air + garage_type_basment +
## garage_type_builtIn + garage_type_detchd + garage_type_no_garage +
## NeighborhoodBrDale + neighborhood_brk_side + neighborhood_clear_cr +
## neighborhood_collg_cr + neighborhood_crawfor + neighborhood_edwards +
## neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames +
## neighborhood_old_town + neighborhood_sawyer + neighborhood_sawyer_w +
## neighborhood_stone_br + neighborhood_swisu + neighborhood_timber +
## neighborhood_veenker + log_rel_price + sale_price_per_sqft +
## bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_clear_cr 1 1723.4 5215.6
## - neighborhood_old_town 1 1723.5 5215.7
## - neighborhood_sawyer_w 1 1723.7 5215.9
## - neighborhood_edwards 1 1723.8 5216.1
## - sale_price 1 1724.0 5216.2
## - neighborhood_stone_br 1 1724.6 5216.8
## - neighborhood_n_w_ames 1 1724.7 5216.9
## - neighborhood_crawfor 1 1724.7 5216.9
## - month_sold 1 1724.7 5216.9
## - sale_price_per_sqft 1 1725.0 5217.2
## - neighborhood_mitchel 1 1725.2 5217.4
## <none> 1723.2 5217.5
## - neighborhood_collg_cr 1 1725.2 5217.5
## - neighborhood_timber 1 1725.8 5218.0
## - neighborhood_swisu 1 1726.0 5218.2
## - neighborhood_veenker 1 1726.5 5218.7
## - neighborhood_brk_side 1 1726.5 5218.7
## - neighborhood_meadowv 1 1726.8 5219.0
## - overall_qual 1 1727.0 5219.3
## - NeighborhoodBrDale 1 1727.8 5220.0
## - log_rel_price 1 1728.2 5220.4
## - neighborhood_n_ridge 1 1729.7 5221.9
## - tot_bathrooms 1 1731.6 5223.8
## - bath_pr_sqft 1 1734.3 5226.5
## - neighborhood_n_ridge_hghts 1 1735.9 5228.2
## - exter_qual 1 1736.5 5228.7
## - garage_type_basment 1 1738.7 5230.9
## - garage_type_builtIn 1 1739.5 5231.7
## - garage_type_no_garage 1 1740.8 5233.0
## - neighborhood_sawyer 1 1742.4 5234.6
## - neighborhood_idottrr 1 1744.0 5236.2
## - total_sq_feet 1 1745.2 5237.5
## - garage_type_detchd 1 1769.8 5262.0
## - central_air 1 1923.6 5415.8
## - student_apt 1 1929.5 5421.7
##
## Step: AIC=5215.6
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet +
## tot_bathrooms + exter_qual + central_air + garage_type_basment +
## garage_type_builtIn + garage_type_detchd + garage_type_no_garage +
## NeighborhoodBrDale + neighborhood_brk_side + neighborhood_collg_cr +
## neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr +
## neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge +
## neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_old_town +
## neighborhood_sawyer + neighborhood_sawyer_w + neighborhood_stone_br +
## neighborhood_swisu + neighborhood_timber + neighborhood_veenker +
## log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt +
## offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_old_town 1 1723.7 5213.9
## - neighborhood_sawyer_w 1 1723.8 5214.0
## - neighborhood_edwards 1 1724.0 5214.2
## - sale_price 1 1724.0 5214.2
## - neighborhood_n_w_ames 1 1724.7 5214.9
## - neighborhood_stone_br 1 1724.7 5214.9
## - neighborhood_crawfor 1 1724.7 5215.0
## - month_sold 1 1724.9 5215.1
## - sale_price_per_sqft 1 1725.1 5215.3
## - neighborhood_collg_cr 1 1725.3 5215.5
## - neighborhood_mitchel 1 1725.3 5215.5
## <none> 1723.4 5215.6
## - neighborhood_timber 1 1725.8 5216.0
## - neighborhood_swisu 1 1726.2 5216.4
## - neighborhood_veenker 1 1726.5 5216.7
## - neighborhood_brk_side 1 1726.6 5216.8
## - neighborhood_meadowv 1 1726.9 5217.1
## - overall_qual 1 1727.1 5217.3
## - NeighborhoodBrDale 1 1728.0 5218.2
## - log_rel_price 1 1728.3 5218.5
## - neighborhood_n_ridge 1 1729.7 5219.9
## - tot_bathrooms 1 1731.7 5221.9
## - bath_pr_sqft 1 1734.3 5224.5
## - neighborhood_n_ridge_hghts 1 1736.1 5226.3
## - exter_qual 1 1737.2 5227.5
## - garage_type_basment 1 1738.8 5229.0
## - garage_type_builtIn 1 1739.6 5229.8
## - garage_type_no_garage 1 1740.8 5231.0
## - neighborhood_sawyer 1 1742.5 5232.7
## - neighborhood_idottrr 1 1744.3 5234.5
## - total_sq_feet 1 1745.2 5235.5
## - garage_type_detchd 1 1769.9 5260.1
## - central_air 1 1923.8 5414.0
## - student_apt 1 1930.2 5420.4
##
## Step: AIC=5213.87
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet +
## tot_bathrooms + exter_qual + central_air + garage_type_basment +
## garage_type_builtIn + garage_type_detchd + garage_type_no_garage +
## NeighborhoodBrDale + neighborhood_brk_side + neighborhood_collg_cr +
## neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr +
## neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge +
## neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_sawyer +
## neighborhood_sawyer_w + neighborhood_stone_br + neighborhood_swisu +
## neighborhood_timber + neighborhood_veenker + log_rel_price +
## sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_sawyer_w 1 1724.0 5212.2
## - neighborhood_edwards 1 1724.7 5212.9
## - sale_price 1 1724.7 5212.9
## - neighborhood_n_w_ames 1 1724.9 5213.1
## - neighborhood_crawfor 1 1725.0 5213.2
## - month_sold 1 1725.1 5213.4
## - neighborhood_stone_br 1 1725.2 5213.4
## - neighborhood_mitchel 1 1725.4 5213.6
## - neighborhood_collg_cr 1 1725.5 5213.7
## <none> 1723.7 5213.9
## - sale_price_per_sqft 1 1725.7 5213.9
## - neighborhood_timber 1 1726.2 5214.4
## - neighborhood_brk_side 1 1726.6 5214.8
## - neighborhood_swisu 1 1726.8 5215.1
## - neighborhood_veenker 1 1726.9 5215.1
## - neighborhood_meadowv 1 1727.0 5215.2
## - overall_qual 1 1727.4 5215.6
## - NeighborhoodBrDale 1 1728.0 5216.2
## - log_rel_price 1 1728.5 5216.8
## - neighborhood_n_ridge 1 1730.8 5219.0
## - tot_bathrooms 1 1732.4 5220.6
## - bath_pr_sqft 1 1735.2 5223.4
## - exter_qual 1 1737.4 5225.6
## - neighborhood_n_ridge_hghts 1 1737.6 5225.8
## - garage_type_basment 1 1739.1 5227.3
## - garage_type_builtIn 1 1739.8 5228.0
## - garage_type_no_garage 1 1742.4 5230.6
## - neighborhood_sawyer 1 1742.7 5230.9
## - neighborhood_idottrr 1 1744.8 5233.0
## - total_sq_feet 1 1745.4 5233.7
## - garage_type_detchd 1 1778.8 5267.0
## - student_apt 1 1930.5 5418.7
## - central_air 1 1936.7 5424.9
##
## Step: AIC=5212.18
## applicants ~ sale_price + month_sold + overall_qual + total_sq_feet +
## tot_bathrooms + exter_qual + central_air + garage_type_basment +
## garage_type_builtIn + garage_type_detchd + garage_type_no_garage +
## NeighborhoodBrDale + neighborhood_brk_side + neighborhood_collg_cr +
## neighborhood_crawfor + neighborhood_edwards + neighborhood_idottrr +
## neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge +
## neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_sawyer +
## neighborhood_stone_br + neighborhood_swisu + neighborhood_timber +
## neighborhood_veenker + log_rel_price + sale_price_per_sqft +
## bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - sale_price 1 1724.9 5211.1
## - neighborhood_n_w_ames 1 1725.0 5211.3
## - neighborhood_edwards 1 1725.0 5211.3
## - neighborhood_crawfor 1 1725.2 5211.4
## - neighborhood_stone_br 1 1725.3 5211.5
## - month_sold 1 1725.5 5211.7
## - neighborhood_collg_cr 1 1725.5 5211.7
## - neighborhood_mitchel 1 1725.6 5211.8
## <none> 1724.0 5212.2
## - sale_price_per_sqft 1 1726.0 5212.2
## - neighborhood_timber 1 1726.3 5212.6
## - neighborhood_brk_side 1 1726.9 5213.1
## - neighborhood_veenker 1 1727.0 5213.3
## - neighborhood_swisu 1 1727.2 5213.4
## - neighborhood_meadowv 1 1727.2 5213.4
## - overall_qual 1 1727.7 5213.9
## - NeighborhoodBrDale 1 1728.3 5214.5
## - log_rel_price 1 1728.8 5215.0
## - neighborhood_n_ridge 1 1730.8 5217.0
## - tot_bathrooms 1 1732.8 5219.0
## - bath_pr_sqft 1 1735.6 5221.8
## - neighborhood_n_ridge_hghts 1 1737.6 5223.8
## - exter_qual 1 1737.6 5223.8
## - garage_type_basment 1 1739.4 5225.6
## - garage_type_builtIn 1 1740.0 5226.2
## - garage_type_no_garage 1 1742.5 5228.7
## - neighborhood_sawyer 1 1742.8 5229.0
## - neighborhood_idottrr 1 1745.0 5231.2
## - total_sq_feet 1 1745.7 5231.9
## - garage_type_detchd 1 1778.9 5265.1
## - student_apt 1 1932.2 5418.4
## - central_air 1 1936.7 5422.9
##
## Step: AIC=5211.14
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms +
## exter_qual + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_collg_cr + neighborhood_crawfor +
## neighborhood_edwards + neighborhood_idottrr + neighborhood_meadowv +
## neighborhood_mitchel + neighborhood_n_ridge + neighborhood_n_ridge_hghts +
## neighborhood_n_w_ames + neighborhood_sawyer + neighborhood_stone_br +
## neighborhood_swisu + neighborhood_timber + neighborhood_veenker +
## log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt +
## offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_edwards 1 1725.6 5209.8
## - neighborhood_stone_br 1 1725.6 5209.9
## - neighborhood_crawfor 1 1725.7 5209.9
## - neighborhood_n_w_ames 1 1725.8 5210.0
## - neighborhood_collg_cr 1 1726.2 5210.5
## - sale_price_per_sqft 1 1726.5 5210.7
## - month_sold 1 1726.5 5210.7
## - neighborhood_timber 1 1726.7 5210.9
## - neighborhood_mitchel 1 1726.7 5210.9
## <none> 1724.9 5211.1
## - neighborhood_veenker 1 1727.5 5211.7
## - overall_qual 1 1727.8 5212.0
## - neighborhood_swisu 1 1727.9 5212.1
## - neighborhood_brk_side 1 1728.5 5212.7
## - neighborhood_meadowv 1 1729.0 5213.2
## - log_rel_price 1 1729.5 5213.7
## - NeighborhoodBrDale 1 1730.1 5214.3
## - neighborhood_n_ridge 1 1731.2 5215.4
## - tot_bathrooms 1 1733.0 5217.2
## - bath_pr_sqft 1 1735.8 5220.0
## - neighborhood_n_ridge_hghts 1 1738.9 5223.1
## - exter_qual 1 1740.5 5224.7
## - garage_type_builtIn 1 1740.7 5224.9
## - garage_type_basment 1 1740.7 5224.9
## - garage_type_no_garage 1 1742.9 5227.1
## - neighborhood_sawyer 1 1745.4 5229.6
## - total_sq_feet 1 1747.2 5231.4
## - neighborhood_idottrr 1 1748.0 5232.2
## - garage_type_detchd 1 1780.5 5264.7
## - student_apt 1 1932.8 5417.0
## - central_air 1 1937.1 5421.3
##
## Step: AIC=5209.79
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms +
## exter_qual + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_collg_cr + neighborhood_crawfor +
## neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames +
## neighborhood_sawyer + neighborhood_stone_br + neighborhood_swisu +
## neighborhood_timber + neighborhood_veenker + log_rel_price +
## sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_stone_br 1 1726.2 5208.4
## - neighborhood_crawfor 1 1726.4 5208.6
## - neighborhood_n_w_ames 1 1726.5 5208.7
## - neighborhood_collg_cr 1 1727.0 5209.2
## - sale_price_per_sqft 1 1727.1 5209.3
## - month_sold 1 1727.2 5209.4
## - neighborhood_timber 1 1727.3 5209.5
## <none> 1725.6 5209.8
## - neighborhood_mitchel 1 1727.7 5209.9
## - neighborhood_veenker 1 1728.1 5210.3
## - neighborhood_swisu 1 1728.3 5210.5
## - overall_qual 1 1728.9 5211.1
## - neighborhood_brk_side 1 1729.9 5212.1
## - log_rel_price 1 1729.9 5212.1
## - neighborhood_meadowv 1 1730.2 5212.4
## - NeighborhoodBrDale 1 1731.2 5213.4
## - neighborhood_n_ridge 1 1731.5 5213.7
## - tot_bathrooms 1 1733.7 5215.9
## - bath_pr_sqft 1 1736.5 5218.7
## - neighborhood_n_ridge_hghts 1 1739.0 5221.3
## - garage_type_basment 1 1740.8 5223.0
## - exter_qual 1 1741.1 5223.3
## - garage_type_builtIn 1 1741.3 5223.6
## - garage_type_no_garage 1 1743.1 5225.3
## - neighborhood_sawyer 1 1747.7 5229.9
## - total_sq_feet 1 1747.7 5229.9
## - neighborhood_idottrr 1 1750.2 5232.4
## - garage_type_detchd 1 1780.6 5262.8
## - student_apt 1 1933.2 5415.4
## - central_air 1 1937.5 5419.7
##
## Step: AIC=5208.41
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms +
## exter_qual + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_collg_cr + neighborhood_crawfor +
## neighborhood_idottrr + neighborhood_meadowv + neighborhood_mitchel +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_n_w_ames +
## neighborhood_sawyer + neighborhood_swisu + neighborhood_timber +
## neighborhood_veenker + log_rel_price + sale_price_per_sqft +
## bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_crawfor 1 1726.9 5207.1
## - neighborhood_n_w_ames 1 1727.0 5207.2
## - neighborhood_collg_cr 1 1727.3 5207.5
## - neighborhood_timber 1 1727.6 5207.8
## - month_sold 1 1728.0 5208.2
## - sale_price_per_sqft 1 1728.1 5208.3
## <none> 1726.2 5208.4
## - neighborhood_mitchel 1 1728.3 5208.5
## - neighborhood_veenker 1 1728.5 5208.7
## - neighborhood_swisu 1 1728.9 5209.1
## - log_rel_price 1 1730.0 5210.2
## - overall_qual 1 1730.3 5210.5
## - neighborhood_brk_side 1 1730.7 5210.9
## - neighborhood_meadowv 1 1731.0 5211.2
## - neighborhood_n_ridge 1 1731.5 5211.7
## - NeighborhoodBrDale 1 1731.9 5212.1
## - tot_bathrooms 1 1735.1 5215.3
## - bath_pr_sqft 1 1738.0 5218.2
## - neighborhood_n_ridge_hghts 1 1739.3 5219.5
## - garage_type_basment 1 1741.3 5221.5
## - exter_qual 1 1741.4 5221.6
## - garage_type_builtIn 1 1742.3 5222.5
## - garage_type_no_garage 1 1743.6 5223.8
## - total_sq_feet 1 1747.8 5228.0
## - neighborhood_sawyer 1 1748.6 5228.8
## - neighborhood_idottrr 1 1751.2 5231.4
## - garage_type_detchd 1 1780.8 5261.0
## - student_apt 1 1935.2 5415.4
## - central_air 1 1937.5 5417.7
##
## Step: AIC=5207.14
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms +
## exter_qual + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_collg_cr + neighborhood_idottrr +
## neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge +
## neighborhood_n_ridge_hghts + neighborhood_n_w_ames + neighborhood_sawyer +
## neighborhood_swisu + neighborhood_timber + neighborhood_veenker +
## log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt +
## offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_n_w_ames 1 1727.5 5205.8
## - neighborhood_collg_cr 1 1727.9 5206.1
## - neighborhood_timber 1 1728.2 5206.4
## - month_sold 1 1728.8 5207.0
## - sale_price_per_sqft 1 1728.8 5207.0
## - neighborhood_mitchel 1 1728.9 5207.1
## <none> 1726.9 5207.1
## - neighborhood_veenker 1 1729.2 5207.4
## - neighborhood_swisu 1 1729.8 5208.0
## - log_rel_price 1 1730.7 5208.9
## - neighborhood_brk_side 1 1731.2 5209.4
## - overall_qual 1 1731.4 5209.6
## - neighborhood_meadowv 1 1731.7 5209.9
## - neighborhood_n_ridge 1 1731.9 5210.1
## - NeighborhoodBrDale 1 1732.5 5210.8
## - tot_bathrooms 1 1735.6 5213.8
## - bath_pr_sqft 1 1738.5 5216.7
## - neighborhood_n_ridge_hghts 1 1739.5 5217.7
## - garage_type_basment 1 1742.4 5220.6
## - garage_type_builtIn 1 1742.9 5221.1
## - exter_qual 1 1743.0 5221.2
## - garage_type_no_garage 1 1744.2 5222.5
## - total_sq_feet 1 1748.2 5226.4
## - neighborhood_sawyer 1 1749.0 5227.2
## - neighborhood_idottrr 1 1751.7 5229.9
## - garage_type_detchd 1 1782.6 5260.8
## - student_apt 1 1935.2 5413.5
## - central_air 1 1938.9 5417.1
##
## Step: AIC=5205.75
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms +
## exter_qual + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_collg_cr + neighborhood_idottrr +
## neighborhood_meadowv + neighborhood_mitchel + neighborhood_n_ridge +
## neighborhood_n_ridge_hghts + neighborhood_sawyer + neighborhood_swisu +
## neighborhood_timber + neighborhood_veenker + log_rel_price +
## sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_collg_cr 1 1728.3 5204.5
## - neighborhood_timber 1 1728.6 5204.8
## - neighborhood_mitchel 1 1729.3 5205.5
## - month_sold 1 1729.3 5205.5
## - sale_price_per_sqft 1 1729.4 5205.6
## <none> 1727.5 5205.8
## - neighborhood_veenker 1 1729.6 5205.8
## - neighborhood_swisu 1 1730.5 5206.7
## - log_rel_price 1 1731.2 5207.4
## - neighborhood_brk_side 1 1731.9 5208.1
## - neighborhood_n_ridge 1 1732.1 5208.3
## - neighborhood_meadowv 1 1732.3 5208.5
## - overall_qual 1 1732.3 5208.5
## - NeighborhoodBrDale 1 1733.2 5209.4
## - tot_bathrooms 1 1736.4 5212.6
## - bath_pr_sqft 1 1739.2 5215.4
## - neighborhood_n_ridge_hghts 1 1739.6 5215.8
## - garage_type_basment 1 1742.7 5218.9
## - garage_type_builtIn 1 1743.4 5219.6
## - garage_type_no_garage 1 1744.5 5220.7
## - exter_qual 1 1745.8 5222.0
## - total_sq_feet 1 1748.6 5224.8
## - neighborhood_sawyer 1 1749.2 5225.4
## - neighborhood_idottrr 1 1752.3 5228.5
## - garage_type_detchd 1 1782.7 5258.9
## - student_apt 1 1936.7 5412.9
## - central_air 1 1938.9 5415.1
##
## Step: AIC=5204.5
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms +
## exter_qual + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv +
## neighborhood_mitchel + neighborhood_n_ridge + neighborhood_n_ridge_hghts +
## neighborhood_sawyer + neighborhood_swisu + neighborhood_timber +
## neighborhood_veenker + log_rel_price + sale_price_per_sqft +
## bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_timber 1 1729.2 5203.4
## - neighborhood_mitchel 1 1729.9 5204.1
## - month_sold 1 1730.0 5204.2
## - sale_price_per_sqft 1 1730.1 5204.3
## - neighborhood_veenker 1 1730.2 5204.4
## <none> 1728.3 5204.5
## - neighborhood_swisu 1 1731.3 5205.5
## - log_rel_price 1 1732.0 5206.3
## - neighborhood_n_ridge 1 1732.3 5206.5
## - neighborhood_brk_side 1 1732.6 5206.8
## - neighborhood_meadowv 1 1732.9 5207.1
## - overall_qual 1 1733.3 5207.5
## - NeighborhoodBrDale 1 1733.8 5208.1
## - tot_bathrooms 1 1737.1 5211.3
## - neighborhood_n_ridge_hghts 1 1739.6 5213.8
## - bath_pr_sqft 1 1739.8 5214.1
## - garage_type_basment 1 1743.3 5217.5
## - garage_type_builtIn 1 1744.0 5218.2
## - garage_type_no_garage 1 1745.1 5219.3
## - exter_qual 1 1745.8 5220.0
## - neighborhood_sawyer 1 1749.6 5223.8
## - total_sq_feet 1 1749.7 5223.9
## - neighborhood_idottrr 1 1753.1 5227.3
## - garage_type_detchd 1 1782.7 5256.9
## - student_apt 1 1937.0 5411.2
## - central_air 1 1939.1 5413.3
##
## Step: AIC=5203.37
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms +
## exter_qual + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv +
## neighborhood_mitchel + neighborhood_n_ridge + neighborhood_n_ridge_hghts +
## neighborhood_sawyer + neighborhood_swisu + neighborhood_veenker +
## log_rel_price + sale_price_per_sqft + bath_pr_sqft + student_apt +
## offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_mitchel 1 1730.7 5202.9
## - month_sold 1 1730.7 5202.9
## - neighborhood_veenker 1 1731.0 5203.2
## - sale_price_per_sqft 1 1731.0 5203.2
## <none> 1729.2 5203.4
## - neighborhood_swisu 1 1732.3 5204.5
## - log_rel_price 1 1732.7 5204.9
## - neighborhood_n_ridge 1 1732.7 5204.9
## - neighborhood_brk_side 1 1733.5 5205.7
## - neighborhood_meadowv 1 1733.9 5206.1
## - overall_qual 1 1734.6 5206.8
## - NeighborhoodBrDale 1 1734.8 5207.0
## - tot_bathrooms 1 1738.1 5210.3
## - neighborhood_n_ridge_hghts 1 1739.7 5211.9
## - bath_pr_sqft 1 1740.9 5213.1
## - garage_type_basment 1 1744.9 5217.1
## - garage_type_builtIn 1 1745.0 5217.2
## - garage_type_no_garage 1 1745.9 5218.1
## - exter_qual 1 1746.5 5218.7
## - total_sq_feet 1 1750.2 5222.4
## - neighborhood_sawyer 1 1750.5 5222.8
## - neighborhood_idottrr 1 1754.2 5226.4
## - garage_type_detchd 1 1783.2 5255.4
## - student_apt 1 1937.8 5410.0
## - central_air 1 1939.6 5411.9
##
## Step: AIC=5202.89
## applicants ~ month_sold + overall_qual + total_sq_feet + tot_bathrooms +
## exter_qual + central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_sawyer +
## neighborhood_swisu + neighborhood_veenker + log_rel_price +
## sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - month_sold 1 1732.2 5202.4
## - sale_price_per_sqft 1 1732.4 5202.6
## - neighborhood_veenker 1 1732.4 5202.7
## <none> 1730.7 5202.9
## - neighborhood_swisu 1 1733.9 5204.1
## - neighborhood_n_ridge 1 1734.3 5204.5
## - log_rel_price 1 1734.4 5204.7
## - neighborhood_brk_side 1 1734.8 5205.0
## - neighborhood_meadowv 1 1735.1 5205.3
## - overall_qual 1 1735.8 5206.0
## - NeighborhoodBrDale 1 1736.2 5206.4
## - tot_bathrooms 1 1739.5 5209.7
## - neighborhood_n_ridge_hghts 1 1741.4 5211.6
## - bath_pr_sqft 1 1742.1 5212.4
## - garage_type_basment 1 1746.1 5216.3
## - garage_type_builtIn 1 1746.4 5216.6
## - garage_type_no_garage 1 1748.1 5218.3
## - exter_qual 1 1748.8 5219.0
## - neighborhood_sawyer 1 1751.3 5221.5
## - total_sq_feet 1 1752.0 5222.2
## - neighborhood_idottrr 1 1755.4 5225.6
## - garage_type_detchd 1 1783.6 5253.8
## - central_air 1 1940.0 5410.2
## - student_apt 1 1940.8 5411.0
##
## Step: AIC=5202.41
## applicants ~ overall_qual + total_sq_feet + tot_bathrooms + exter_qual +
## central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_sawyer +
## neighborhood_swisu + neighborhood_veenker + log_rel_price +
## sale_price_per_sqft + bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - sale_price_per_sqft 1 1733.9 5202.1
## - neighborhood_veenker 1 1734.0 5202.2
## <none> 1732.2 5202.4
## - neighborhood_swisu 1 1735.5 5203.7
## - neighborhood_n_ridge 1 1735.8 5204.0
## - log_rel_price 1 1735.9 5204.1
## - neighborhood_brk_side 1 1736.2 5204.4
## - neighborhood_meadowv 1 1736.6 5204.8
## - overall_qual 1 1737.5 5205.7
## - NeighborhoodBrDale 1 1737.6 5205.8
## - tot_bathrooms 1 1741.2 5209.4
## - neighborhood_n_ridge_hghts 1 1742.9 5211.1
## - bath_pr_sqft 1 1744.0 5212.2
## - garage_type_basment 1 1747.6 5215.8
## - garage_type_builtIn 1 1748.2 5216.4
## - garage_type_no_garage 1 1749.7 5217.9
## - exter_qual 1 1749.9 5218.1
## - neighborhood_sawyer 1 1752.7 5220.9
## - total_sq_feet 1 1753.6 5221.8
## - neighborhood_idottrr 1 1756.6 5224.8
## - garage_type_detchd 1 1786.1 5254.3
## - central_air 1 1942.9 5411.1
## - student_apt 1 1943.5 5411.8
##
## Step: AIC=5202.14
## applicants ~ overall_qual + total_sq_feet + tot_bathrooms + exter_qual +
## central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_sawyer +
## neighborhood_swisu + neighborhood_veenker + log_rel_price +
## bath_pr_sqft + student_apt + offset(log(num_units))
##
## Df Deviance AIC
## - neighborhood_veenker 1 1735.9 5202.1
## <none> 1733.9 5202.1
## - neighborhood_swisu 1 1737.5 5203.7
## - neighborhood_meadowv 1 1738.2 5204.5
## - neighborhood_brk_side 1 1738.5 5204.8
## - neighborhood_n_ridge 1 1739.2 5205.4
## - overall_qual 1 1739.2 5205.4
## - NeighborhoodBrDale 1 1739.4 5205.6
## - tot_bathrooms 1 1741.2 5207.4
## - bath_pr_sqft 1 1744.0 5210.3
## - neighborhood_n_ridge_hghts 1 1746.1 5212.3
## - garage_type_basment 1 1750.5 5216.7
## - garage_type_builtIn 1 1750.8 5217.0
## - exter_qual 1 1751.7 5217.9
## - garage_type_no_garage 1 1753.4 5219.6
## - neighborhood_sawyer 1 1754.4 5220.7
## - neighborhood_idottrr 1 1759.3 5225.5
## - total_sq_feet 1 1771.5 5237.7
## - garage_type_detchd 1 1789.6 5255.8
## - log_rel_price 1 1867.2 5333.5
## - student_apt 1 1944.6 5410.8
## - central_air 1 1958.5 5424.8
##
## Step: AIC=5202.13
## applicants ~ overall_qual + total_sq_feet + tot_bathrooms + exter_qual +
## central_air + garage_type_basment + garage_type_builtIn +
## garage_type_detchd + garage_type_no_garage + NeighborhoodBrDale +
## neighborhood_brk_side + neighborhood_idottrr + neighborhood_meadowv +
## neighborhood_n_ridge + neighborhood_n_ridge_hghts + neighborhood_sawyer +
## neighborhood_swisu + log_rel_price + bath_pr_sqft + student_apt +
## offset(log(num_units))
##
## Df Deviance AIC
## <none> 1735.9 5202.1
## - neighborhood_swisu 1 1739.5 5203.7
## - neighborhood_meadowv 1 1740.2 5204.5
## - neighborhood_brk_side 1 1740.6 5204.8
## - neighborhood_n_ridge 1 1740.8 5205.0
## - overall_qual 1 1741.3 5205.5
## - NeighborhoodBrDale 1 1741.4 5205.6
## - tot_bathrooms 1 1743.3 5207.5
## - bath_pr_sqft 1 1746.2 5210.4
## - neighborhood_n_ridge_hghts 1 1747.5 5211.7
## - garage_type_basment 1 1752.3 5216.6
## - garage_type_builtIn 1 1752.4 5216.6
## - exter_qual 1 1753.5 5217.7
## - garage_type_no_garage 1 1755.2 5219.4
## - neighborhood_sawyer 1 1756.4 5220.6
## - neighborhood_idottrr 1 1761.4 5225.6
## - total_sq_feet 1 1773.3 5237.5
## - garage_type_detchd 1 1790.9 5255.1
## - log_rel_price 1 1868.0 5332.2
## - student_apt 1 1946.3 5410.5
## - central_air 1 1960.3 5424.5
library(dplyr)
# Refit the GLM on the training data with the formula kept by the stepwise
# search, then report its AIC and coefficient table.
# NOTE(review): `glm$family` presumably reads the family from an earlier fitted
# model object that is also named `glm` -- confirm against the earlier chunk.
final_glm <- glm(formula = stepwise_result$formula, family = glm$family, data = train)
AIC(final_glm)
## [1] 5202.127
summary(final_glm)
##
## Call:
## glm(formula = stepwise_result$formula, family = glm$family, data = train)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.183e+00 3.149e-01 -6.930 4.20e-12 ***
## overall_qual -4.870e-02 2.096e-02 -2.324 0.020140 *
## total_sq_feet 6.071e-04 9.058e-05 6.702 2.05e-11 ***
## tot_bathrooms -2.298e-01 7.537e-02 -3.049 0.002295 **
## exter_qual 1.716e-01 4.078e-02 4.207 2.58e-05 ***
## central_airno -1.798e+00 1.587e-01 -11.332 < 2e-16 ***
## garage_type_basment -6.425e-01 1.755e-01 -3.661 0.000251 ***
## garage_type_builtIn -2.754e-01 7.026e-02 -3.920 8.87e-05 ***
## garage_type_detchd -3.378e-01 4.655e-02 -7.256 4.00e-13 ***
## garage_type_no_garage -3.825e-01 9.100e-02 -4.204 2.62e-05 ***
## NeighborhoodBrDale -5.172e-01 2.396e-01 -2.158 0.030906 *
## neighborhood_brk_side -2.225e-01 1.056e-01 -2.107 0.035078 *
## neighborhood_idottrr -8.875e-01 2.015e-01 -4.404 1.06e-05 ***
## neighborhood_meadowv -3.599e-01 1.824e-01 -1.973 0.048472 *
## neighborhood_n_ridge -1.858e-01 8.568e-02 -2.169 0.030100 *
## neighborhood_n_ridge_hghts -2.378e-01 7.099e-02 -3.350 0.000808 ***
## neighborhood_sawyer -4.018e-01 9.394e-02 -4.277 1.89e-05 ***
## neighborhood_swisu 2.794e-01 1.426e-01 1.960 0.050047 .
## log_rel_price -1.001e+00 8.661e-02 -11.553 < 2e-16 ***
## bath_pr_sqft 9.349e+02 2.516e+02 3.715 0.000203 ***
## student_apt 4.941e-01 3.311e-02 14.922 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for poisson family taken to be 1)
##
## Null deviance: 2784.1 on 1144 degrees of freedom
## Residual deviance: 1735.9 on 1124 degrees of freedom
## AIC: 5202.1
##
## Number of Fisher Scoring iterations: 5
# Draw the default diagnostic plots for the refitted GLM.
plot(final_glm)
#THE ONLY CHANGES NEEDED
#1. Replace "WEIGHT_VARIABLE" with the weight variable that you used for the GLM
#2. In the "lasso_formula" below, use your formula from task 6
#3. Use the same FAMILY from task 6. Type ?glmnet into the console to read the possible options
# Formula used for the LASSO design matrices (and reused by the trees below):
# every column except the dropped reference dummies, num_units and
# log_sale_price, with log(num_units) written as an offset term.
lasso_formula <- applicants ~ . + offset(log(num_units)) -
  neighborhood_n_ames - garage_type_attchd - num_units - log_sale_price

# Numeric design matrices for glmnet, built the same way on both splits.
# NOTE(review): model.matrix() does not appear to emit a column for the
# offset() term and no offset is handed to train(), so the LASSO looks to be
# fit without the exposure adjustment -- confirm this is intended.
lasso_x_train <- model.matrix(lasso_formula, data = train)
lasso_x_test <- model.matrix(lasso_formula, data = test)

# 15-fold cross-validation over a grid of penalty strengths; alpha is fixed
# at 1 so only lambda is tuned.
control <- trainControl(method = "cv", number = 15)
lassoGrid <- expand.grid(alpha = 1, lambda = seq(0.0001, 0.01, by = 0.0005))

# Fit the LASSO so the variables with zero coefficients can be inspected.
# With family = "poisson", glmnet models the counts through a log link (not
# the identity link).
lasso <- train(
  x = lasso_x_train,
  y = train_y,
  method = "glmnet",
  family = "poisson",
  trControl = control,
  tuneGrid = lassoGrid
)
# Unscaled variable importance from the tuned LASSO; an importance of exactly
# zero is used below as the marker for a coefficient shrunk to zero.
lasso_results <- varImp(lasso, scale = FALSE)
lasso_coefficients <- lasso_results$importance
# Design-matrix column names whose importance is exactly zero
variables_with_zeros <- colnames(lasso_x_train)[
  which(lasso_coefficients[["Overall"]] == 0)
]
print("Variables with Coefficients of Zero:")
## [1] "Variables with Coefficients of Zero:"
cat(paste(variables_with_zeros, collapse = "\n"))
## sale_price
## year_sold
## month_sold
## tot_bathrooms
## lot_area
## neighborhood_gilbert
## neighborhood_n_w_ames
## log_rel_price
The following code creates eight samples of the training data. Each sample contains 20% of the records, drawn at random with replacement.
# Eight independent resamples of the training set: each keeps 20% of the rows,
# drawn with replacement. The draws happen in order, sample1 through sample8.
sample1 <- sample_frac(train, 0.2, replace = TRUE)
sample2 <- sample_frac(train, 0.2, replace = TRUE)
sample3 <- sample_frac(train, 0.2, replace = TRUE)
sample4 <- sample_frac(train, 0.2, replace = TRUE)
sample5 <- sample_frac(train, 0.2, replace = TRUE)
sample6 <- sample_frac(train, 0.2, replace = TRUE)
sample7 <- sample_frac(train, 0.2, replace = TRUE)
sample8 <- sample_frac(train, 0.2, replace = TRUE)
These are the settings for the decision tree parameters. Make your adjustments here to test out different parameters.
# Alternative settings that were tried; uncomment one to swap it in.
#ctrl <- rpart.control(minbucket = 5, cp = 0.01, maxdepth = 5)
#ctrl <- rpart.control(minbucket = 10, cp = 0.01, maxdepth = 7)
# Active setting, shared by all eight trees: depth capped at 5, at least 10
# observations per leaf, complexity parameter 0.001.
ctrl <- rpart.control(maxdepth = 5, minbucket = 10, cp = 0.001)
The following code fits eight decision trees, one per sample, using all the variables in the dataframe. Each tree uses only 20% of the records. The number of units is used as the weights.
# One tree per resample, all sharing lasso_formula and ctrl; each row is
# weighted by that sample's num_units.
# NOTE(review): lasso_formula carries offset(log(num_units)) and no `method` is
# given here -- check how rpart's default method treats an offset term and
# whether the predictions need adjusting for it.
tree1 <- rpart(formula = lasso_formula, data = sample1, weights = sample1$num_units, control = ctrl)
tree2 <- rpart(formula = lasso_formula, data = sample2, weights = sample2$num_units, control = ctrl)
tree3 <- rpart(formula = lasso_formula, data = sample3, weights = sample3$num_units, control = ctrl)
tree4 <- rpart(formula = lasso_formula, data = sample4, weights = sample4$num_units, control = ctrl)
tree5 <- rpart(formula = lasso_formula, data = sample5, weights = sample5$num_units, control = ctrl)
tree6 <- rpart(formula = lasso_formula, data = sample6, weights = sample6$num_units, control = ctrl)
tree7 <- rpart(formula = lasso_formula, data = sample7, weights = sample7$num_units, control = ctrl)
tree8 <- rpart(formula = lasso_formula, data = sample8, weights = sample8$num_units, control = ctrl)
# Plot the eight fitted trees in order, each with the same text scaling.
invisible(lapply(
  list(tree1, tree2, tree3, tree4, tree5, tree6, tree7, tree8),
  rpart.plot,
  cex = 0.7
))
# Score the test set with each of the eight trees.
tree1_pred <- predict(tree1, test, type = "vector")
tree2_pred <- predict(tree2, test, type = "vector")
tree3_pred <- predict(tree3, test, type = "vector")
tree4_pred <- predict(tree4, test, type = "vector")
tree5_pred <- predict(tree5, test, type = "vector")
tree6_pred <- predict(tree6, test, type = "vector")
tree7_pred <- predict(tree7, test, type = "vector")
tree8_pred <- predict(tree8, test, type = "vector")
# Bagging: the ensemble prediction is the plain average of the eight trees,
# summed in order from tree1 to tree8.
trees_pred <- Reduce(
  `+`,
  list(
    tree1_pred, tree2_pred, tree3_pred, tree4_pred,
    tree5_pred, tree6_pred, tree7_pred, tree8_pred
  )
) / 8
# Poisson-style log-likelihood score: sum(targets * log(mu)) - sum(mu).
#   targets          - vector of actual values of the target variable
#   predicted_values - vector of predicted values, same length as targets
# Predictions at or below zero count as 0 in the sum(mu) term and as 0.000001
# inside the log, so the score stays finite.
LLfunction <- function(targets, predicted_values) {
  non_positive <- predicted_values <= 0
  # Swap non-positive predictions for `value`; leave the rest untouched.
  floor_at <- function(value) ifelse(non_positive, value, predicted_values)
  sum(targets * log(floor_at(0.000001))) - sum(floor_at(0))
}
# "targets" is a vector containing the actual values for the target variable
# "predicted_values" is a vector containing the predicted values for the target variable
# Test-set log-likelihood of the bagged ensemble, then of the first tree alone
# for comparison.
LLfunction(targets = test$applicants, predicted_values = trees_pred)
## [1] 343.9751
LLfunction(targets = test$applicants, predicted_values = tree1_pred)
## [1] 264.3733
#NO CHANGES NEEDED
# Random forest of 400 trees on the training data, weighted by
# train_number_of_units, with permutation importance switched on.
# NOTE(review): `+ neighborhood_n_ames` looks redundant because `.` already
# includes that column; the LASSO formula subtracts it instead -- confirm
# which was intended.
RF <- randomForest(
  applicants ~ . + neighborhood_n_ames - garage_type_attchd - num_units - log_sale_price,
  data = train,
  weights = train_number_of_units,
  ntree = 400,
  importance = TRUE
)

# Rank variables by the first column of the importance matrix (plotted below
# as the % increase in MSE) and chart the top 30.
imp_RF <- importance(RF)
imp_DF <- data.frame(Variables = row.names(imp_RF), MSE = imp_RF[, 1])
imp_DF <- imp_DF[order(imp_DF$MSE, decreasing = TRUE), ]
ggplot(imp_DF[1:30, ], aes(x = reorder(Variables, MSE), y = MSE, fill = MSE)) +
  geom_bar(stat = "identity") +
  labs(x = "Variables", y = "% increase MSE if variable is randomly permuted") +
  coord_flip() +
  theme(legend.position = "none")
# Log-likelihood under a Poisson model for the number of applicants,
# computed as sum(targets * log(mu)) - sum(mu).
#   targets          - actual values of the target variable
#   predicted_values - predicted values of the target variable
# A prediction <= 0 contributes 0 to the sum(mu) term and is replaced by
# 0.000001 before taking the log.
LLfunction <- function(targets, predicted_values) {
  is_non_positive <- predicted_values <= 0
  log_term <- sum(targets * log(ifelse(is_non_positive, 0.000001, predicted_values)))
  rate_term <- sum(ifelse(is_non_positive, 0, predicted_values))
  log_term - rate_term
}
# Compare the four models by test-set log-likelihood. Every prediction vector
# must be on the response scale (expected number of applicants) before it is
# passed to LLfunction().
print("loglikelihood")
## [1] "loglikelihood"
glm_pred <- predict(final_glm, type = "response", newdata = test)
# caret's predict() for a glmnet regression model already returns predictions
# on the response scale, so no exp() is applied here. Exponentiating a second
# time is what produced the LASSO log-likelihood of about -54465.
# predict() has no use for a `weights` argument, so it is not passed.
# The knitted table below this chunk must be regenerated to show the
# corrected LASSO figure.
lasso_pred <- predict(lasso, newdata = lasso_x_test)
RF_pred <- predict(RF, type = "response", newdata = test)
tibble(
  Model = c("GLM", "LASSO", "Bagged Trees", "Random Forest"),
  LogLikelihood = c(
    LLfunction(test$applicants, glm_pred),
    LLfunction(test$applicants, lasso_pred),
    LLfunction(test$applicants, trees_pred),
    LLfunction(test$applicants, RF_pred)
  )
)
## # A tibble: 4 × 2
## Model LogLikelihood
## <chr> <dbl>
## 1 GLM 237.
## 2 LASSO -54465.
## 3 Bagged Trees 344.
## 4 Random Forest 444.