# Load the Smarket data from CSV. readr prints the column types it guessed;
# Direction comes in as character (see the column specification below).
Smarket <- read_csv("data/Smarket.csv")
#> Rows: 1250 Columns: 9
#> ── Column specification ────────────────────────────────────────────────────────
#> Delimiter: ","
#> chr (1): Direction
#> dbl (8): Year, Lag1, Lag2, Lag3, Lag4, Lag5, Volume, Today
#>
#> ℹ Use `spec()` to retrieve the full column specification for this data.
#> ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Convert the character outcome to a factor; it is used as the response of the
# classification models fitted on this data.
Smarket$Direction <- factor(Smarket$Direction)

# Preview the first rows to confirm Direction is now <fct>.
head(Smarket)
#> # A tibble: 6 × 9
#>    Year   Lag1   Lag2   Lag3   Lag4   Lag5 Volume  Today Direction
#>   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <fct>
#> 1  2001  0.381 -0.192 -2.62  -1.06   5.01    1.19  0.959 Up
#> 2  2001  0.959  0.381 -0.192 -2.62  -1.06    1.30  1.03  Up
#> 3  2001  1.03   0.959  0.381 -0.192 -2.62    1.41 -0.623 Down
#> 4  2001 -0.623  1.03   0.959  0.381 -0.192   1.28  0.614 Up
#> 5  2001  0.614 -0.623  1.03   0.959  0.381   1.21  0.213 Up
#> 6  2001  0.213  0.614 -0.623  1.03   0.959   1.35  1.39  Up
Code
# Specify a nearest-neighbour classifier with 3 neighbours on the kknn engine.
knn_spec <- nearest_neighbor(neighbors = 3) |>
  set_mode("classification") |>
  set_engine("kknn")

# Fit Direction on the two most recent lags, using all rows of Smarket.
knn_fit <- knn_spec |>
  fit(Direction ~ Lag1 + Lag2, data = Smarket)

# Print the fitted parsnip object (shows the underlying kknn call).
knn_fit
#> parsnip model object
#>
#>
#> Call:
#> kknn::train.kknn(formula = Direction ~ Lag1 + Lag2, data = data, ks = min_rows(3, data, 5))
#>
#> Type of response variable: nominal
#> Minimal misclassification: 0.5064
#> Best kernel: optimal
#> Best k: 3
Code
# Confusion matrix for the KNN predictions.
# NOTE(review): new_data is the same Smarket data that knn_fit was fitted on,
# so this is a training-set result, not an out-of-sample estimate.
augment(knn_fit, new_data = Smarket) |>
  conf_mat(truth = Direction, estimate = .pred_class)
#>           Truth
#> Prediction Down  Up
#>       Down  602   0
#>       Up      0 648
# Specify a linear discriminant analysis classifier on the MASS engine.
# Uses the native pipe, matching the KNN chunks in this file.
lda_spec <- discrim_linear() |>
  set_mode("classification") |>
  set_engine("MASS")

# Fit Direction on the same two lag predictors, using all rows of Smarket.
lda_fit <- lda_spec |>
  fit(Direction ~ Lag1 + Lag2, data = Smarket)

# Print the fitted parsnip object (priors, group means, LD coefficients).
lda_fit
#> parsnip model object
#>
#> Call:
#> lda(Direction ~ Lag1 + Lag2, data = data)
#>
#> Prior probabilities of groups:
#>   Down     Up
#> 0.4816 0.5184
#>
#> Group means:
#>             Lag1        Lag2
#> Down  0.05068605  0.03229734
#> Up   -0.03969136 -0.02244444
#>
#> Coefficients of linear discriminants:
#>             LD1
#> Lag1 -0.7567605
#> Lag2 -0.4707872
1.4 模型比较
Code
# Collect the fitted models in a named list; the names become the `model` id.
models <- list("LDA" = lda_fit, "KNN" = knn_fit)

# Augment Smarket with each model's predictions and stack the results.
# map() is used rather than imap_dfr(): imap passes each element's name as a
# second positional argument, which augment() has no use for, and the *_dfr
# variants are superseded. bind_rows(.id = ) adds the list names as `model`.
preds <- models |>
  map(\(fit) augment(fit, new_data = Smarket)) |>
  dplyr::bind_rows(.id = "model")

# Keep the model id, the true class, the predicted class and the class
# probabilities.
preds |>
  dplyr::select(model, Direction, .pred_class, .pred_Down, .pred_Up)
#> # A tibble: 2,500 × 5
#>    model Direction .pred_class .pred_Down .pred_Up
#>    <chr> <fct>     <fct>            <dbl>    <dbl>
#>  1 LDA   Up        Up               0.486    0.514
#>  2 LDA   Up        Down             0.503    0.497
#>  3 LDA   Down      Down             0.510    0.490
#>  4 LDA   Up        Up               0.482    0.518
#>  5 LDA   Up        Up               0.485    0.515
#>  6 LDA   Up        Up               0.492    0.508
#>  7 LDA   Down      Down             0.509    0.491
#>  8 LDA   Up        Up               0.490    0.510
#>  9 LDA   Up        Up               0.477    0.523
#> 10 LDA   Up        Down             0.505    0.495
#> # ℹ 2,490 more rows