17  多元线性回归

\[ Y_i=\beta_0+\sum_{i=1}^p \beta_p X_{pi}+\epsilon_i,其中\epsilon_i\sim N(0,\sigma^2) \]

Y=

在矩阵表示法中,因变量是一个向量 Y,每个样本都有一行。自变量组合成一个矩阵X,其中每个特征有一列,另外还有一列 1值用于截距。每列每个示例都有一行。回归系数β和残差ε也是向量。

向量β的最佳估计值计算为:

\[ \hat{\mathbf{\beta}}= (X^TX)^{-1}X^TY \]

Code
reg <- function(y, x) { 
    x <- as.matrix(x) 
    x <- cbind(Intercept = 1, x) 
    b <- solve(t(x) %*% x) %*% t(x) %*% y 
    colnames(b) <- "estimate" 
    print(b) 
}

# solve()取矩阵的逆
# t()用于转置矩阵
# %*% 将两个矩阵相乘

advertising<-read_csv("data/ISLR/Advertising.csv")

reg(advertising$sales,advertising[c(-1,-5)])
#>               estimate
#> Intercept  2.938889369
#> TV         0.045764645
#> radio      0.188530017
#> newspaper -0.001037493

17.1 多元回归

Code
library(tidymodels)
library(patchwork)
library(ggfortify)

# 检测变量相关关系
ad <- advertising %>% select(-1)
cor(ad)
#>                   TV      radio  newspaper     sales
#> TV        1.00000000 0.05480866 0.05664787 0.7822244
#> radio     0.05480866 1.00000000 0.35410375 0.5762226
#> newspaper 0.05664787 0.35410375 1.00000000 0.2282990
#> sales     0.78222442 0.57622257 0.22829903 1.0000000

17.1.1 rms::ols()

Code

ols_mlm <- rms::ols(sales~TV+radio+newspaper,data = advertising)
ols_mlm 
#> Linear Regression Model
#> 
#> rms::ols(formula = sales ~ TV + radio + newspaper, data = advertising)
#> 
#>                 Model Likelihood    Discrimination    
#>                       Ratio Test           Indexes    
#> Obs     200    LR chi2    455.01    R2       0.897    
#> sigma1.6855    d.f.            3    R2 adj   0.896    
#> d.f.    196    Pr(> chi2) 0.0000    g        5.685    
#> 
#> Residuals
#> 
#>     Min      1Q  Median      3Q     Max 
#> -8.8277 -0.8908  0.2418  1.1893  2.8292 
#> 
#> 
#>           Coef    S.E.   t     Pr(>|t|)
#> Intercept  2.9389 0.3119  9.42 <0.0001 
#> TV         0.0458 0.0014 32.81 <0.0001 
#> radio      0.1885 0.0086 21.89 <0.0001 
#> newspaper -0.0010 0.0059 -0.18 0.8599
texreg::texreg(ols_mlm)
#> 
#> \begin{table}
#> \begin{center}
#> \begin{tabular}{l c}
#> \hline
#>  & Model 1 \\
#> \hline
#> Intercept  & $2.94^{***}$ \\
#>            & $(0.31)$     \\
#> TV         & $0.05^{***}$ \\
#>            & $(0.00)$     \\
#> radio      & $0.19^{***}$ \\
#>            & $(0.01)$     \\
#> newspaper  & $-0.00$      \\
#>            & $(0.01)$     \\
#> \hline
#> Num. obs.  & $200$        \\
#> R$^2$      & $0.90$       \\
#> Adj. R$^2$ & $0.90$       \\
#> L.R.       & $455.01$     \\
#> \hline
#> \multicolumn{2}{l}{\scriptsize{$^{***}p<0.001$; $^{**}p<0.01$; $^{*}p<0.05$}}
#> \end{tabular}
#> \caption{Statistical models}
#> \label{table:coefficients}
#> \end{center}
#> \end{table}

car::vif(ols_mlm)
#>        TV     radio newspaper 
#>  3.966283  3.970266  3.410504
anova(ols_mlm)
#>                 Analysis of Variance          Response: sales 
#> 
#>  Factor     d.f. Partial SS   MS           F       P     
#>  TV           1  3.058010e+03 3.058010e+03 1076.41 <.0001
#>  radio        1  1.361737e+03 1.361737e+03  479.33 <.0001
#>  newspaper    1  8.871717e-02 8.871717e-02    0.03 0.8599
#>  REGRESSION   3  4.860323e+03 1.620108e+03  570.27 <.0001
#>  ERROR      196  5.568253e+02 2.840945e+00

17.1.2 lm()

Code
# 多元线性回归
lm_spec <-linear_reg() %>%
  set_mode("regression") %>%
  set_engine("lm")  
lm_spec
#> Linear Regression Model Specification (regression)
#> 
#> Computational engine: lm
lm_sales_multi<- lm_spec %>% fit(sales~TV+radio+newspaper,data = advertising)
summary(lm_sales_multi$fit)
#> 
#> Call:
#> stats::lm(formula = sales ~ TV + radio + newspaper, data = data)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -8.8277 -0.8908  0.2418  1.1893  2.8292 
#> 
#> Coefficients:
#>              Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  2.938889   0.311908   9.422   <2e-16 ***
#> TV           0.045765   0.001395  32.809   <2e-16 ***
#> radio        0.188530   0.008611  21.893   <2e-16 ***
#> newspaper   -0.001037   0.005871  -0.177     0.86    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 1.686 on 196 degrees of freedom
#> Multiple R-squared:  0.8972, Adjusted R-squared:  0.8956 
#> F-statistic: 570.3 on 3 and 196 DF,  p-value: < 2.2e-16

tidy(lm_sales_multi)
term estimate std.error statistic p.value
(Intercept) 2.9388894 0.3119082 9.4222884 0.0000000
TV 0.0457646 0.0013949 32.8086244 0.0000000
radio 0.1885300 0.0086112 21.8934961 0.0000000
newspaper -0.0010375 0.0058710 -0.1767146 0.8599151
Code
glance(lm_sales_multi)
r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual nobs
0.8972106 0.8956373 1.68551 570.2707 0 3 -386.1811 782.3622 798.8538 556.8253 196 200
Code

logLik(lm_sales_multi$fit)
#> 'log Lik.' -386.1811 (df=5)
car::vif(lm_sales_multi$fit)
#>        TV     radio newspaper 
#>  1.004611  1.144952  1.145187

17.1.3 线性组合

我们经常希望对回归系数的线性组合进行推断,特别是对于多个类别的分类变量。例如,对于分类变量,回归系数表示其中一个组与参考类别之间的平均差异

Code
lm_multiple <- lm(sales~TV+radio+newspaper,data = advertising)
lm_multiple
#> 
#> Call:
#> lm(formula = sales ~ TV + radio + newspaper, data = advertising)
#> 
#> Coefficients:
#> (Intercept)           TV        radio    newspaper  
#>    2.938889     0.045765     0.188530    -0.001037
library(multcomp, quietly = T)
confint(lm_multiple)
#>                   2.5 %     97.5 %
#> (Intercept)  2.32376228 3.55401646
#> TV           0.04301371 0.04851558
#> radio        0.17154745 0.20551259
#> newspaper   -0.01261595 0.01054097
# 在 R 中,我们通过首先指定一个矩阵来执行此计算,该矩阵指定要进行的比较。此处的矩阵必须具有与回归方程中的回归系数相同的列数
comparison <- matrix(c(0,3,1,1), nrow=1)

lincom <- glht(lm_multiple, linfct = comparison)
summary(lincom)
#> 
#>   Simultaneous Tests for General Linear Hypotheses
#> 
#> Fit: lm(formula = sales ~ TV + radio + newspaper, data = advertising)
#> 
#> Linear Hypotheses:
#>        Estimate Std. Error t value Pr(>|t|)    
#> 1 == 0 0.324786   0.009268   35.05   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> (Adjusted p values reported -- single-step method)
confint(lincom)
#> 
#>   Simultaneous Confidence Intervals
#> 
#> Fit: lm(formula = sales ~ TV + radio + newspaper, data = advertising)
#> 
#> Quantile = 1.9721
#> 95% family-wise confidence level
#>  
#> 
#> Linear Hypotheses:
#>        Estimate lwr    upr   
#> 1 == 0 0.3248   0.3065 0.3431

17.2 交互项

Code
lm_interact<- lm_spec %>%  fit(sales ~ .+ TV:radio, data = advertising)

lm_interact
#> parsnip model object
#> 
#> 
#> Call:
#> stats::lm(formula = sales ~ . + TV:radio, data = data)
#> 
#> Coefficients:
#> (Intercept)           id           TV        radio    newspaper     TV:radio  
#>   6.7912439   -0.0005502    0.0190783    0.0278567    0.0012488    0.0010873

17.3 变换

17.3.1 多项式

Code
rec_spec_square <- recipe(sales ~ TV, data = advertising) %>%  
  step_mutate(`TV^2` = TV^2)  
rec_spec_square

lm_wf_square <- workflow() %>%  
  add_model(lm_spec) %>%  
  add_recipe(rec_spec_square)  

lm_wf_square %>%
  fit(advertising)
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: linear_reg()
#> 
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 1 Recipe Step
#> 
#> • step_mutate()
#> 
#> ── Model ───────────────────────────────────────────────────────────────────────
#> 
#> Call:
#> stats::lm(formula = ..y ~ ., data = data)
#> 
#> Coefficients:
#> (Intercept)           TV       `TV^2`  
#>   6.114e+00    6.727e-02   -6.847e-05

lm(sales ~ TV+ I(TV^2),data = advertising)
#> 
#> Call:
#> lm(formula = sales ~ TV + I(TV^2), data = advertising)
#> 
#> Coefficients:
#> (Intercept)           TV      I(TV^2)  
#>   6.114e+00    6.727e-02   -6.847e-05

17.3.2 对数变换

Code
rec_spec_log <- recipe(sales ~ TV, data = advertising) %>%  
  step_log(TV)  

lm_wf_log <- workflow() %>% 
  add_model(lm_spec) %>%  
  add_recipe(rec_spec_log) 

lm_wf_log %>%
  fit(advertising)
#> ══ Workflow [trained] ══════════════════════════════════════════════════════════
#> Preprocessor: Recipe
#> Model: linear_reg()
#> 
#> ── Preprocessor ────────────────────────────────────────────────────────────────
#> 1 Recipe Step
#> 
#> • step_log()
#> 
#> ── Model ───────────────────────────────────────────────────────────────────────
#> 
#> Call:
#> stats::lm(formula = ..y ~ ., data = data)
#> 
#> Coefficients:
#> (Intercept)           TV  
#>      -4.203        3.901

lm(sales ~ log(TV),data = advertising)
#> 
#> Call:
#> lm(formula = sales ~ log(TV), data = advertising)
#> 
#> Coefficients:
#> (Intercept)      log(TV)  
#>      -4.203        3.901

17.4 回归诊断

https://www.statmethods.net/stats/rdiagnostics.html

Code
library(car)
car::scatterplotMatrix(ad)  # 多重共线性

Code
confint(lm_sales_multi$fit)  # 95%置信区间
#>                   2.5 %     97.5 %
#> (Intercept)  2.32376228 3.55401646
#> TV           0.04301371 0.04851558
#> radio        0.17154745 0.20551259
#> newspaper   -0.01261595 0.01054097
plot(lm_sales_multi$fit)

Code
autoplot(lm_sales_multi) #回归诊断图

17.4.1 线性假设

残差图, 成分残差图

Code
plot(lm_sales_multi$fit,1)  

Code
crPlots(lm_sales_multi$fit)

17.4.2 正态性假设Q-Q图

Standardized Residuals

Code
plot(lm_sales_multi$fit,2) 

Code
summary(powerTransform(lm_sales_multi$fit))  
#> bcPower Transformation to Normality 
#>    Est Power Rounded Pwr Wald Lwr Bnd Wald Upr Bnd
#> Y1    0.9074           1       0.7569       1.0578
#> 
#> Likelihood ratio test that transformation parameter is equal to 0
#>  (log transformation)
#>                            LRT df       pval
#> LR test, lambda = (0) 147.1578  1 < 2.22e-16
#> 
#> Likelihood ratio test that no transformation is needed
#>                           LRT df    pval
#> LR test, lambda = (1) 1.41991  1 0.23342
Code
plot(lm_sales_multi$fit,3)

17.4.3 误差相关性

Code
durbinWatsonTest(lm_sales_multi$fit)      #结果表明rho=0
#>  lag Autocorrelation D-W Statistic p-value
#>    1     -0.04687792      2.083648   0.554
#>  Alternative hypothesis: rho != 0

17.4.4 误差项的方差齐性

Code
ncvTest(lm_sales_multi$fit)
#> Non-constant Variance Score Test 
#> Variance formula: ~ fitted.values 
#> Chisquare = 5.355982, Df = 1, p = 0.020651
spreadLevelPlot(lm_sales_multi$fit)

#> 
#> Suggested power transformation:  1.499852
Code
tibble(
    abs_studentized_residuals=abs(rstudent(lm_sales_multi$fit)),
    fitted_values=lm_sales_multi$fit$model$sales
) %>% ggplot(aes(fitted_values,abs_studentized_residuals))+
    geom_point(pch=21)+
    geom_smooth()

17.4.5 异常观测点

Code
# studentized residual Plot
residplot<-function(fit,nbreaks=10){
  z<-rstudent(fit)
  hist(z,breaks=nbreaks,freq=FALSE)     #密度直方图
  title(xlab="Studentized Residual")
  rug(z,col="brown")                    #轴须图
  curve(dnorm(x,mean=mean(z),sd=sd(z)),add=TRUE,col="blue",lwd=2) #正态密度曲线
  lines(density(z)$x,density(z)$y,col="red",lwd=2)       #样本密度曲线
  legend("topright",c("Normal Curve","Kernel Density Curve"),#图例
  lty = c(3,2),pch = c(21,22),col=c("blue","red"),cex=.7)
}
residplot(lm_sales_multi$fit)

Code
#######################################################################
library(car)
outlierTest(lm_sales_multi$fit)            #离群点
#高杠杆值点
hat.plot<-function(fit){
  p<-length(coefficients(fit)) #模型估计的参数数目(包含截距项)
  n<-length(fitted(fit))       #样本量
  plot(hatvalues(fit),main="Index Plot of Hat Values")#帽子值
  abline(h=c(2,3)*p/n,col="red",lty=2)  #大于帽子均值p/n的2或3倍被认为是高杠杆值
  identity(1:n,hatvalues(fit),names(hatvalues(fit)))
}
hat.plot(lm_sales_multi$fit)
####强影响点
#Cook's D图形    大于4/(n-k-1)  k为预测变量数目
cutoff<-4/(nrow(advertising)-length(lm_sales_multi$fit$coefficients)-2)
{plot(lm_sales_multi$fit,which=4,cook.levels=cutoff)
abline(h=cutoff,lty=2,col="red")}
#变量添加图
avPlots(lm_sales_multi$fit,ask=FALSE,id.method="identity")

###
influencePlot(lm_sales_multi$fit,id.method="identity",main="Influence Plot")

17.4.6 多重共线性

Code
vif(lm_sales_multi$fit)
#>        TV     radio newspaper 
#>  1.004611  1.144952  1.145187

sqrt(vif(lm_sales_multi$fit))>=2       #vif平方根 ≥2 存在
#>        TV     radio newspaper 
#>     FALSE     FALSE     FALSE

17.5 逐步回归

逐步回归是筛选变量,有向前、向后和两个方向同时进行三个方法。

  • direction = "both"双向

  • direction = "backward"向后

  • direction = "forward"向前

Code
step_full <- lm(sales~ . ,data = advertising[-1])
step_lm_0 <- lm(sales~ 1 ,data = advertising[-1])

step_forward <- stats::step(step_lm_0,scope =formula(step_full),  
                            direction = "forward")
#> Start:  AIC=661.8
#> sales ~ 1
#> 
#>             Df Sum of Sq    RSS    AIC
#> + TV         1    3314.6 2102.5 474.52
#> + radio      1    1798.7 3618.5 583.10
#> + newspaper  1     282.3 5134.8 653.10
#> <none>                   5417.1 661.80
#> 
#> Step:  AIC=474.52
#> sales ~ TV
#> 
#>             Df Sum of Sq     RSS    AIC
#> + radio      1   1545.62  556.91 210.82
#> + newspaper  1    183.97 1918.56 458.20
#> <none>                   2102.53 474.52
#> 
#> Step:  AIC=210.82
#> sales ~ TV + radio
#> 
#>             Df Sum of Sq    RSS    AIC
#> <none>                   556.91 210.82
#> + newspaper  1  0.088717 556.83 212.79
summary(step_forward)
#> 
#> Call:
#> lm(formula = sales ~ TV + radio, data = advertising[-1])
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -8.7977 -0.8752  0.2422  1.1708  2.8328 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  2.92110    0.29449   9.919   <2e-16 ***
#> TV           0.04575    0.00139  32.909   <2e-16 ***
#> radio        0.18799    0.00804  23.382   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 1.681 on 197 degrees of freedom
#> Multiple R-squared:  0.8972, Adjusted R-squared:  0.8962 
#> F-statistic: 859.6 on 2 and 197 DF,  p-value: < 2.2e-16



step_backward <- stats::step(object = step_full,#scope = formula(step_lm_0) ,
                         direction = "backward")
#> Start:  AIC=212.79
#> sales ~ TV + radio + newspaper
#> 
#>             Df Sum of Sq    RSS    AIC
#> - newspaper  1      0.09  556.9 210.82
#> <none>                    556.8 212.79
#> - radio      1   1361.74 1918.6 458.20
#> - TV         1   3058.01 3614.8 584.90
#> 
#> Step:  AIC=210.82
#> sales ~ TV + radio
#> 
#>         Df Sum of Sq    RSS    AIC
#> <none>                556.9 210.82
#> - radio  1    1545.6 2102.5 474.52
#> - TV     1    3061.6 3618.5 583.10
summary(step_backward )
#> 
#> Call:
#> lm(formula = sales ~ TV + radio, data = advertising[-1])
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -8.7977 -0.8752  0.2422  1.1708  2.8328 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  2.92110    0.29449   9.919   <2e-16 ***
#> TV           0.04575    0.00139  32.909   <2e-16 ***
#> radio        0.18799    0.00804  23.382   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 1.681 on 197 degrees of freedom
#> Multiple R-squared:  0.8972, Adjusted R-squared:  0.8962 
#> F-statistic: 859.6 on 2 and 197 DF,  p-value: < 2.2e-16

step_both<- stats::step(object = step_lm_0, scope = formula(step_full) ,
                         direction = "both")
#> Start:  AIC=661.8
#> sales ~ 1
#> 
#>             Df Sum of Sq    RSS    AIC
#> + TV         1    3314.6 2102.5 474.52
#> + radio      1    1798.7 3618.5 583.10
#> + newspaper  1     282.3 5134.8 653.10
#> <none>                   5417.1 661.80
#> 
#> Step:  AIC=474.52
#> sales ~ TV
#> 
#>             Df Sum of Sq    RSS    AIC
#> + radio      1    1545.6  556.9 210.82
#> + newspaper  1     184.0 1918.6 458.20
#> <none>                   2102.5 474.52
#> - TV         1    3314.6 5417.1 661.80
#> 
#> Step:  AIC=210.82
#> sales ~ TV + radio
#> 
#>             Df Sum of Sq    RSS    AIC
#> <none>                    556.9 210.82
#> + newspaper  1      0.09  556.8 212.79
#> - radio      1   1545.62 2102.5 474.52
#> - TV         1   3061.57 3618.5 583.10
summary(step_both)
#> 
#> Call:
#> lm(formula = sales ~ TV + radio, data = advertising[-1])
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -8.7977 -0.8752  0.2422  1.1708  2.8328 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept)  2.92110    0.29449   9.919   <2e-16 ***
#> TV           0.04575    0.00139  32.909   <2e-16 ***
#> radio        0.18799    0.00804  23.382   <2e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 1.681 on 197 degrees of freedom
#> Multiple R-squared:  0.8972, Adjusted R-squared:  0.8962 
#> F-statistic: 859.6 on 2 and 197 DF,  p-value: < 2.2e-16

17.6 模型选择和优化

n是观测值的数量,p是模型的参数数(等于回归系数的数量)

\(\mathcal{L}\)是模型拟合的最大似然值

Code
########################两模型比较
lm1 <- lm_spec %>% fit(sales~TV+radio+newspaper,data = advertising)
lm2 <- lm_spec %>% fit(sales~TV*radio*newspaper,data = advertising[-1])

anova(lm2$fit,lm1$fit) #anova() 嵌套模型
Res.Df RSS Df Sum of Sq F Pr(>F)
192 169.8597 NA NA NA NA
196 556.8253 -4 -386.9656 109.3511 0
Code


##########################################            AIC 
AIC(lm2$fit,lm1$fit)  # 赤池信息准则  AIC值小的优先选择
df AIC
lm2\(fit | 9| 552.9065| |lm1\)fit 5 782.3622
Code
#BIC


####################################相对重要性##################################
ad <- scale(advertising[-1])
ad
#>                 TV        radio    newspaper        sales
#>   [1,]  0.96742460  0.979065591  1.774492530  1.548168135
#>   [2,] -1.19437904  1.080097401  0.667902716 -0.694303815
#>   [3,] -1.51235985  1.524637364  1.779084189 -0.905134512
#>   [4,]  0.05191939  1.214806480  1.283185019  0.858176766
#>   [5,]  0.39319551 -0.839506984  1.278593360 -0.215143142
#>   [6,] -1.61136487  1.726700983  2.040808751 -1.307629477
#>   [7,] -1.04295960  0.642292892 -0.323895625 -0.425973838
#>   [8,] -0.31265202 -0.246787034 -0.870303044 -0.157643861
#>   [9,] -1.61252963 -1.425491481 -1.357018896 -1.767623723
#>  [10,]  0.61450084 -1.391814211 -0.429503781 -0.655970962
#>  [11,] -0.94278982 -1.176279684 -0.291754012 -1.039299500
#>  [12,]  0.78805080  0.049572941 -1.219269126  0.647346069
#>  [13,] -1.43548537  0.797208333  1.622967784 -0.924300938
#>  [14,] -0.57705364 -1.055041512 -1.072336039 -0.828468804
#>  [15,]  0.66458573  0.649028346  0.709227646  0.954008900
#>  [16,]  0.56325118  1.645875535  1.026052116  1.605667416
#>  [17,] -0.92298882  0.898240143  3.831555755 -0.291808850
#>  [18,]  1.56494899  1.100303763  1.159210227  1.988995954
#>  [19,] -0.90668211 -0.186167948 -0.562661892 -0.521805973
#>  [20,]  0.00299927  0.042837487 -0.525928620  0.110686115
#>  [21,]  0.83114711  0.298784739  1.049010411  0.762344631
#>  [22,]  1.05245243 -1.223427861 -0.323895625 -0.291808850
#>  [23,] -1.55895045 -0.495998831  0.874527370 -1.614292308
#>  [24,]  0.94645883 -0.428644291 -0.199920832  0.283183958
#>  [25,] -0.98705089 -0.718268813 -0.562661892 -0.828468804
#>  [26,]  1.34946748 -1.331195125 -0.507561984 -0.387640985
#>  [27,] -0.04825039  0.406552002 -0.824386454  0.187351823
#>  [28,]  1.08390109 -0.442115199 -0.351445579  0.359849666
#>  [29,]  1.18523563  0.258372015 -0.351445579  0.934842473
#>  [30,] -0.89037540 -0.489263377  0.470461379 -0.675137388
#>  [31,]  1.69889695  0.339197463  0.580661195  1.414003146
#>  [32,] -0.39767985 -0.394967022  0.369444882 -0.406807411
#>  [33,] -0.58054794 -1.465904205 -0.025437791 -0.847635231
#>  [34,]  1.38091613 -0.219845218 -1.389160509  0.647346069
#>  [35,] -0.59801941 -1.472639659 -1.063152721 -0.866801658
#>  [36,]  1.67327212 -1.290782401 -1.012644472 -0.234309569
#>  [37,]  1.39605808  1.383192830 -1.173352536  2.180660223
#>  [38,] -0.84262004  1.760378253  0.695452669  0.129852542
#>  [39,] -1.21068574  0.231430199  0.208736817 -0.751803096
#>  [40,]  0.94296453  0.972330137  0.066395389  1.433169573
#>  [41,]  0.64594949 -0.064929776  0.048028753  0.494014654
#>  [42,]  0.34893444  0.682705616  0.374036541  0.589846789
#>  [43,]  1.70705030  0.298784739 -1.320285624  1.279838158
#>  [44,]  0.69719914 -1.001157880 -0.190737514 -0.215143142
#>  [45,] -1.42034342  0.164075659  0.585252854 -1.058465927
#>  [46,]  0.32680391 -0.051458868  0.043437094  0.168185396
#>  [47,] -0.66790531 -0.900126070  0.236286771 -0.655970962
#>  [48,]  1.08157156  1.228277388 -0.553478574  1.758998831
#>  [49,]  0.93364642 -0.502734285  0.888302347  0.149018969
#>  [50,] -0.93347170 -0.778887899  0.286795020 -0.828468804
#>  [51,]  0.61450084 -1.358136941  0.185778522 -0.502639546
#>  [52,] -0.54327546 -0.920332432 -1.237635762 -0.636804535
#>  [53,]  0.80785181  1.241748296  0.415361472  1.644000269
#>  [54,]  0.41416128  1.544843726  1.292368337  1.375670293
#>  [55,]  1.34713795  0.372874732 -0.672861707  1.184006023
#>  [56,]  0.60401795  1.760378253  1.352059904  1.854830966
#>  [57,] -1.62767157  0.325726555  0.498011333 -1.633458735
#>  [58,] -0.12628963 -0.273728850 -0.640720094 -0.157643861
#>  [59,]  0.74262497  1.773849161  0.328119951  1.873997393
#>  [60,]  0.74146021  0.420022910 -0.975911200  0.839010339
#>  [61,] -1.08955020 -1.432226935 -0.420320463 -1.135131635
#>  [62,]  1.33083124  1.309102836  1.108701978  1.950663100
#>  [63,]  1.07458297 -0.522940647 -0.149412583  0.321516812
#>  [64,] -0.51648587  0.426758364 -1.017236131 -0.004312446
#>  [65,] -0.18569264  1.315838290 -0.075946040  0.762344631
#>  [66,] -0.90901164 -0.940538794 -1.361610555 -0.905134512
#>  [67,] -1.34579847  0.089985665 -1.301918988 -0.866801658
#>  [68,] -0.09018192 -0.590295187 -0.934586269 -0.119311008
#>  [69,]  1.05245243  0.285313831 -0.897852997  0.934842473
#>  [70,]  0.81251087  1.389928284 -0.154004242  1.586500989
#>  [71,]  0.60634748  0.494112904  0.374036541  0.819843912
#>  [72,] -0.43378756 -0.603766095  0.052620412 -0.310975277
#>  [73,] -1.40054242  0.655763800 -0.516745302 -1.000966646
#>  [74,] -0.20549365 -1.183015138  0.034253776 -0.579305254
#>  [75,]  0.77290886  0.089985665 -0.801428159  0.570680362
#>  [76,] -1.51585415  1.376457376  2.702007645 -1.020133073
#>  [77,] -1.39238907 -1.459168751 -0.452462076 -1.365128758
#>  [78,] -0.30915772  0.352668371 -0.750919910  0.034020408
#>  [79,] -1.64980211  0.446964726 -0.971319541 -1.671791589
#>  [80,] -0.36157214 -1.048306058 -0.342262261 -0.579305254
#>  [81,] -0.82281904  0.231430199 -0.378995532 -0.425973838
#>  [82,]  1.08040679 -1.290782401  0.291386679 -0.330141704
#>  [83,] -0.83563145 -0.199638856  0.089353684 -0.521805973
#>  [84,] -0.91600023  1.430341008  0.231695112 -0.080978154
#>  [85,]  0.77407363  1.329309198  0.149045251  1.471502427
#>  [86,]  0.53762635 -0.327612482  1.613784466  0.225684677
#>  [87,] -0.82398380  0.285313831 -0.668270048 -0.387640985
#>  [88,] -0.42330468  1.167658302  1.498992991  0.379016092
#>  [89,] -0.68421201  0.150604751  1.967342208 -0.215143142
#>  [90,] -0.43378756  1.652610989  0.957177231  0.513181081
#>  [91,] -0.14842017 -1.236898769 -0.975911200 -0.540972400
#>  [92,] -1.37957665 -1.465904205  0.112311979 -1.288463050
#>  [93,]  0.82299375  0.689441070  1.306143314  1.030674608
#>  [94,]  1.20969569  0.891504689  1.916833959  1.567334562
#>  [95,] -0.46174192 -0.623972457 -0.902444656 -0.483473119
#>  [96,]  0.18936165  0.561467444  1.026052116  0.551513935
#>  [97,]  0.58887601 -1.331195125 -1.132027606 -0.445140265
#>  [98,]  0.44095087 -0.152490678 -0.392770509  0.283183958
#>  [99,]  1.66162447  1.282161020  0.947993914  2.180660223
#> [100,] -0.13793728  1.241748296  0.704635987  0.609013216
#> [101,]  0.87773770 -1.277311493  0.883710688 -0.445140265
#> [102,]  1.73966372  0.878033781  3.230048428  1.873997393
#> [103,]  1.55097181 -0.886655162 -0.420320463  0.149018969
#> [104,]  0.47589381 -0.408437930 -0.581028528  0.129852542
#> [105,]  1.06177055  0.743324702 -1.159577559  1.279838158
#> [106,] -0.10648863  1.558314633  1.306143314  0.992341754
#> [107,] -1.42150819 -0.826036076 -0.039212768 -1.307629477
#> [108,] -0.65975195 -1.546729653 -0.337670602 -1.020133073
#> [109,] -1.56011521 -1.539994199 -0.227470786 -1.671791589
#> [110,]  1.26211011  0.244901107 -1.150394241  1.107340316
#> [111,]  0.91733971 -1.014628788  1.191351840 -0.119311008
#> [112,]  1.10253732  0.992536499 -0.337670602  1.490668854
#> [113,]  0.33379250 -0.529676101 -1.292735670  0.014853981
#> [114,]  0.72864780 -0.179432494 -0.911627974  0.359849666
#> [115,] -0.80185327  1.585256449  0.181186863  0.110686115
#> [116,] -0.83796098  0.790472879  1.016868798 -0.272642423
#> [117,] -0.09134669 -0.603766095 -0.227470786 -0.349308131
#> [118,] -0.82281904 -1.513052383 -0.723369956 -0.885968085
#> [119,] -0.24858995  0.918446505  2.233658429  0.359849666
#> [120,] -1.48673502 -0.489263377 -0.378995532 -1.422628038
#> [121,] -0.06688662  0.238165653  0.718410964  0.283183958
#> [122,] -1.49372361 -0.105342500  0.911260642 -1.345962331
#> [123,]  0.89637394 -1.405285119 -0.686636684 -0.464306692
#> [124,] -0.27887383  0.763531064 -0.833569772  0.225684677
#> [125,]  0.96043601  0.608615622  2.004075480  1.088173889
#> [126,] -0.69702443 -0.772152445 -0.213695809 -0.655970962
#> [127,] -1.62184775  1.053155585  0.920443960 -1.422628038
#> [128,] -0.77855797 -1.566936015 -0.980502859 -1.000966646
#> [129,]  0.85327764  1.733436437 -1.256002398  2.046495235
#> [130,] -1.01849954 -0.758681537  0.576069536 -0.828468804
#> [131,] -1.70454606  1.100303763 -1.003461154 -2.380949385
#> [132,]  1.37625707 -1.371607849  0.571477877 -0.253475996
#> [133,] -1.61485916  0.265107469 -1.306510647 -1.595125881
#> [134,]  0.84745381  0.689441070  0.667902716  1.069007462
#> [135,] -1.28290117  1.032949223  1.609192807 -0.617638108
#> [136,] -1.15011797  1.598727357 -1.012644472 -0.464306692
#> [137,] -1.41451960  1.059891039 -0.975911200 -0.866801658
#> [138,]  1.47526209  0.379610186  1.338284927  1.299004585
#> [139,] -1.21185051  0.177546567 -0.461645394 -0.847635231
#> [140,]  0.44095087  1.389928284 -1.324877283  1.279838158
#> [141,] -0.85776198 -0.421908837 -0.810611477 -0.598471681
#> [142,]  0.54345018  0.817414695  2.068358705  0.992341754
#> [143,]  0.85560717  0.669234708  0.337303269  1.164839596
#> [144,] -0.49435534 -1.183015138  0.176595204 -0.694303815
#> [145,] -0.59219559 -0.570088825  0.383219859 -0.502639546
#> [146,] -0.07853427 -1.438962389 -0.989686177 -0.713470242
#> [147,]  1.08390109 -1.075247874 -1.003461154 -0.157643861
#> [148,]  1.12000880  1.733436437  0.631169444  2.180660223
#> [149,] -1.27008875  1.147451941 -0.856528067 -0.598471681
#> [150,] -1.19204951  0.170811113 -0.457053735 -0.751803096
#> [151,]  1.55679563 -0.630707911  0.295978338  0.398182519
#> [152,] -0.30333390 -1.001157880  0.833202439 -0.464306692
#> [153,]  0.58887601  0.002424763 -0.750919910  0.494014654
#> [154,]  0.28254284  1.107039217  0.328119951  0.954008900
#> [155,]  0.47472905 -0.145755224 -0.966727882  0.302350385
#> [156,] -1.66494405 -0.785623353 -1.141210924 -2.074286554
#> [157,] -0.61898518  1.362986468  0.915852301  0.244851104
#> [158,]  0.03211839 -1.479375113 -0.287162353 -0.751803096
#> [159,] -1.57642192  0.918446505  0.672494375 -1.288463050
#> [160,] -0.17870405 -0.327612482  0.185778522 -0.215143142
#> [161,]  0.29652002 -0.347818844  0.006703822  0.072353262
#> [162,] -0.71449590  0.844356511  0.860752393 -0.138477435
#> [163,]  0.48171764 -0.347818844 -0.227470786  0.168185396
#> [164,]  0.19169118  0.911711051 -1.063152721  0.762344631
#> [165,] -0.34759496 -0.576824279 -1.154985900 -0.406807411
#> [166,]  1.01867425 -1.337930579  2.490791332 -0.406807411
#> [167,] -1.50420650  0.965594683 -0.411137145 -1.154298062
#> [168,]  0.69603438 -1.216692407 -0.512153643 -0.349308131
#> [169,]  0.79620416  0.022631125  1.241860088  0.589846789
#> [170,]  1.59872717 -0.852977892 -1.109069311  0.187351823
#> [171,] -1.13031697 -0.785623353 -0.558070233 -1.077632354
#> [172,]  0.20333883 -0.159226132  0.773510872  0.091519689
#> [173,] -1.48440549 -0.213109764 -0.622353458 -1.230963769
#> [174,]  0.24876466 -1.088718782 -0.815203136 -0.445140265
#> [175,]  0.87773770 -1.337930579 -0.801428159 -0.483473119
#> [176,]  1.51253457  1.726700983  0.516377969  2.487323054
#> [177,]  1.18057657  0.467171088 -0.470828712  1.184006023
#> [178,]  0.26973043 -1.041570604  0.213328476 -0.445140265
#> [179,]  1.51020504 -1.412020573 -0.314712307 -0.425973838
#> [180,]  0.21615124 -0.893390616 -0.594803505 -0.272642423
#> [181,]  0.11132240 -1.391814211 -1.021827790 -0.675137388
#> [182,]  0.83231187 -1.203221500 -0.144820924 -0.349308131
#> [183,] -1.05810154 -1.183015138 -0.039212768 -1.020133073
#> [184,]  1.63716441  1.329309198  1.893875664  2.333991639
#> [185,]  1.24347388 -0.132284316 -0.025437791  0.685678923
#> [186,]  0.67506861  1.470753732 -0.502970325  1.644000269
#> [187,] -0.08785239 -1.425491481 -0.181554196 -0.713470242
#> [188,]  0.51316629  0.366139279 -0.567253551  0.628179642
#> [189,]  1.61852817 -0.630707911 -1.233044103  0.359849666
#> [190,] -1.49488838 -0.751946083 -0.328487284 -1.403461612
#> [191,] -1.25261728  1.201335572 -1.136619265 -0.617638108
#> [192,] -0.83330192 -0.839506984 -1.127435947 -0.790135950
#> [193,] -1.51235985 -1.290782401  0.048028753 -1.556793027
#> [194,]  0.23012842  1.261954658 -1.237635762  1.069007462
#> [195,]  0.03095363  0.830885603 -1.127435947  0.628179642
#> [196,] -1.26775922 -1.317724217 -0.769286546 -1.230963769
#> [197,] -0.61549089 -1.236898769 -1.031011108 -0.828468804
#> [198,]  0.34893444 -0.940538794 -1.109069311 -0.234309569
#> [199,]  1.59057381  1.261954658  1.636742761  2.199826650
#> [200,]  0.99071990 -0.987686972 -1.003461154 -0.119311008
#> attr(,"scaled:center")
#>        TV     radio newspaper     sales 
#>  147.0425   23.2640   30.5540   14.0225 
#> attr(,"scaled:scale")
#>        TV     radio newspaper     sales 
#> 85.854236 14.846809 21.778621  5.217457
#R平方贡献率  #相对权重 
relweights<-function(fit,...){
  R<-cor(fit$model)
  nvar<-ncol(R)
  rxx<-R[2:nvar,2:nvar]
  rxy<-R[2:nvar,1]
  svd<-eigen(rxx)
  evec<-svd$vectors
  ev<-svd$values
  delta<-diag(sqrt(ev))
  lambda<-evec %*%delta %*% t(evec)
  lambdaasq<-lambda^2
  beta<-solve(lambda) %*% rxy
  r2<-colSums(beta^2)
  rawwgt<-lambdaasq%*%beta^2
  import<-(rawwgt/r2)*100            #计算相对权重
  import<-data.frame(Weights=import)  #数据框化
  row.names(import)<-names(fit$model[2:nvar])
  import<-import[order(import$Weights),1,drop=FALSE] #升序排序
  dotchart(import$Weights,labels=row.names(import),   #点图
           xlab = "% of R-Square",pch=19,
           main="Relative Importiance of Predictor Variables ",
           sub=paste("Total R-Square =",round(r2,digits = 3)),
  ...)
return(import)
}
relweights(lm1$fit,col="blue")

Weights
newspaper 2.468097
radio 32.198236
TV 65.333667

17.7 线性可加模型

additive model

\[ Y_i=\beta_0+ \beta_1 X_i+ \beta_2 X_i^2+\epsilon_i \]

\[ Y_i=\beta_0+ \beta_1\times \log(X_i)+\epsilon_i \]

\[ Y_i=\beta_0+ \beta_1 (X_i\times W_i)+\epsilon_i \]

\[ Y_i=\beta_0+ \beta_1\times \exp(X_i)+\epsilon_i \]

\[ Y_i=\beta_0+ \beta_1\times \sin(X_i)+\epsilon_i \]