是否有一个 R 函数来聚合线性模型的交互效应系数？答案

【问题标题】：Is there an R function to aggregate coefficients of interaction effects of a linear model?是否有一个 R 函数来聚合线性模型的交互效应系数？
【发布时间】：2020-10-23 10:43:44
【问题描述】：

我正在寻找一种自动执行线性回归的方法，并控制变量的影响。因此，我想：

使用带有某个因子 A 的交互项创建一个线性模型。
将给定因子 A 的频率分配给交互项。
聚合系数，使输出不包含该因子。因此，最终系数计算为因子频率的加权平均值。

下面是我想要实现的示例。

#dataset used
mtcars

#base model with drat and wt as predictors
model_simple <- lm(mpg~ drat + wt, data = mtcars)
coefs_simple <- model_simple$coefficients

#model with interaction terms, controlling for the effect of vs
mtcars$vs <- as.factor(mtcars$vs)
model_int <- lm(mpg~ vs*drat + vs*wt, data = mtcars)

coefs_int <- model_int$coefficients
coefs_int

#frequencies of 'vs' levels
vs_tab <- table(mtcars$vs)
vs_tab

#coefficients of the second model
drat_coef <- (vs_tab['0']/sum(vs_tab))*coefs_int['drat'] + (vs_tab['1']/sum(vs_tab))*(coefs_int['drat'] + coefs_int['vs1:drat'])
drat_coef
wt_coef <- (vs_tab['0']/sum(vs_tab))*coefs_int['wt'] + (vs_tab['1']/sum(vs_tab))*(coefs_int['wt'] + coefs_int['vs1:wt'])
wt_coef

coefs_controlled = c(coefs_int['(Intercept)'], 'drat' = as.numeric(drat_coef), wt_coef)

comparison <- data.frame(simple_model_coefs = coefs_simple,
                         coefs_controlled = coefs_controlled)

comparison

sn-p 中的数据集和模型毫无意义，我要求编程工具来自动化该过程。如果它不在 base R 中，也许有一些 tidymodels 解决方案？

【问题讨论】：

标签： r linear-regression tidymodels

【解决方案1】：

下面是杂乱无章的版本，准备得很快，但确实有效；）适用于因子值中的空格和跨多个因子。假设因子列与交互有关，因此请根据您自己的使用情况进行调整。该解决方案的弱点是基于列名称，因此请确保它们相当独特。

#dataset used
mtcars
rm(list= ls())


library(stringi)
library(dplyr)

get_summarised_coeffs  = function(lin_mod) {
  coefs_int <- lin_mod$coefficients
  # # set names
  explanatory_names = rownames(summary(lin_mod)$ coefficients)
  interactions_bool = (lapply(stri_split_fixed(explanatory_names,":"),length) )==2
  factors_data_cols = (lin_mod$model %>% lapply(.,class) %>% unlist)[(lin_mod$model %>% lapply(.,class) %>% unlist) =='factor'] %>% names
  non_factor_data_cols = setdiff(lin_mod$model %>% names() ,factors_data_cols)
  target_colname = ((lin_mod$call %>% as.character )[2] %>% stri_split_fixed( ., "~"))[[1]][1] %>% stri_trim_both()
  non_factor_expl_cols = setdiff(non_factor_data_cols , target_colname)
  # get weights of factors
  weights_interaction_columns = factors_data_cols %>% lapply(.,function(x) {
    return(table(lin_mod$model[[x]]) / length(lin_mod$model[[x]]))
  } )
  names(weights_interaction_columns) = factors_data_cols
  
  # iterate over weights
  weighted_intercepts = weights_interaction_columns %>% seq_along() %>% lapply(., function(iter) {
    # set weights
    factor_weights = weights_interaction_columns[[iter]]
    factor_name = names(weights_interaction_columns)[iter]
    names(factor_weights) = paste0(factor_name,names(factor_weights))
    
    
    # iterate over non factor columns to adjust with according weight
    non_factor_expl_cols %>% lapply(.,function(x) {
      interaction_name = (names(coefs_int)[grepl (x,names(coefs_int)) & grepl (factor_name,names(coefs_int))]) 
      factor_value_name = interaction_name %>% stri_split_fixed(.,":") %>% unlist
      factor_value_name=factor_value_name[grepl(factor_name,factor_value_name)]
      if (length(factor_value_name) > 0 )
        data.frame('colname' =x,  'factor' = factor_name , 'weighted_coeff' = factor_weights[factor_value_name] * coefs_int[interaction_name])
    }) 
  } ) %>% do.call(bind_rows,.)
  
  
  
  # sum up coeffs of according column 
  result = non_factor_expl_cols %>% lapply(.,function(colname_effect) { 
    coefs_int[colname_effect] + weighted_intercepts[weighted_intercepts$colname  == colname_effect,]$weighted_coeff %>% sum
  }) %>% unlist
  
  
  if (!is.na( coefs_int['(Intercept)']))  result = c(   coefs_int['(Intercept)'],result )
  return(result)
}



#base model with drat and wt as predictors
model_simple <- lm(mpg~ drat + wt, data = mtcars)

#model with interaction terms, controlling for the effect of vs
mtcars$vs <- as.factor(mtcars$vs)
model_int <- lm(mpg~ vs*drat + vs*wt, data = mtcars)
get_summarised_coeffs(model_int)


#check with multiple factors
mtcars$am <- as.factor(mtcars$am)
model_int <- lm(mpg~ vs*drat + wt*am, data = mtcars)
get_summarised_coeffs(model_int)

【讨论】：

Widzisz Adam，nawet stacka specjalnie założyłem ;P
gościu, myślałem, że jakiś one-liner może jest :D ale dziękuję bardzo