【问题标题】:Is there an R function to aggregate coefficients of interaction effects of a linear model?是否有一个 R 函数来聚合线性模型的交互效应系数?
【发布时间】:2020-10-23 10:43:44
【问题描述】:

我正在寻找一种自动执行线性回归的方法,并控制变量的影响。 因此,我想:

  1. 使用带有某个因子 A 的交互项创建一个线性模型。
  2. 将给定因子 A 的频率分配给交互项。
  3. 聚合系数,使输出不包含该因子。因此,最终系数计算为因子频率的加权平均值。

下面是我想要实现的示例。

#dataset used
mtcars

#base model with drat and wt as predictors
model_simple <- lm(mpg~ drat + wt, data = mtcars)
coefs_simple <- model_simple$coefficients

#model with interaction terms, controlling for the effect of vs
mtcars$vs <- as.factor(mtcars$vs)
model_int <- lm(mpg~ vs*drat + vs*wt, data = mtcars)

coefs_int <- model_int$coefficients
coefs_int

#frequencies of 'vs' levels
vs_tab <- table(mtcars$vs)
vs_tab

#coefficients of the second model
drat_coef <- (vs_tab['0']/sum(vs_tab))*coefs_int['drat'] + (vs_tab['1']/sum(vs_tab))*(coefs_int['drat'] + coefs_int['vs1:drat'])
drat_coef
wt_coef <- (vs_tab['0']/sum(vs_tab))*coefs_int['wt'] + (vs_tab['1']/sum(vs_tab))*(coefs_int['wt'] + coefs_int['vs1:wt'])
wt_coef

coefs_controlled = c(coefs_int['(Intercept)'], 'drat' = as.numeric(drat_coef), wt_coef)

comparison <- data.frame(simple_model_coefs = coefs_simple,
                         coefs_controlled = coefs_controlled)

comparison

sn-p 中的数据集和模型毫无意义,我要求编程工具来自动化该过程。如果它不在 base R 中,也许有一些 tidymodels 解决方案?

【问题讨论】:

    标签: r linear-regression tidymodels


    【解决方案1】:

    下面是杂乱无章的版本,准备得很快,但确实有效;) 适用于因子值中的空格和跨多个因子。假设因子列与交互有关,因此请根据您自己的使用情况进行调整。 该解决方案的弱点是基于列名称,因此请确保它们相当独特。

    #dataset used
    mtcars
    rm(list= ls())
    
    
    library(stringi)
    library(dplyr)
    
    get_summarised_coeffs  = function(lin_mod) {
      coefs_int <- lin_mod$coefficients
      # # set names
      explanatory_names = rownames(summary(lin_mod)$ coefficients)
      interactions_bool = (lapply(stri_split_fixed(explanatory_names,":"),length) )==2
      factors_data_cols = (lin_mod$model %>% lapply(.,class) %>% unlist)[(lin_mod$model %>% lapply(.,class) %>% unlist) =='factor'] %>% names
      non_factor_data_cols = setdiff(lin_mod$model %>% names() ,factors_data_cols)
      target_colname = ((lin_mod$call %>% as.character )[2] %>% stri_split_fixed( ., "~"))[[1]][1] %>% stri_trim_both()
      non_factor_expl_cols = setdiff(non_factor_data_cols , target_colname)
      # get weights of factors
      weights_interaction_columns = factors_data_cols %>% lapply(.,function(x) {
        return(table(lin_mod$model[[x]]) / length(lin_mod$model[[x]]))
      } )
      names(weights_interaction_columns) = factors_data_cols
      
      # iterate over weights
      weighted_intercepts = weights_interaction_columns %>% seq_along() %>% lapply(., function(iter) {
        # set weights
        factor_weights = weights_interaction_columns[[iter]]
        factor_name = names(weights_interaction_columns)[iter]
        names(factor_weights) = paste0(factor_name,names(factor_weights))
        
        
        # iterate over non factor columns to adjust with according weight
        non_factor_expl_cols %>% lapply(.,function(x) {
          interaction_name = (names(coefs_int)[grepl (x,names(coefs_int)) & grepl (factor_name,names(coefs_int))]) 
          factor_value_name = interaction_name %>% stri_split_fixed(.,":") %>% unlist
          factor_value_name=factor_value_name[grepl(factor_name,factor_value_name)]
          if (length(factor_value_name) > 0 )
            data.frame('colname' =x,  'factor' = factor_name , 'weighted_coeff' = factor_weights[factor_value_name] * coefs_int[interaction_name])
        }) 
      } ) %>% do.call(bind_rows,.)
      
      
      
      # sum up coeffs of according column 
      result = non_factor_expl_cols %>% lapply(.,function(colname_effect) { 
        coefs_int[colname_effect] + weighted_intercepts[weighted_intercepts$colname  == colname_effect,]$weighted_coeff %>% sum
      }) %>% unlist
      
      
      if (!is.na( coefs_int['(Intercept)']))  result = c(   coefs_int['(Intercept)'],result )
      return(result)
    }
    
    
    
    #base model with drat and wt as predictors
    model_simple <- lm(mpg~ drat + wt, data = mtcars)
    
    #model with interaction terms, controlling for the effect of vs
    mtcars$vs <- as.factor(mtcars$vs)
    model_int <- lm(mpg~ vs*drat + vs*wt, data = mtcars)
    get_summarised_coeffs(model_int)
    
    
    #check with multiple factors
    mtcars$am <- as.factor(mtcars$am)
    model_int <- lm(mpg~ vs*drat + wt*am, data = mtcars)
    get_summarised_coeffs(model_int)
    
    

    【讨论】:

    • Widzisz Adam,nawet stacka specjalnie założyłem ;P
    • gościu, myślałem, że jakiś one-liner może jest :D ale dziękuję bardzo
    猜你喜欢
    • 2021-04-09
    • 1970-01-01
    • 2018-12-03
    • 1970-01-01
    • 2021-07-28
    • 1970-01-01
    • 2020-12-10
    • 1970-01-01
    • 2020-03-01
    相关资源
    最近更新 更多