下面是杂乱无章的版本,准备得很快,但确实有效;)
适用于因子值中的空格和跨多个因子。假设因子列与交互有关,因此请根据您自己的使用情况进行调整。
该解决方案的弱点是基于列名称,因此请确保它们相当独特。
#dataset used
mtcars
rm(list= ls())
library(stringi)
library(dplyr)
get_summarised_coeffs = function(lin_mod) {
coefs_int <- lin_mod$coefficients
# # set names
explanatory_names = rownames(summary(lin_mod)$ coefficients)
interactions_bool = (lapply(stri_split_fixed(explanatory_names,":"),length) )==2
factors_data_cols = (lin_mod$model %>% lapply(.,class) %>% unlist)[(lin_mod$model %>% lapply(.,class) %>% unlist) =='factor'] %>% names
non_factor_data_cols = setdiff(lin_mod$model %>% names() ,factors_data_cols)
target_colname = ((lin_mod$call %>% as.character )[2] %>% stri_split_fixed( ., "~"))[[1]][1] %>% stri_trim_both()
non_factor_expl_cols = setdiff(non_factor_data_cols , target_colname)
# get weights of factors
weights_interaction_columns = factors_data_cols %>% lapply(.,function(x) {
return(table(lin_mod$model[[x]]) / length(lin_mod$model[[x]]))
} )
names(weights_interaction_columns) = factors_data_cols
# iterate over weights
weighted_intercepts = weights_interaction_columns %>% seq_along() %>% lapply(., function(iter) {
# set weights
factor_weights = weights_interaction_columns[[iter]]
factor_name = names(weights_interaction_columns)[iter]
names(factor_weights) = paste0(factor_name,names(factor_weights))
# iterate over non factor columns to adjust with according weight
non_factor_expl_cols %>% lapply(.,function(x) {
interaction_name = (names(coefs_int)[grepl (x,names(coefs_int)) & grepl (factor_name,names(coefs_int))])
factor_value_name = interaction_name %>% stri_split_fixed(.,":") %>% unlist
factor_value_name=factor_value_name[grepl(factor_name,factor_value_name)]
if (length(factor_value_name) > 0 )
data.frame('colname' =x, 'factor' = factor_name , 'weighted_coeff' = factor_weights[factor_value_name] * coefs_int[interaction_name])
})
} ) %>% do.call(bind_rows,.)
# sum up coeffs of according column
result = non_factor_expl_cols %>% lapply(.,function(colname_effect) {
coefs_int[colname_effect] + weighted_intercepts[weighted_intercepts$colname == colname_effect,]$weighted_coeff %>% sum
}) %>% unlist
if (!is.na( coefs_int['(Intercept)'])) result = c( coefs_int['(Intercept)'],result )
return(result)
}
#base model with drat and wt as predictors
model_simple <- lm(mpg~ drat + wt, data = mtcars)
#model with interaction terms, controlling for the effect of vs
mtcars$vs <- as.factor(mtcars$vs)
model_int <- lm(mpg~ vs*drat + vs*wt, data = mtcars)
get_summarised_coeffs(model_int)
#check with multiple factors
mtcars$am <- as.factor(mtcars$am)
model_int <- lm(mpg~ vs*drat + wt*am, data = mtcars)
get_summarised_coeffs(model_int)