更新:感谢 akrun!这是一个更自动化的版本:
# Packages that provide the verbs used below: joins/select (dplyr),
# pivot_longer/pivot_wider (tidyr), str_remove/str_c (stringr).
library(dplyr)
library(tidyr)
library(stringr)

# Per-gene mean of the measurements, one output column per annotation group.
# Expects `df` (a Gene column plus one column per sample) and `Annotation`
# (with at least `Sample` and `Group` columns) to exist already.
df %>%
  # Long format: one row per (Gene, Sample) pair; measurements land in `value`.
  pivot_longer(cols = -Gene, names_to = "Sample") %>%
  left_join(
    Annotation %>%
      mutate(
        # Drops the first run of lowercase letters from the annotation's sample
        # label -- presumably so it matches the column names of `df`; confirm
        # against the actual `Annotation` table.
        Sample = str_remove(Sample, "[a-z]+"),
        # Prefix the group label so it becomes the final column name.
        Group = str_c("Mean_Group_", Group)
      ),
    # Explicit key: no "Joining with `by = ...`" message, and no accidental
    # match on any other column the two tables might happen to share.
    by = "Sample"
  ) %>%
  # Sample must go, otherwise pivot_wider() would keep it as an id column and
  # never aggregate across the samples of a group.
  dplyr::select(-Sample) %>%
  # One column per group; values sharing a Gene/Group cell are averaged.
  pivot_wider(names_from = Group, values_from = value, values_fn = mean)
Gene Mean_Group_A Mean_Group_B Mean_Group_C
<chr> <dbl> <dbl> <dbl>
1 G1 -0.417 -0.352 0.576
2 G2 -0.391 1.33 0.879
3 G3 -2.23 -1.89 -0.449
4 G4 1.71 0.912 -0.0216
5 G5 0.582 2.12 0.0838
第一个答案:
一种方法是使用 purrr 包的 `pmap_dfr`:
library(dplyr)
library(purrr)
# Append one row-wise mean column per sample group. Each pmap_dfr() call walks
# the selected columns row by row and returns a one-column data frame; because
# that data frame is passed to mutate() unnamed, its column is added to `df`
# under its own name.
df %>%
  mutate(
    pmap_dfr(
      across(S1:S2),
      function(...) data.frame(Mean_GroupA = mean(c(...)))
    ),
    pmap_dfr(
      across(S3:S4),
      function(...) data.frame(Mean_GroupB = mean(c(...)))
    ),
    pmap_dfr(
      across(S5:S8),
      function(...) data.frame(Mean_GroupC = mean(c(...)))
    )
  )
Gene S1 S2 S3 S4 S5 S6 S7 S8 Mean_GroupA Mean_GroupB Mean_GroupC
1 G1 -2.9631013 2.128729 1.5440470 -2.2475997 -2.821792 2.789957 0.3067319 2.02967878 -0.4171862 -0.3517764 0.57614401
2 G2 -2.9881341 2.205504 0.1252083 2.5445515 2.419377 1.629860 -0.5366258 0.00373429 -0.3913150 1.3348799 0.87908623
3 G3 -2.3801595 -2.075316 -0.8376676 -2.9434839 2.162581 -1.089719 -0.5378263 -2.33118860 -2.2277379 -1.8905757 -0.44903839
4 G4 2.3143576 1.105065 1.2648060 0.5601431 -2.775243 1.145832 2.1268387 -0.58385730 1.7097114 0.9124746 -0.02160752
5 G5 -0.1625891 1.326408 1.4469643 2.8007233 2.057994 -0.041658 -2.0802055 0.39907598 0.5819093 2.1238438 0.08380163
数据:
# Example input: 5 genes (rows) measured in 8 samples (columns S1..S8).
df <- data.frame(
  Gene = c("G1", "G2", "G3", "G4", "G5"),
  S1 = c(-2.9631013, -2.9881341, -2.3801595, 2.31435765, -0.1625891),
  S2 = c(2.1287289, 2.20550407, -2.0753163, 1.10506511, 1.32640774),
  S3 = c(1.54404698, 0.12520826, -0.8376676, 1.26480602, 1.4469643),
  S4 = c(-2.2475997, 2.54455148, -2.9434839, 0.56014312, 2.8007233),
  S5 = c(-2.8217917, 2.41937685, 2.16258073, -2.7752431, 2.05799403),
  S6 = c(2.78995706, 1.62985958, -1.0897194, 1.14583159, -0.041658),
  S7 = c(0.30673189, -0.5366258, -0.5378263, 2.12683874, -2.0802055),
  S8 = c(2.02967878, 0.00373429, -2.3311886, -0.5838573, 0.39907598),
  # Keep Gene as character regardless of the R version's default.
  stringsAsFactors = FALSE
)