【问题标题】:Issue with mutate_at and parsing variable to function - R(mutate_at 的问题以及将变量传递给函数 - R)
【发布时间】:2021-06-25 08:45:05
【问题描述】:

这可能是一个简单的问题,但希望能得到一些帮助!

我有一些数据:

数据

structure(list(Samid = c("AD001", "AD002", "AD004", "AD005", 
"AD008", "AD010", "AD011", "AD012", "AD013", "AD014", "AD015", 
"AD016", "AD017", "AD018", "AD019", "AD020", "AD021", "AD022", 
"AD023", "AD024", "AD025", "AD026", "AD027", "AD028"), GATA3 = c(0.07850703, 
0.07850703, 0.4477987, 0.07850703, 0.2362246, 0.44779867, 0.46578259, 
0, 0.46578259, 0.44779867, 0.24396914, 0.46578259, 0.23622459, 
0.24396914, 0.07850703, 0.07850703, 1.25391517, 0.82224747, 0.07850703, 
0.07850703, 0.07850703, 0.07850703, 0.83507423, 0.07850703), 
    IL4 = c(0, 0, 0, 0, 0, 0, 0, 1.26781758, 0, 0, 0, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0), IL4R = c(1.65301611, 0.14846188, 
    1.6307388, 0.14846188, 0.2073535, 0.14846188, 0.4656834, 
    1.48227697, 0.65075963, 0.17073914, 0.14846188, 0.14846188, 
    0.37809262, 0.17073914, 1.65301611, 0.14846188, 1.55269688, 
    0.14846188, 2.15320576, 0.17073914, 0.44340614, 0.17073914, 
    0, 0.44340614), IRF4 = c(0, 0, 0, 0, 0, 0, 2.83446844, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CD207 = c(0.80003601, 
    0.33421377, 3.4723849, 2.32021021, 0.5828276, 0.94797393, 
    0.13406957, 0.70861984, 2.25418614, 1.4883206, 2.38978722, 
    3.47193671, 0.32452279, 2.31827895, 0.80003601, 0.80003601, 
    0.50751017, 2.32021021, 3.0989443, 2.0619054, 1.05640955, 
    3.31881563, 3.37422811, 2.32021021), IL1B = c(0.20787567, 
    0, 0, 0.20787567, 0, 0.20787567, 0, 0, 0, 0.20787567, 0.20787567, 
    0, 0, 0, 0, 0, 0, 0.20787567, 0, 0.20787567, 0.20787567, 
    0.61415248, 0, 0), Clinical.diagnosis = structure(c(2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L, 
    3L, 3L, 3L, 2L, 3L, 3L, 2L), .Label = c("irritated", "negative", 
    "positive"), class = "factor")), row.names = c(NA, -24L), class = "data.frame")

我想对每个基因(列 2:7)分别运行 Mann-Whitney U 检验,并按最后一列 Clinical.diagnosis 进行比较。

我可以单独完成:

wilcox.test(data$GATA3, data$Clinical.diagnosis)

但是,我想遍历每个基因。我已经尝试过下面的写法,但是在将基因传递给函数时遇到了问题:

data %>% results=mutate_at(vars(GATA3:IL1B), ~ wilcox.test(. ~ Clinical.diagnosis))

遗憾的是这不起作用。我希望 "." 指代每个基因(每一列)的内容。最后,当我得到结果时,我想将结果(将是一个列表)附加到原始数据框中,例如新增 2 列(W = 检验统计量,p 值 = 检验的 p 值)。

不过,我的首要任务是对每个基因进行测试......

非常感谢您的帮助!

【问题讨论】:

  • 您能更具体地说明您想要的输出吗?我不明白您如何将 p 值和测试统计量作为新列附加到现有数据中,因为测试是为每个变量构建的,对吧?

标签: r dplyr tidyverse


【解决方案1】:

我认为要做到这一点,您需要将数据转换为长格式(使用 tidyr 的 pivot_longer)。在用 summarise 汇总时,数据框似乎无法直接容纳假设检验(htest)对象,因此可以使用 broom 包的 tidy 将检验结果转换为数据框。wilcox.test 函数似乎也不接受因子作为输入,因此我将其转换为数值:

library(tidyverse)
library(broom)

# Run one Wilcoxon test per gene and collect the statistic and p-value.
# NOTE(review): wilcox.test(x, y) treats the second vector as a second sample,
# so this compares expression values against the numeric factor codes. A
# between-group comparison would use the formula interface
# (value ~ Clinical.diagnosis) instead -- confirm which test is intended.
df %>%
  # wilcox.test() needs numeric input, so use the factor's integer codes.
  mutate(Clinical.diagnosis = as.numeric(Clinical.diagnosis)) %>%
  # Long format: one row per sample/gene pair (columns `name` and `value`).
  pivot_longer(cols = GATA3:IL1B) %>%
  group_by(name) %>%
  # Leave the tidy() result unnamed so summarise() unpacks its columns
  # (statistic, p.value, ...) to the top level. Naming it (`w = tidy(...)`)
  # packs them into a single data-frame column that select() cannot reach.
  summarise(tidy(wilcox.test(value, Clinical.diagnosis))) %>%
  select(gene = name, w = statistic, p = p.value)

#> Warning in wilcox.test.default(value, Clinical.diagnosis): cannot compute exact
#> p-value with ties
...
#> # A tibble: 6 x 3
#>   gene      w        p
#>   <chr> <dbl>    <dbl>
#> 1 CD207   211 1.09e- 1
#> 2 GATA3     0 1.42e- 9
#> 3 IL1B      0 8.45e-10
#> 4 IL4       0 1.57e-10
#> 5 IL4R     13 8.26e- 9
#> 6 IRF4     13 1.00e- 9

完整的可重现示例,带有数据:

library(tidyverse)
library(broom)

# Example data: 24 samples (row.names = c(NA, -24L)), one row per sample.
#   Samid              - sample identifier (character)
#   GATA3, IL4, IL4R, IRF4, CD207, IL1B
#                      - six numeric columns, one per gene
#   Clinical.diagnosis - factor with levels "irritated", "negative",
#                        "positive"; only codes 2L and 3L occur below, so
#                        the "irritated" level is never used.
# `.Label` is the legacy attribute name that structure() maps to `levels`.
df <- structure(list(
  Samid = c(
    "AD001", "AD002", "AD004", "AD005",
    "AD008", "AD010", "AD011", "AD012", "AD013", "AD014", "AD015",
    "AD016", "AD017", "AD018", "AD019", "AD020", "AD021", "AD022",
    "AD023", "AD024", "AD025", "AD026", "AD027", "AD028"
  ), GATA3 = c(
    0.07850703,
    0.07850703, 0.4477987, 0.07850703, 0.2362246, 0.44779867, 0.46578259,
    0, 0.46578259, 0.44779867, 0.24396914, 0.46578259, 0.23622459,
    0.24396914, 0.07850703, 0.07850703, 1.25391517, 0.82224747, 0.07850703,
    0.07850703, 0.07850703, 0.07850703, 0.83507423, 0.07850703
  ),
  IL4 = c(
    0, 0, 0, 0, 0, 0, 0, 1.26781758, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  ), IL4R = c(
    1.65301611, 0.14846188,
    1.6307388, 0.14846188, 0.2073535, 0.14846188, 0.4656834,
    1.48227697, 0.65075963, 0.17073914, 0.14846188, 0.14846188,
    0.37809262, 0.17073914, 1.65301611, 0.14846188, 1.55269688,
    0.14846188, 2.15320576, 0.17073914, 0.44340614, 0.17073914,
    0, 0.44340614
  ), IRF4 = c(
    0, 0, 0, 0, 0, 0, 2.83446844, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  ), CD207 = c(
    0.80003601,
    0.33421377, 3.4723849, 2.32021021, 0.5828276, 0.94797393,
    0.13406957, 0.70861984, 2.25418614, 1.4883206, 2.38978722,
    3.47193671, 0.32452279, 2.31827895, 0.80003601, 0.80003601,
    0.50751017, 2.32021021, 3.0989443, 2.0619054, 1.05640955,
    3.31881563, 3.37422811, 2.32021021
  ), IL1B = c(
    0.20787567,
    0, 0, 0.20787567, 0, 0.20787567, 0, 0, 0, 0.20787567, 0.20787567,
    0, 0, 0, 0, 0, 0, 0.20787567, 0, 0.20787567, 0.20787567,
    0.61415248, 0, 0
  ), Clinical.diagnosis = structure(c(
    2L, 2L,
    2L, 2L, 2L, 2L, 2L, 3L, 2L, 2L, 3L, 3L, 2L, 2L, 3L, 3L, 3L,
    3L, 3L, 3L, 2L, 3L, 3L, 2L
  ), .Label = c(
    "irritated", "negative",
    "positive"
  ), class = "factor")
), row.names = c(NA, -24L), class = "data.frame")

# Per-gene Wilcoxon test: recode the diagnosis factor to its integer codes,
# stack the gene columns into long format, test within each gene, and keep
# only the gene name, test statistic and p-value.
# NOTE(review): wilcox.test(x, y) treats `Clinical.diagnosis` as a second
# sample of values rather than as a grouping variable -- confirm intent.
df %>%
  mutate(across(Clinical.diagnosis, as.numeric)) %>%
  pivot_longer(
    cols = GATA3:IL1B,
    names_to = "name",   # gene name (the pivot_longer default, made explicit)
    values_to = "value"  # gene measurement (the default, made explicit)
  ) %>%
  group_by(name) %>%
  # An unnamed data-frame result is unpacked into top-level columns.
  summarise(tidy(wilcox.test(value, Clinical.diagnosis))) %>%
  select(gene = name, w = statistic, p = p.value)
#> Warning in wilcox.test.default(value, Clinical.diagnosis): cannot compute exact
#> p-value with ties

#> Warning in wilcox.test.default(value, Clinical.diagnosis): cannot compute exact
#> p-value with ties

#> Warning in wilcox.test.default(value, Clinical.diagnosis): cannot compute exact
#> p-value with ties

#> Warning in wilcox.test.default(value, Clinical.diagnosis): cannot compute exact
#> p-value with ties

#> Warning in wilcox.test.default(value, Clinical.diagnosis): cannot compute exact
#> p-value with ties

#> Warning in wilcox.test.default(value, Clinical.diagnosis): cannot compute exact
#> p-value with ties
#> # A tibble: 6 x 3
#>   gene      w        p
#>   <chr> <dbl>    <dbl>
#> 1 CD207   211 1.09e- 1
#> 2 GATA3     0 1.42e- 9
#> 3 IL1B      0 8.45e-10
#> 4 IL4       0 1.57e-10
#> 5 IL4R     13 8.26e- 9
#> 6 IRF4     13 1.00e- 9

由 reprex 包 (v1.0.0) 于 2021-06-25 创建

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多