【问题标题】:how to add a new column based on certain conditions with tidyverse?如何使用 tidyverse 根据某些条件添加新列?
【发布时间】:2020-12-08 18:57:49
【问题描述】:

我正在尝试根据受访者是否健康创建一个新列。

这是我的数据示例:

# Sample data (dput output): a 20-row tibble covering three distinct `id`
# values. Symptom columns are factors with levels "No"/"Yes"; `gender` is a
# factor with levels "Female"/"Male"/"Other". Each row pairs one
# `comorbidities` name with a `bolean_yes_no` value; only row 4 ("obesity")
# is "Yes". Presumably `bolean_yes_no` marks whether the respondent has that
# comorbidity -- confirm against the upstream reshaping step.
# Note: the first column is literally named `cutree(hc_diana, k = 4)`.
test <- structure(list(`cutree(hc_diana, k = 4)` = c(1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L
), id = c("117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "8a594e9340", 
"8a594e9340"), covid_tested = c("positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive", "positive", 
"positive", "positive", "positive", "positive", "positive"), 
    age = c(51, 51, 51, 51, 51, 51, 51, 51, 51, 28, 28, 28, 28, 
    28, 28, 28, 28, 28, 28, 28), gender = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("Female", "Male", "Other"), class = "factor"), 
    number_morbidities = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 
    0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    diarrhoea = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), fatigue = structure(c(2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    headache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), loss_smell_taste = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), 
    muscle_ache = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), nasal_congestion = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    nausea_vomiting = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), shortness_breath = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    sore_throat = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), sputum = structure(c(1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    loss_appetite = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), chest_pain = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    itchy_eyes = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("No", 
    "Yes"), class = "factor"), joint_pain = structure(c(1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), 
    comorbidities = c("asthma", "diabetes_type_one", "diabetes_type_two", 
    "obesity", "hypertension", "heart_disease", "lung_condition", 
    "liver_disease", "kidney_disease", "asthma", "diabetes_type_one", 
    "diabetes_type_two", "obesity", "hypertension", "heart_disease", 
    "lung_condition", "liver_disease", "kidney_disease", "asthma", 
    "diabetes_type_one"), bolean_yes_no = c("No", "No", "No", 
    "Yes", "No", "No", "No", "No", "No", "No", "No", "No", "No", 
    "No", "No", "No", "No", "No", "No", "No")), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))

我有 20 行数据,其中包含 3 个唯一 ID。然而,我想根据几个条件创建一个新列:

  1. 如果受访者有合并症,则只在存在该合并症的行中,把该合并症的名称填入新列,其余所有行应为 NA。
  2. 如您所见,第二个 id 没有任何合并症,因此我想新增一个“健康”类别:只在该患者的一行中标记为“健康”,其余与该患者相关的行显示为 NA。第三位受访者也是如此。

如何使用 tidyverse 做到这一点?

我希望新列的外观示例在这里,请查看总结上述要点的最后一列。

# Desired result (dput output, not assigned to a name): a 20-row tibble with
# `id`, `number_morbidities`, the symptom factors, `comorbidities`,
# `bolean_yes_no`, and the requested new column, spelled `morbiditiy_healthy`
# here. That column is "obesity" on row 4, "healthy" on rows 10 and 19 (the
# first row of the second and third ids), and NA everywhere else.
structure(list(id = c("117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", "117dbbbf15", 
"117dbbbf15", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", "3c8bfb6fc3", 
"8a594e9340", "8a594e9340"), number_morbidities = c(1, 1, 1, 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1), chills = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), cough = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), diarrhoea = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), fatigue = structure(c(2L, 
2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), headache = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_smell_taste = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 2L, 2L), .Label = c("No", "Yes"), class = "factor"), muscle_ache = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nasal_congestion = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), nausea_vomiting = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), shortness_breath = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sore_throat = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), sputum = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), temperature = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), loss_appetite = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), chest_pain = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), itchy_eyes = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), joint_pain = structure(c(1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L), .Label = c("No", "Yes"), class = "factor"), comorbidities = c("asthma", 
"diabetes_type_one", "diabetes_type_two", "obesity", "hypertension", 
"heart_disease", "lung_condition", "liver_disease", "kidney_disease", 
"asthma", "diabetes_type_one", "diabetes_type_two", "obesity", 
"hypertension", "heart_disease", "lung_condition", "liver_disease", 
"kidney_disease", "asthma", "diabetes_type_one"), bolean_yes_no = c("No", 
"No", "No", "Yes", "No", "No", "No", "No", "No", "No", "No", 
"No", "No", "No", "No", "No", "No", "No", "No", "No"), morbiditiy_healthy = c(NA, 
NA, NA, "obesity", NA, NA, NA, NA, NA, "healthy", NA, NA, NA, 
NA, NA, NA, NA, NA, "healthy", NA)), row.names = c(NA, -20L), class = c("tbl_df", 
"tbl", "data.frame"))

【问题讨论】:

  • 我想先保留它以便我交叉检查,然后再丢弃它。我相信我可以用 drop_na() 函数丢弃它
  • 是的,你说得对!
  • 我可以检查 number_morbidities 和合并症,还可以检查 bolean_yes_no

标签: r dplyr


【解决方案1】:

我们按 'id' 分组,用 case_when 创建 'morbidity_healthy' 列:先检查 'bolean_yes_no' 列是否为 'Yes',若为 TRUE,则取对应的 'comorbidities' 值;如果该组中没有任何 'Yes'(即 !any(...))且 row_number 为 1,则该行返回 'healthy';其余行保持为 NA。

library(dplyr)
# Build `morbidity_healthy` per respondent (`id`):
#   * rows whose `bolean_yes_no` is "Yes" take that row's `comorbidities` value;
#   * when a respondent has no "Yes" row at all, only their first row is
#     labelled "healthy";
#   * every other row matches no case_when() branch and stays NA.
test %>%
  group_by(id) %>%
  mutate(
    morbidity_healthy = case_when(
      bolean_yes_no == "Yes" ~ comorbidities,
      # any() is evaluated within each `id` group, so this asks whether the
      # respondent has any "Yes" row; row_number() restarts at 1 per group.
      !any(bolean_yes_no == "Yes") & row_number() == 1L ~ "healthy"
    )
  ) %>%
  # Drop the grouping so later verbs are not silently applied per `id`.
  ungroup()

【讨论】:

  • 一如既往,酷!!有效!你到底怎么知道这么多!!
猜你喜欢
  • 1970-01-01
  • 2021-12-30
  • 2019-05-22
  • 2013-03-31
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 1970-01-01
  • 2021-10-10
相关资源
最近更新 更多