【发布时间】:2018-07-03 16:30:22
【问题描述】:
我在 R 中有一个数据框,它有 43 个变量和 80 多行。我想根据一个变量(地理区域)对数据进行分组,然后统计另一个变量中各个取值出现的次数(即每组中有多少个 0、1、2、3 和 NA 等)。
我知道 tidyverse 中的 group_by 和 summarize 函数,也知道可以在其中使用 “sum” 和 “mean” 之类的函数,但我想做的是计数,而不是求和或求均值。
我试过了
est1 <- df %>%
group_by(region) %>%
summarize(count)
数据如下:
iso3 Country WHOregion WBIncomeGroup UrbanSanPol UrbanSanWom UrbanSanExt RuralSanPol RuralSanWom
<chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
1 AFG Afghanistan EMRO Low income 0 <NA> <NA> 1 1
2 ALB Albania EURO Upper middle income 1 0 0 1 0
3 ARG Argentina PAHO Upper middle income 1 0 0.5 1 0
4 AZE Azerbaijan EURO Upper middle income 1 1 0.5 1 1
5 BDI Burundi AFRO Low income 1 1 0.5 1 1
6 BFA Burkina Faso AFRO Low income 1 1 1 1 1
但这不是我想要的。有人可以帮忙吗?
structure(list(iso3 = c("AFG", "ALB", "ARG", "AZE", "BDI", "BFA",
"BGD", "BIH", "BLR", "BOL"), Country = c("Afghanistan", "Albania",
"Argentina", "Azerbaijan", "Burundi", "Burkina Faso", "Bangladesh",
"Bosnia and Herzegovina", "Belarus", "Bolivia (Plurinational State of)"
), WHOregion = c("EMRO", "EURO", "PAHO", "EURO", "AFRO", "AFRO",
"SEARO", "EURO", "EURO", "PAHO"), WBIncomeGroup = c("Low income",
"Upper middle income", "Upper middle income", "Upper middle income",
"Low income", "Low income", "Lower middle income", "Upper middle income",
"Upper middle income", "Lower middle income"), UrbanSanPol = c("0",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), UrbanSanWom = c(NA,
"0", "0", "1", "1", "1", "1", "0", NA, "0"), UrbanSanExt = c(NA,
"0", "0.5", "0.5", "0.5", "1", "0.5", "0", "0.5", "0"), RuralSanPol = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), RuralSanWom = c("1",
"0", "0", "1", "1", "1", "1", "0", NA, "0"), RuralSanExt = c("0.5",
"0", "0", "0.5", "0.5", "1", "0.5", "0", "0.5", "0.5"), UrbanDWPol = c("0",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), UrbanDWWom = c(NA,
"0", "0", "1", "1", "1", "1", "0", NA, "0"), UrbanDWExt = c(NA,
"0", "0.5", "1", "0", "0.5", "0.5", "0.5", "0.5", "0"), RuralDWPol = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), RuralDWWom = c("1",
"0", "0", "1", "1", "1", "1", "0", NA, "0"), RuralDWExt = c("0.5",
"0", "0", "1", "0.5", "1", "0.5", "0.5", "0.5", "0.5"), HygienePol = c("1",
"1", "0", "1", "1", "1", "1", "1", "1", "0"), HygieneWom = c("1",
NA, NA, "1", "1", "1", "1", "0", NA, "0"), HygieneExt = c("0.5",
NA, NA, "0", "0.5", "0", "0.5", "0", "0.5", "0"), WASHHealthPol = c("1",
"1", "0", "1", "1", "1", "1", "1", "0", "0"), WASHHealthWom = c("0",
NA, NA, "1", "1", "1", "1", "0", NA, "0"), WASHHealthExt = c("0",
NA, "0.5", "1", "0", "0.5", "0", "0", NA, "0"), WpollutionPol = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "0"), WpollutionWom = c("1",
NA, "0", "1", "1", "1", "1", "0", NA, "0"), WpollutionExt = c("0",
NA, "0", "1", "0", "0.5", "0", "0", "0.5", "0"), WQMPol = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "0"), WQMWom = c("1",
NA, "0", "1", "1", "1", "1", "0", NA, "0"), WQMExt = c("0", NA,
"0", "1", "0", "0.5", "0", "0", "0.5", "0"), WatRightPol = c("0",
"1", "1", "1", NA, "1", "1", "1", "1", "1"), WatRightWom = c("0",
NA, "0", "1", NA, "1", "1", "0", NA, "0"), WatRightExt = c("0",
NA, "0.5", "1", NA, "1", "0", "0", "0.5", "0.5"), WRMPol = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), WRMWom = c("0",
NA, "0", "1", "1", "1", "1", "0", NA, "0"), WRMExt = c("0", NA,
"0.5", "1", "0.5", "1", "0", "0", "0.5", "0"), EnvProtPol = c("1",
"1", "1", "1", "1", "1", "1", "1", "1", "1"), EnvProtWom = c("0",
NA, "0", "1", "1", "1", "1", "0", NA, "0"), EnvProtExt = c("0",
NA, "0", "1", "0", "1", "0", "0", "0.5", "0"), `SDG regions` = c("Central Asia (M49) and Southern Asia (MDG=M49)",
"Northern America (M49) and Europe (M49)", "Latin America and the Caribbean (MDG=M49)",
"Western Asia (M49) and Northern Africa (M49)", "Sub-Saharan Africa (M49)",
"Sub-Saharan Africa (M49)", "Central Asia (M49) and Southern Asia (MDG=M49)",
"Northern America (M49) and Europe (M49)", "Northern America (M49) and Europe (M49)",
"Latin America and the Caribbean (MDG=M49)"), M49_level1 = c("Asia (M49)",
"Europe (M49)", "Latin America and the Caribbean (MDG=M49)",
"Asia (M49)", "Sub-Saharan Africa (M49)", "Sub-Saharan Africa (M49)",
"Asia (M49)", "Europe (M49)", "Europe (M49)", "Latin America and the Caribbean (MDG=M49)"
), M49_level2 = c("Southern Asia (MDG=M49)", "Southern Europe (M49)",
"South America (M49)", "Western Asia (M49)", "Eastern Africa (M49)",
"Western Africa (M49)", "Southern Asia (MDG=M49)", "Southern Europe (M49)",
"Eastern Europe (M49)", "South America (M49)"), LDCs = c("Least Developed Countries (LDCs)",
NA, NA, NA, "Least Developed Countries (LDCs)", "Least Developed Countries (LDCs)",
"Least Developed Countries (LDCs)", NA, NA, NA), LLDCS_SIDS = c("Landlocked developing countries (LLDCs)",
NA, NA, "Landlocked developing countries (LLDCs)", "Landlocked developing countries (LLDCs)",
"Landlocked developing countries (LLDCs)", NA, NA, NA, "Landlocked developing countries (LLDCs)"
), `Income group` = c("Low income", "Upper middle income", "Upper middle income",
"Upper middle income", "Low income", "Low income", "Lower middle income",
"Upper middle income", "Upper middle income", "Lower middle income"
)), .Names = c("iso3", "Country", "WHOregion", "WBIncomeGroup",
"UrbanSanPol", "UrbanSanWom", "UrbanSanExt", "RuralSanPol", "RuralSanWom",
"RuralSanExt", "UrbanDWPol", "UrbanDWWom", "UrbanDWExt", "RuralDWPol",
"RuralDWWom", "RuralDWExt", "HygienePol", "HygieneWom", "HygieneExt",
"WASHHealthPol", "WASHHealthWom", "WASHHealthExt", "WpollutionPol",
"WpollutionWom", "WpollutionExt", "WQMPol", "WQMWom", "WQMExt",
"WatRightPol", "WatRightWom", "WatRightExt", "WRMPol", "WRMWom",
"WRMExt", "EnvProtPol", "EnvProtWom", "EnvProtExt", "SDG regions",
"M49_level1", "M49_level2", "LDCs", "LLDCS_SIDS", "Income group"
), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame"
在此处输入代码
【问题讨论】:
-
也许看看
?table?此外,如果您能发布一些示例数据(包含您感兴趣的变量和几行即可,不需要完整的数据集,只要足以让我们重现并理解您的问题),我们就能给出更好的回答 -
不确定您希望如何分组,但可能是这样的:
lapply(split(mtcars, mtcars$cyl), function(x) lapply(x, table))?将mtcars替换为您的数据框,将$cyl替换为$region。 -
切换嵌套,
lapply(mtcars[-2], function(x) lapply(split(x, mtcars$cyl), table)),其中2是分组列的列索引。 -
请对您的 df 运行这段代码:dput(head(df, 10)),并粘贴其输出,以便我们可以用示例数据进行测试
-
您发布的表格没有帮助 - 没有人愿意自己手动输入这些数据。请用 dput(head(df, 10)) 的输出来发布您的数据。