【发布时间】:2020-01-10 19:28:37
【问题描述】:
我想创建一个数据集,按某个变量的各个取值(在我的例子中是数据集 df1 中的 crisis_t)分别给出 value1 的均值、中位数、第 25 和第 75 百分位数。下面是我尝试过的代码。问题是百分位数没有被正确计算,我不明白为什么。有什么想法吗?
# Sample input: one row per country and crisis period.
# AT and BE cover periods -1..2; DE only covers periods 0..2.
country <- rep(c("AT", "BE", "DE"), times = c(4, 4, 3))
crisis_t <- as.numeric(c(rep(-1:2, times = 2), 0:2))
value1 <- c(
  0.01, 0.02, 0.015, 0.03, # AT
  0.5, 0.55, 0.7, 0.4,     # BE
  0.01, 0.02, 0.04         # DE
)
df1 <- data.frame(country = country, crisis_t = crisis_t, value1 = value1)
# Desired result: one row per crisis_t level with hand-written target values
# for the mean, median, and 25th / 75th percentiles of value1.
crisis_t <- as.numeric(-1:2)
mean_t <- c(0.255, 0.193, 0.245, 0.156)
median_t <- c(0.255, 0.02, 0.02, 0.04)
perc_25 <- c(NA_real_, 0.01, 0.015, 0.03)
perc_75 <- c(NA_real_, 0.55, 0.7, 0.4)
df2 <- data.frame(
  crisis_t = crisis_t,
  mean_t = mean_t,
  median_t = median_t,
  perc_25 = perc_25,
  perc_75 = perc_75
)
# Summarise value1 for each level of crisis_t: mean, median, and the 25th and
# 75th percentiles, one row per level.
#
# The earlier version used `:=`, which adds columns to df1 by reference and
# repeats each group statistic on every row, so df2_try was only another name
# for the modified df1 rather than a summary table. Returning a list from `j`
# aggregates to one row per group and leaves the columns of df1 untouched.
library(data.table)

df1 <- as.data.table(df1)
df2_try <- df1[, .(
  mean_t2 = mean(value1, na.rm = TRUE),
  median_t2 = median(value1, na.rm = TRUE),
  # quantile() defaults to type = 7 (linear interpolation), so in small groups
  # the result can lie between observed values; pass e.g. type = 1 if only
  # observed values should be returned.
  # na.rm = TRUE matches mean/median; without it quantile() stops on NA input.
  perc_25 = quantile(value1, probs = 0.25, na.rm = TRUE, names = FALSE),
  perc_75 = quantile(value1, probs = 0.75, na.rm = TRUE, names = FALSE)
), keyby = crisis_t] # keyby sorts the result by crisis_t
df2_try
感谢您的帮助。
编辑:实际数据集。
# Real data layout: one crisis_<country>_<n> column per crisis episode, holding
# the period relative to that crisis and NA on rows the episode does not cover.
# NOTE(review): row 12 is labelled "DE" but carries crisis_BE_1 / crisis_BE_2
# values and no crisis_DE_1 value -- the country labels may be off by one row
# (possibly 8 x "BE" and 2 x "DE" was intended); confirm with the data owner.
country <- rep(c("AT", "BE", "DE"), times = c(4, 7, 3))
crisis_AT_1 <- c(as.numeric(-1:2), rep(NA_real_, 10))
crisis_BE_1 <- c(rep(NA_real_, 4), as.numeric(-1:6), rep(NA_real_, 2))
crisis_BE_2 <- c(rep(NA_real_, 4), as.numeric(-4:2), -2, rep(NA_real_, 2))
crisis_DE_1 <- c(rep(NA_real_, 12), -1, 0)
value1 <- c(
  0.01, 0.02, 0.015, 0.03,
  0.5, 0.55, 0.7, 0.4, 0.01, 0.02, 0.04,
  0.02, 0.14, 0.21
)
df3 <- data.frame(
  country = country,
  crisis_AT_1 = crisis_AT_1,
  crisis_BE_1 = crisis_BE_1,
  crisis_BE_2 = crisis_BE_2,
  crisis_DE_1 = crisis_DE_1,
  value1 = value1
)
【问题讨论】:
标签: r data.table