【发布时间】:2025-12-29 16:05:05
【问题描述】:
这是我的数据框的一个子集。
library(arsenal)
library(tidyverse)
# Sample data (dput-style literal): a 109-row data.frame with two factor
# columns.
#   Hospital - hospital identifier, 5 levels ("A" to "E").
#   ZipCode  - zip code stored as a factor with 64 levels; note that one
#              level ("77C01") is not purely numeric, so the codes must stay
#              character/factor rather than being converted to numbers.
mydata2=structure(list(Hospital = structure(c(1L, 1L, 2L, 3L, 4L, 5L,
4L, 2L, 4L, 3L, 2L, 1L, 3L, 3L, 1L, 4L, 4L, 4L, 5L, 5L, 1L, 4L,
4L, 1L, 1L, 5L, 2L, 4L, 4L, 5L, 1L, 2L, 2L, 5L, 1L, 5L, 1L, 1L,
4L, 2L, 4L, 4L, 2L, 1L, 4L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 4L, 2L,
2L, 4L, 4L, 1L, 3L, 3L, 5L, 4L, 2L, 5L, 1L, 4L, 1L, 4L, 4L, 2L,
2L, 2L, 2L, 3L, 1L, 3L, 1L, 3L, 5L, 5L, 4L, 3L, 3L, 2L, 3L, 1L,
1L, 1L, 5L, 3L, 1L, 1L, 2L, 1L, 2L, 4L, 3L, 3L, 1L, 2L, 5L, 1L,
3L, 2L, 2L, 1L, 1L, 1L, 2L), .Label = c("A", "B", "C", "D", "E"
), class = "factor"), ZipCode = structure(c(1L, 5L, 16L, 23L,
42L, 18L, 46L, 49L, 36L, 61L, 33L, 28L, 58L, 60L, 3L, 40L, 8L,
45L, 35L, 37L, 55L, 45L, 14L, 15L, 59L, 41L, 42L, 44L, 42L, 41L,
17L, 49L, 43L, 34L, 55L, 41L, 52L, 63L, 42L, 38L, 8L, 45L, 49L,
7L, 13L, 26L, 63L, 39L, 59L, 38L, 59L, 50L, 8L, 49L, 38L, 45L,
43L, 53L, 24L, 22L, 34L, 48L, 33L, 29L, 62L, 42L, 32L, 48L, 33L,
19L, 49L, 49L, 38L, 25L, 4L, 51L, 30L, 57L, 47L, 35L, 9L, 23L,
51L, 12L, 58L, 63L, 59L, 27L, 37L, 57L, 2L, 54L, 38L, 56L, 49L,
64L, 11L, 20L, 56L, 49L, 34L, 21L, 23L, 49L, 49L, 10L, 31L, 59L,
6L), .Label = c("27000", "45490", "72470", "75011", "75015",
"75018", "76480", "77270", "77340", "77350", "77380", "77440",
"77580", "77C01", "78125", "78200", "80000", "91090", "91100",
"91130", "91160", "91200", "91210", "91270", "91350", "91410",
"91540", "91700", "92000", "92220", "92310", "92350", "93000",
"93100", "93110", "93120", "93130", "93150", "93200", "93220",
"93230", "93270", "93290", "93300", "93420", "93440", "93500",
"93600", "93700", "94110", "94190", "94200", "94230", "94240",
"94250", "94270", "94290", "94310", "94400", "94480", "94520",
"94550", "94800", "95190"), class = "factor")), row.names = c(NA,
-109L), class = "data.frame")
我创建了一个函数,允许我为每家医院计算构成其入院 60% 的前 n 个邮政编码。然后我想将每个医院的邮政编码存储在一个向量列表中。
#' Top zip codes covering a cumulative share of one hospital's admissions
#'
#' Keeps the rows of `data` belonging to one hospital, tabulates their zip
#' codes, orders the frequency table by decreasing count with
#' `arsenal::freqlist()`, and returns the zip codes whose cumulative
#' percentage does not exceed `threshold`.
#'
#' @param choice Hospital to select; compared to the `Hospital` column
#'   with `==`.
#' @param data Data frame with `Hospital` and `ZipCode` columns. Defaults to
#'   the global `mydata2`, which is what the function originally read, so
#'   existing calls such as `Zone(choice = "A")` behave as before.
#' @param threshold Cumulative-percentage cut-off on the 0-100 scale.
#'   Defaults to 60, the value that used to be hard-coded.
#' @return The `Var1` column of the filtered frequency table: the retained
#'   zip codes, most frequent first.
Zone <- function(choice, data = mydata2, threshold = 60) {
  # Work on a separately named subset so the input data set is not shadowed.
  hospital_rows <- data %>% filter(Hospital == choice)

  # Count admissions per zip code; missing zip codes are counted as well.
  zip_counts <- table(hospital_rows$ZipCode, useNA = "ifany")
  zip_freq <- freqlist(zip_counts, na.options = "include", digits.pct = 1)

  # `TRUE` rather than `T`: `T` is an ordinary variable that can be reassigned.
  zip_summary <- summary(sort(zip_freq, decreasing = TRUE))
  zip_table <- as.data.frame(zip_summary$object)

  # NOTE(review): the comparison is `<=`, so a zip code whose cumulative
  # percentage goes past `threshold` is dropped; if the most frequent zip
  # code alone exceeds the threshold the result is empty.
  zip_table <- zip_table %>% filter(cumPercent <= threshold)
  zip_table$Var1
}
例如,对于医院 "A",以下是占其入院人数 60% 的邮政编码:
# Example call: zip codes returned by Zone() for hospital "A".
Zone(choice = "A")
[1] "94400" "94800" "94250" "94270" "27000" "45490" "72470" "75011" "75015" "76480"
[11] "77350"
所以我想对所有医院都执行同样的操作。目标是将所有输出存储在一个列表中,以便之后能够提取每家医院占其入院人数 60% 的邮政编码,如下所示。我需要一个能够自动完成这一切的函数,因为我的完整数据集中有数百家医院(这里只是一个子集)。
$A
[1] "94400" "94800" "94250" "94270" "27000" "45490" "72470" "75011" "75015" "76480"
[11] "77350"
$B
[1] "93700"
$C
[1] "91210" "94190" "94290" "94310"
$D
[1] "93270" "93420" "77270" "93600"
$E
[1] "93100" "93230"
【问题讨论】:
标签: r list automation