【发布时间】:2019-03-27 14:07:13
【问题描述】:
我正在尝试使用 dplyr 按组过滤数据帧,其中捕获了一行中第一次出现的字符串“ReadingOnset”,并将它和所有后续行传递到一个新的数据帧中。
Text_Stimuli <- structure(list(Name = c("Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1",
"Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1", "Sub1"
), StimulusName = c("GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20", "GenLie20",
"GenLie20"), StimuliBlock = c("Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4", "Block_4", "Block_4", "Block_4", "Block_4",
"Block_4", "Block_4"), Reading_Onset = c("", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",
"", "", "", "", "", "", "", "", "ReadingOnset", "", "", "", "",
"", "", "")), row.names = c(NA, -101L), vars = c("Name", "StimulusName",
"StimuliBlock"), drop = TRUE, indices = list(0:100), group_sizes = 101L, biggest_group_size = 101L, labels = structure(list(
Name = "Innocent Subject 15", StimulusName = "GenLie20",
StimuliBlock = "Block_4"), row.names = c(NA, -1L), class = "data.frame", vars = c("Name",
"StimulusName", "StimuliBlock"), drop = TRUE), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"))
这是我一直在尝试的解决方案类型的示例,但没有成功。
Test <- Text_Stimuli %>%
group_by(Name, StimulusName, StimuliBlock)%>%
filter(!lead(cumsum(grepl("ReadingOnset", Reading_Onset)), default = 0))
如您所见,我正在尝试按Name、StimulusName 和StimuliBlock 进行分组。然后我试图在Reading_Onset 列中找到第一次出现的“ReadingOnset”,并从中返回所有前导行(包括带有“ReadingOnset”的行)。
我一直在尝试使这个解决方案适应问题的反面:https://stackoverflow.com/a/37922522/2653210
【问题讨论】:
-
您能否阅读this guide 并发布您的数据的最小子集以制作可运行的示例?
-
谢谢奥利弗。我会马上解决的!
标签: r dataframe filter dplyr grepl