【发布时间】:2019-05-20 01:45:48
【问题描述】:
if (!require("pacman")) install.packages("pacman")
pacman::p_load(tidyverse, lubridate)
# Example sampling rounds. Each row gives one round's number together with
# the start and end dates of its range; reference dates are cross-checked
# against these ranges to see which round (if any) they fall in.
Sample.Dates <- tibble(
  ID = "ID",
  Round = seq_len(3),
  Start.Date = dmy(c("03/12/2018", "10/12/2018", "17/12/2018")),
  End.Date = dmy(c("09/12/2018", "16/12/2018", "23/12/2018"))
)
# Reference dates for a single player ("John"): one row per calendar day
# from 2018-12-05 through 2018-12-31. Every date is to be checked against
# Sample.Dates so the round, start date and end date columns can be attached.
# Rows numbers the rows by ranking Date.
Ref.Dates <- tibble(
  ID = "ID",
  Date = seq(ymd("2018-12-05"), ymd("2018-12-31"), by = "day"),
  Player = "John",
  Rows = row_number(Date)
)
# Attach the sampling round(s) whose date range contains a reference date.
#
# Args:
#   x: a one-row data frame with at least `ID` and `Date` columns (the shape
#      of each nested `data` element this function is mapped over).
#
# Returns:
#   `x` left-joined by "ID" to the rows of the global `Sample.Dates` whose
#   Start.Date..End.Date range (both ends inclusive) contains `x$Date`. When
#   no range contains the date, the left join keeps `x` and fills the
#   Sample.Dates columns with NA.
#
# Reads the global `Sample.Dates`; stops if `x` does not hold exactly one date.
Dates.Check.YN.Func <- function(x) {
  Ref.Date <- x %>% pull(Date)

  # The range test compares one date against every round, so make the
  # single-row contract explicit instead of failing later with an obscure
  # size error.
  stopifnot(length(Ref.Date) == 1L)

  # One vectorised comparison over all rounds replaces the per-row
  # rowwise()/ifelse() flag column. `.env$` forces the lookup of the local
  # value even if Sample.Dates ever gains a column with the same name.
  Cross.Check <- Sample.Dates %>%
    filter(Start.Date <= .env$Ref.Date, .env$Ref.Date <= End.Date)

  left_join(x, Cross.Check, by = "ID")
}
# Run the range check once per reference row: nest every column except Rows
# so each distinct Rows value carries its own small data frame, look up the
# matching round for it with Dates.Check.YN.Func, then expand the per-row
# results back into ordinary columns next to Rows.
Data.Nest <- Ref.Dates %>%
  nest(-Rows) %>%
  transmute(Rows, out = map(data, Dates.Check.YN.Func)) %>%
  unnest(out)
这段代码目前可以正常运行。然而,这只是一个虚拟数据集,实际上我需要交叉检查超过 100,000 个日期;在真实数据集上运行时,大约需要 30 分钟。想请教是否有人能用 tidyverse 方案(首选)或其他方式来加速这段代码。
【问题讨论】:
- 听起来 data.table::foverlaps 会有所帮助。
- 你给的 MWE 实在太差了,很难一眼看出你想做什么。你应该写出测试输入和预期输出。
标签: r date data.table tidyverse between