假设 `df1` 和 `df2` 分别是第一个和第二个数据集。先根据 `df1` 的 `Time` 列创建分组索引 `indx`(每遇到以年份开头的行就开始一个新组),然后使用 `ave` 和 `as.yearqtr` 将 `Time` 列转换为与 `df2` 的 `Time` 列相同的数值格式:
library(zoo)

# A row whose Time starts with digits carries a year and opens a new group;
# the running count of such rows labels every row with its year group.
indx <- cumsum(grepl('^\\d+', df1$Time))

# Within each group, copy the year from the first label onto the remaining
# bare quarter labels ("Q2" -> "2004 Q2"), parse them with as.yearqtr(), and
# store the result as plain numbers so it can be matched against df2$Time.
df1$Time <- as.numeric(
  ave(df1$Time, indx, FUN = function(qtr) {
    year <- sub(' .*', '', qtr[1])
    qtr[-1] <- paste(year, qtr[-1])
    as.yearqtr(qtr)
  })
)
然后用 `merge` 合并两个数据集;如果只需要年份,可以再用 `transform` 对 `Time` 列取整(`trunc`):
# Join df1 and df2 on the columns they share, then truncate Time to the year.
# local() keeps the intermediate table out of the workspace; its value is
# still returned, so the result prints at top level.
local({
  joined <- merge(df1, df2)
  joined$Time <- trunc(joined$Time)
  joined
})
# Time LabourProductivity DepressionCount
#1 2004 96.6 875
#2 2004 96.9 820
#3 2004 96.9 785
#4 2004 97.1 857
#5 2005 97.6 844
#6 2005 99.0 841
或者使用 data.table:
library(data.table)

# Convert df1 to a data.table in place.
setDT(df1)

# Build a numeric year-quarter column, one year group at a time. A group
# starts at every row whose Time begins with digits (i.e. carries a year).
# The result goes into a temporary column, which then replaces Time as a whole.
df1[, TimeN := {
  qtr <- Time
  # Only rows after the first lack a year. The guard on group size matters:
  # paste() recycles a zero-length argument to "", so a one-row group (a year
  # with only Q1 present) would otherwise produce an extra value and the
  # grouped assignment would fail on a length mismatch.
  if (length(qtr) > 1L) {
    qtr[-1L] <- paste(sub(' .*', '', qtr[1L]), qtr[-1L])
  }
  as.numeric(as.yearqtr(qtr))
}, by = list(Grp = cumsum(grepl('^\\d+', Time)))]

# Swap the numeric column in for the original labels and print the table.
df1[, Time := TimeN][, TimeN := NULL][]

# Key df1 on Time, look up df2's rows by that key, then truncate Time to the
# year and print the joined table.
setkey(df1, Time)
df1[df2][, Time := trunc(Time)][]
# Time LabourProductivity DepressionCount
#1: 2004 96.6 875
#2: 2004 96.9 820
#3: 2004 96.9 785
#4: 2004 97.1 857
#5: 2005 97.6 844
#6: 2005 99.0 841
数据
# Example input 1: quarterly labour productivity. The year appears only on
# the first quarter of each year; later quarters are bare "Q2".."Q4" labels.
df1 <- data.frame(
  Time = c("2004 Q1", "Q2", "Q3", "Q4", "2005 Q1", "Q2"),
  LabourProductivity = c(96.6, 96.9, 96.9, 97.1, 97.6, 99),
  row.names = c("1", "2", "3", "4", "5", "6"),
  stringsAsFactors = FALSE
)
# Example input 2: depression counts keyed by a numeric year-quarter
# (year plus 0, 0.25, 0.5 or 0.75).
df2 <- data.frame(
  Time = c(2004, 2004.25, 2004.5, 2004.75, 2005, 2005.25),
  DepressionCount = c(875L, 820L, 785L, 857L, 844L, 841L),
  row.names = c("1", "2", "3", "4", "5", "6"),
  stringsAsFactors = FALSE
)