我设法通过以下方式解决了这个问题:
- 确定不同数据系列的范围
- 定义我想要推断的范围
- 通过 Hmisc 包进行实际推断
最初,我认为我只能通过一些循环来管理这个,这些循环必须逐行遍历原始数据,并且希望有一个现有的功能。
我敢肯定,你们中的许多人会以这种方式编码更高效、更好。但是还是想为有类似问题的人发布我的脚本。
require(Hmisc)
extrapol.length <- 5
test <- data.frame('Time' = c(1:100), # I didn't use this as my data was equally spread in time, if you want to use it, see the first argument in the approxExtrap-function in the secondlast line
'x' = c(rep(NA, 10), 1:30, rep(NA, 30), 1:10, rep(NA, 20)))
## Determine start and end of the continuous (non-NA) data streams
length.values <- diff(c(0, which(is.na(test[,2]))))-2 # length non-NA's
length.values <- length.values[length.values > -1]
length.nas <- diff(c(0, which(!is.na(test[,2])))) # length NA's
length.nas <- length.nas[length.nas > 1]
if(is.na(test[1,2])){
# data starts with NA
length.nas <- data.frame('Order' = seq(1, length(length.nas)*2, by = 2),
'Length' = length.nas, 'Type' = 'na')
length.values <- data.frame('Order' = seq(2, length(length.values)*2, by = 2),
'Length' = length.values, 'Type' = 'value')
start.end <- rbind(length.nas, length.values)
start.end <- start.end[order(start.end$Order),]
value.seqs <- data.frame('no' = c(1:length(start.end$Type[start.end$Type == 'na'])),
'start' = NA, 'end' = NA)
for(a in value.seqs$no){
value.seqs$start[a] <- sum(start.end$Length[1:((a*2)-1)])
value.seqs$end[a] <- sum(start.end$Length[1:(a*2)])
}
}else{
# Data starts with actual values
length.nas <- data.frame('Order' = seq(2, length(length.nas)*2, by = 2),
'Length' = length.nas, 'Type' = 'na')
length.values <- data.frame('Order' = seq(1, length(length.values)*2, by = 2),
'Length' = length.values, 'Type' = 'value')
start.end <- rbind(length.nas, length.values)
start.end <- start.end[order(start.end$Order),]
value.seqs <- data.frame('no' = c(1:length(start.end$Type[start.end$Type == 'value'])),
'start' = c(1,rep(NA, (length(start.end$Type[start.end$Type == 'value'])-1))), 'end' = NA)
for(a in value.seqs$no){
value.seqs$end[a] <- sum(start.end$Length[1:((a*2)-1)])+1
if(a < max(value.seqs$no))
value.seqs$start[a+1] <- sum(start.end$Length[1:(a*2)])+1
}
}
## Do not extrapolate outside of the time-range of the original dataframe
value.seqs$start.extr <- value.seqs$start - extrapol.length
value.seqs$start.extr[value.seqs$start.extr < 1] <- 1 # do not extrapolate below time < 1
value.seqs$end.extr <- value.seqs$end + extrapol.length
value.seqs$end.extr[value.seqs$end.extr > nrow(test) | is.na(value.seqs$end.extr)] <- nrow(test)
value.seqs$end[is.na(value.seqs$end)] <- max(which(!is.na(test[,2])))
## Extrapolate
for(b in value.seqs$no){
test[c(value.seqs$start.extr[b]:value.seqs$end.extr[b]),3] <- approxExtrap(value.seqs$start[b]:value.seqs$end[b],test[c(value.seqs$start[b]:value.seqs$end[b]),2],xout=c(value.seqs$start.extr[b]:value.seqs$end.extr[b]))[2]
}
感谢您的考虑!