您想要做的事情非常简单。
- 按照您给出的格式准备数据(实际使用时可从文件读取,这里直接用 tribble 构造示例数据)
library(tidyverse)
library(lubridate)
# Sample minute-by-minute return series: one row per timestamp.
# TIME is entered as text and parsed to a date-time in a second step.
price_data <- tribble(
  ~TIME,                 ~return,
  "2005-01-05 10:15:00", 0.5,
  "2005-01-05 10:16:00", 0.6,
  "2005-01-05 10:17:00", 0.3,
  "2005-01-05 10:18:00", 0.1,
  "2005-01-05 10:19:00", 0.5,
  "2005-01-05 10:20:00", 0.5,
  "2005-01-05 10:21:00", 0.2,
  "2005-01-05 10:22:00", 0.5,
  "2005-01-05 10:23:00", 0.2,
  "2005-01-05 10:24:00", 0.5
)
price_data <- mutate(price_data, TIME = ymd_hms(TIME))
# Sample events: a timestamp and an event type per row. The first and last
# timestamps do not occur in price_data.
event_data <- tibble(
  TIME = ymd_hms(c(
    "2004-12-15 12:45:00",
    "2005-01-05 10:20:00",
    "2005-01-05 10:21:00",
    "2005-01-05 10:23:00",
    "2005-10-31 11:05:00"
  )),
  Event.Type = c("A", "B", "C", "A", "C")
)
- 准备一个函数,按照您编写的方法进行计算
# Baseline-adjusted returns around a single event.
#
# Finds the row of `price_data` whose TIME equals `event_time` and returns a
# one-row tibble with the returns at t-1, t0 and t+1, each minus the mean
# return over rows t-5 .. t-2.
#
# event_time  timestamp to look up in price_data$TIME
# price_data  data frame with columns TIME and return
#
# Returns a one-row tibble with columns `t-1`, t0, `t+1`. All three are NA
# when the timestamp is not found or the window would leave the table.
f1 <- function(event_time, price_data) {
  na_row <- tibble(`t-1` = NA, t0 = NA, `t+1` = NA)
  idx <- which(price_data$TIME == event_time)

  # No matching timestamp in the price series.
  if (length(idx) == 0) return(na_row)
  # Rows idx-5 .. idx-2 feed the baseline, so idx must be at least 6.
  if (idx < 6) return(na_row)
  # Row idx+1 is read below, so idx cannot be the last row.
  if (idx > (nrow(price_data) - 1)) return(na_row)

  ret <- price_data$return
  baseline <- mean(ret[(idx - 5):(idx - 2)])
  tibble(
    `t-1` = ret[idx - 1] - baseline,
    t0 = ret[idx] - baseline,
    `t+1` = ret[idx + 1] - baseline
  )
}
- 使用 mutate 对每个事件调用该函数,并用 unnest 展开结果
# Run f1 once per event timestamp, keep each one-row result in a list column,
# then spread that column into `t-1`, t0 and `t+1`.
event_data %>%
  mutate(
    data = map(TIME, function(event_time) f1(event_time, price_data))
  ) %>%
  unnest(data)
输出
# A tibble: 5 x 5
TIME Event.Type `t-1` t0 `t+1`
<dttm> <chr> <dbl> <dbl> <dbl>
1 2004-12-15 12:45:00 A NA NA NA
2 2005-01-05 10:20:00 B 0.125 0.125 -0.175
3 2005-01-05 10:21:00 C 0.125 -0.175 0.125
4 2005-01-05 10:23:00 A 0.175 -0.125 0.175
5 2005-10-31 11:05:00 C NA NA NA
它已经准备好了!
但是,请不要忽略 f1 函数中相应的边界保护:当 idx<6 或 idx>(nrow(price_data)-1) 时函数直接返回 NA,以避免索引越界。
更新
好的,让我们尝试修改我们的函数f1,以便t1 和t2 是可以取任何值的参数。
这是更正后的代码。
library(tidyverse)
library(lubridate)
# Extended minute-by-minute return series (15 rows), giving room for longer
# averaging windows before each event. TIME is parsed in a second step.
price_data <- tribble(
  ~TIME,                 ~return,
  "2005-01-05 10:10:00", 0.5,
  "2005-01-05 10:11:00", 0.6,
  "2005-01-05 10:12:00", 0.3,
  "2005-01-05 10:13:00", 0.1,
  "2005-01-05 10:14:00", 0.5,
  "2005-01-05 10:15:00", 0.5,
  "2005-01-05 10:16:00", 0.6,
  "2005-01-05 10:17:00", 0.3,
  "2005-01-05 10:18:00", 0.1,
  "2005-01-05 10:19:00", 0.5,
  "2005-01-05 10:20:00", 0.5,
  "2005-01-05 10:21:00", 0.2,
  "2005-01-05 10:22:00", 0.5,
  "2005-01-05 10:23:00", 0.2,
  "2005-01-05 10:24:00", 0.5
)
price_data <- mutate(price_data, TIME = ymd_hms(TIME))
# Sample events: a timestamp and an event type per row. The first and last
# timestamps do not occur in price_data.
event_data <- tibble(
  TIME = ymd_hms(c(
    "2004-12-15 12:45:00",
    "2005-01-05 10:20:00",
    "2005-01-05 10:21:00",
    "2005-01-05 10:23:00",
    "2005-10-31 11:05:00"
  )),
  Event.Type = c("A", "B", "C", "A", "C")
)
# Baseline-adjusted returns around a single event, with a configurable
# averaging window.
#
# Finds the row of `price_data` whose TIME equals `event_time` and returns a
# one-row tibble with the returns at t-1, t0 and t+1, each minus the mean
# return over rows idx+t1 .. idx+t2 (both ends included).
#
# event_time  timestamp to look up in price_data$TIME
# price_data  data frame with columns TIME and return
# t1, t2      row offsets, relative to the event row, of the two ends of the
#             averaging window; they may be negative and in either order
#
# Returns a one-row tibble with columns `t-1`, t0, `t+1`. All three are NA
# when the timestamp is not found, when the averaging window leaves the
# table, or when the event sits on the first or last row (so t-1 or t+1
# does not exist).
f1 <- function(event_time, price_data, t1 = 2, t2 = -2) {
  out <- tibble(`t-1` = NA, t0 = NA, `t+1` = NA)
  idx <- which(price_data$TIME == event_time)
  if (length(idx) == 0) return(out)

  n <- nrow(price_data)
  # Rows idx-1 and idx+1 are read below, so the event must lie strictly
  # inside the table: idx == n (not n - 1) is the row with no t+1.
  if ((idx + t1) < 1 || (idx + t2) < 1 ||
      (idx + t1) > n || (idx + t2) > n ||
      idx == n || idx == 1) return(out)

  mt1t2 <- mean(price_data$return[(idx + t1):(idx + t2)])
  tibble(
    `t-1` = price_data$return[idx - 1] - mt1t2,
    t0 = price_data$return[idx] - mt1t2,
    `t+1` = price_data$return[idx + 1] - mt1t2
  )
}
# Run f1 once per event timestamp with the averaging window t1 = 2, t2 = -8,
# then spread the one-row results into `t-1`, t0 and `t+1`.
event_data %>%
  mutate(
    data = map(TIME, function(event_time) f1(event_time, price_data, 2, -8))
  ) %>%
  unnest(data)
输出
# A tibble: 5 x 5
TIME Event.Type `t-1` t0 `t+1`
<dttm> <chr> <dbl> <dbl> <dbl>
1 2004-12-15 12:45:00 A NA NA NA
2 2005-01-05 10:20:00 B 0.127 0.127 -0.173
3 2005-01-05 10:21:00 C 0.136 -0.164 0.136
4 2005-01-05 10:23:00 A NA NA NA
5 2005-10-31 11:05:00 C NA NA NA
最后,说几句。
在进行索引操作时,您必须始终小心不要超出向量或数据帧索引的允许范围。在这种情况下,我们必须确保索引始终在1: nrow (price_data) 范围内。
所以我们必须控制参数t1 和t2,如果它们导致超出允许的索引,请做出相应的反应。在这种情况下,NA 响应似乎是合适的 (if((idx+t1)<1 | ...idx==1) return(out))。
当然,索引不能是空值,当event_data tibble 中的TIME 值在price_data tibble (if(length(idx)==0) return(out)) 中找不到时会发生这种情况。
更新 2
# Baseline-adjusted returns for the five rows around a single event, with a
# configurable averaging window.
#
# Finds the row of `price_data` whose TIME equals `event_time` and returns a
# one-row tibble with the returns at t-2 .. t+2, each minus the mean return
# over rows idx+t1 .. idx+t2 (both ends included).
#
# event_time  timestamp to look up in price_data$TIME
# price_data  data frame with columns TIME and return
# t1, t2      row offsets, relative to the event row, of the two ends of the
#             averaging window; they may be negative and in either order
#
# Returns a one-row tibble with columns `t-2`, `t-1`, t0, `t+1`, `t+2`. All
# five are NA when the timestamp is not found, when the averaging window
# leaves the table, or when the event is within two rows of either end.
f2 <- function(event_time, price_data, t1 = 2, t2 = -2) {
  out <- tibble(`t-2` = NA, `t-1` = NA, t0 = NA, `t+1` = NA, `t+2` = NA)
  idx <- which(price_data$TIME == event_time)
  if (length(idx) == 0) return(out)

  n <- nrow(price_data)
  # Rows idx-2 .. idx+2 are read below, so idx must lie in 3 .. n-2.
  # Rows 1, 2, n-1 and n are the ones that cannot supply all five values.
  if ((idx + t1) < 1 || (idx + t2) < 1 ||
      (idx + t1) > n || (idx + t2) > n ||
      idx < 3 || idx > (n - 2)) return(out)

  mt1t2 <- mean(price_data$return[(idx + t1):(idx + t2)])
  tibble(
    `t-2` = price_data$return[idx - 2] - mt1t2,
    `t-1` = price_data$return[idx - 1] - mt1t2,
    t0 = price_data$return[idx] - mt1t2,
    `t+1` = price_data$return[idx + 1] - mt1t2,
    `t+2` = price_data$return[idx + 2] - mt1t2
  )
}
# Run f2 once per event timestamp with the averaging window t1 = 2, t2 = -8,
# then spread the one-row results into the columns `t-2` .. `t+2`.
event_data %>%
  mutate(
    data = map(TIME, function(event_time) f2(event_time, price_data, 2, -8))
  ) %>%
  unnest(data)
输出
# A tibble: 5 x 7
TIME Event.Type `t-2` `t-1` t0 `t+1` `t+2`
<dttm> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 2004-12-15 12:45:00 A NA NA NA NA NA
2 2005-01-05 10:20:00 B -0.273 0.127 0.127 -0.173 0.127
3 2005-01-05 10:21:00 C 0.136 0.136 -0.164 0.136 -0.164
4 2005-01-05 10:23:00 A NA NA NA NA NA
5 2005-10-31 11:05:00 C NA NA NA NA NA