【问题标题】:Calculate standard deviation based on a specific time frame根据特定时间范围计算标准差
【发布时间】:2023-01-13 00:04:03
【问题描述】:

我想计算某个特定日期之后一段时间内每日对数收益的标准差。换句话说:我想设定一个日期(例如 2019-01-15),并计算该日期之后 5 天的对数收益的标准差(即从 2019-01-16 到 2019-01-20)。问题是,各只股票的开始日期不同,所以我必须始终把代码与各自的开始日期关联起来,然后找出随后的 5 天。

为此,我需要关联两个数据框: 数据框 1 包含股票标识符和一个特定日期(开始日期)。 数据框 2 包含一年(2020 年)期间不同股票代码的全部每日对数收益。 数据框 2 如下所示:

下面是重现示例的代码:

# Reproducible example data ----

# Data frame 1: one row per stock identifier with its start date.
Identifier <- c("ACCR.PK", "ANIX.OQ", "TLRS.PK")
Dates <- c("2019-11-22", "2019-11-01", "2019-11-15")
df1 <- data.frame(Identifier, Dates)


# Data frame 2: daily log returns in wide format, one column per stock and
# one row per calendar day listed in `Timeframe` (27 days).
Timeframe <- c("2019-11-04", "2019-11-05", "2019-11-06", "2019-11-07", "2019-11-08", "2019-11-09", "2019-11-10", "2019-11-11", "2019-11-12", "2019-11-13", "2019-11-14", "2019-11-15", "2019-11-16", "2019-11-17", "2019-11-18", "2019-11-19", "2019-11-20",
 "2019-11-21", "2019-11-22", "2019-11-23", "2019-11-24", "2019-11-25", "2019-11-26", "2019-11-27", "2019-11-28", "2019-11-29", "2019-11-30")

ACCR.PK <- c(-0.15415068, 0.15415068, 0.487703206, 0.782759339, -0.577315365, 0, 0.145953913, -0.01242252, -0.064538521, 0.026317308, -0.124297717, 0.097980408, -0.679901954, 0.051293294, -0.162518929, 0.028987537, 0.451985124,
 -0.09531018, 0, -0.105360516, -0.045462374, 0.022989518, 0.127833372, 0, 0.336472237, 0, -0.15415068)

HURC.OQ <- c(0.00252986782857967, 0.00392267244379774, -0.00673403218134361, 0.00334262149668962, 0.0131158570574628, -0.00891122577543113, 0.00669085295092264, -0.00669085295092264, -0.00420463128203163, -0.00365836907245454, -0.01534120996679, -0.00315412447869745, -0.00201236232924185, -0.0104137475666262, -0.00934859277129974,
 0.0269308298165383, 0.0237165266173163, -0.00501813152284614, -0.0109597837232012, 0.00334262149668962, -0.0119387432820877, 0.00712355199277548, 0.0216270190228793, 0.013797128357417, 0.041337071491812, 0.00733563677238935, 0.041337071491812)

ANIX.OQ <- c(0.00629328697578901, 0.0112290637164134, -0.0288999622523214, -0.0064572560759153, 0.0102302682508149, 0.00507615303186082, -0.0309813325455195, 0.00518304563137528, 0.00015585630080639, -0.00260078170005729, -0.0263867551731949, -0.0437228110138317, -0.0140649294674036, -0.0200292818755725, 0.0256790144176915, 0.0236615074981583, 0.0703674421501179,
 -0.00256739550524565, 0, -0.0155443544378002, -0.0131407935610586, 0.00530505222969313, -0.00264900817157687, -0.00798939003347865, 0.018543577712169, 0.0182059644965724, 0.041337071491812)

UBP.N <- c(0.0132452267500205, 0, -0.00400802139753864, 0.00242782617802106, -0.00263922032319019, -0.0149097543662875, -0.00484002020400087, -0.00215866246803786, 0.00753501950441837, 0, -0.00807541400554568,
 0.0224488315394535, -0.0116960397631916, -0.00643779047484871, -0.00973506877075225, 0.0118856072339812, 0.00967378806172681, -0.000585246485274027, 0, 0.0179329700267874, 0.0140152826171209, -0.0040442494375279, -0.000087987847382287, 0, -0.00623054975063608, 0.00260078170005729, 0.0144406841547942)

# NOTE(review): entries such as `-0,143481723477691` look like decimal commas
# (presumably meant as -0.143481723477691), which R parses as the two values
# -0 and 143481723477691. The vector is left untouched because repairing the
# commas would leave fewer values than there are dates -- confirm against the
# original data before relying on this series.
TLRS.PK <- c(-0.0723206615796261, -0.0645385211375711, -0.0689928714869512, 0.0689928714869512, 0.0744888519907394, -0, 143481723477691, -0.0037336695520489, -0.0433188747188424, 0, -0.092709398104267, 0, 123481056771021, 0.0394340692454072, 0.0682815074164056, 0, 0.0209496263115372, 0, -0, 130053128248198, 0, 183478294921779, 0, -0, 141323855167744, -0.033855636939339, -0.0422003544903764)

# ACCR.PK is listed in df1, so its return series has to be a column of df2 as
# well; it is appended last so the positions of the other columns are unchanged.
df2 <- data.frame(Timeframe, UBP.N, HURC.OQ, ANIX.OQ, TLRS.PK, ACCR.PK)

我卡在了这一步:如何合并两个数据框、找出后续日期,并将它们与每日对数收益关联起来。

有人可以帮我吗?

【问题讨论】:

  • 你能提供第一个样本数据吗? (请不要只是一张照片。)也许张贴 dput(head(df, 17)) 的输出。 (请确保此示例数据涵盖 frame1(输出)中的日期。)
  • @r2evans - 我发布数据以重现示例。我希望这有帮助!
  • 你确实发了,但你的 Timeframe 有误:需要给这些日期加上引号("),之后可能还要用 as.Date 转换。我已经帮你把引号修好了。

标签: r return calculated-columns stock


【解决方案1】:

将日期转换为Date-class 后,

# Coerce both date columns to class Date so that date arithmetic and the join
# on the date column operate on real dates rather than on strings.
df1[["Dates"]] <- as.Date(df1[["Dates"]])
df2[["Timeframe"]] <- as.Date(df2[["Timeframe"]])

我们可以用 dplyr 做到这一点:

library(dplyr)
library(tidyr) # pivot_longer

# For every row of df1, compute the standard deviation of the daily log
# returns over the 5 calendar days that follow its start date, i.e. the dates
# Dates + 1 through Dates + 5 (the start date itself is excluded).
df1 %>%
  # Row id keeps repeated Identifier rows apart when grouping later on.
  mutate(rn = row_number()) %>%
  rowwise() %>%
  # reframe() (dplyr >= 1.1.0) is the supported way to return several rows per
  # group; doing so with summarize() is deprecated.
  reframe(
    rn,
    Identifier,
    Start = Dates,
    Timeframe = seq(Dates + 1, Dates + 5, by = "days")
  ) %>%
  # Attach each stock's return for every day of its window; days without a
  # matching row in df2 end up with value = NA.
  left_join(
    pivot_longer(df2, -Timeframe, names_to = "Identifier"),
    by = c("Identifier", "Timeframe")
  ) %>%
  group_by(rn, Identifier) %>%
  summarize(
    Timeframe = first(Start), # report the start date of the window
    sigma = sd(value, na.rm = TRUE),
    .groups = "drop"
  )
# # A tibble: 3 × 4
# #      rn Identifier Timeframe      sigma
# #   <int> <chr>      <date>         <dbl>
# # 1     1 ACCR.PK    2019-11-22 NA       
# # 2     2 ANIX.OQ    2019-11-01  2.19e- 2
# # 3     3 TLRS.PK    2019-11-15  5.52e+13

快速验证:

# Spot check for ANIX.OQ (row 2 of df1): standard deviation of the returns
# whose date lies in the closed interval [start date, start date + 5].
sd(
  subset(
    df2,
    Timeframe >= df1$Dates[2] & Timeframe <= df1$Dates[2] + 5
  )$ANIX.OQ
)
# [1] 0.02188327

数据

# Sample data in dput() form. Both date columns are already of class "Date",
# stored as day counts since 1970-01-01 (e.g. 18222 is 2019-11-22).
df1 <- structure(list(Identifier = c("ACCR.PK", "ANIX.OQ", "TLRS.PK"), Dates = structure(c(18222, 18201, 18215), class = "Date")), row.names = c(NA, -3L), class = "data.frame")
# NOTE(review): df2 has no ACCR.PK column although df1 lists that identifier,
# so ACCR.PK has no return data to match against.
# NOTE(review): TLRS.PK contains very large integers (e.g. 143481723477691),
# each preceded by a 0. These look like decimal commas from the original post
# ("-0,143481723477691") parsed as two separate values -- confirm against the
# source data.
df2 <- structure(list(Timeframe = structure(c(18204, 18205, 18206, 18207, 18208, 18209, 18210, 18211, 18212, 18213, 18214, 18215, 18216, 18217, 18218, 18219, 18220, 18221, 18222, 18223, 18224, 18225, 18226, 18227, 18228, 18229, 18230), class = "Date"), UBP.N = c(0.0132452267500205, 0, -0.00400802139753864, 0.00242782617802106, -0.00263922032319019, -0.0149097543662875, -0.00484002020400087, -0.00215866246803786, 0.00753501950441837, 0, -0.00807541400554568, 0.0224488315394535, -0.0116960397631916, -0.00643779047484871,  -0.00973506877075225, 0.0118856072339812, 0.00967378806172681, -0.000585246485274027, 0, 0.0179329700267874, 0.0140152826171209, -0.0040442494375279, -8.7987847382287e-05, 0, -0.00623054975063608, 0.00260078170005729, 0.0144406841547942), HURC.OQ = c(0.00252986782857967, 0.00392267244379774, -0.00673403218134361, 0.00334262149668962, 0.0131158570574628, -0.00891122577543113, 0.00669085295092264, -0.00669085295092264, -0.00420463128203163, -0.00365836907245454, -0.01534120996679, -0.00315412447869745,  -0.00201236232924185, -0.0104137475666262, -0.00934859277129974, 0.0269308298165383, 0.0237165266173163, -0.00501813152284614, -0.0109597837232012, 0.00334262149668962, -0.0119387432820877, 0.00712355199277548, 0.0216270190228793, 0.013797128357417, 0.041337071491812, 0.00733563677238935, 0.041337071491812), ANIX.OQ = c(0.00629328697578901, 0.0112290637164134, -0.0288999622523214, -0.0064572560759153, 0.0102302682508149, 0.00507615303186082, -0.0309813325455195, 0.00518304563137528, 0.00015585630080639,  -0.00260078170005729, -0.0263867551731949, -0.0437228110138317, -0.0140649294674036, -0.0200292818755725, 0.0256790144176915, 0.0236615074981583, 0.0703674421501179, -0.00256739550524565, 0, -0.0155443544378002, -0.0131407935610586, 0.00530505222969313, -0.00264900817157687, -0.00798939003347865, 0.018543577712169, 0.0182059644965724, 0.041337071491812), TLRS.PK = c(-0.0723206615796261, -0.0645385211375711, -0.0689928714869512, 0.0689928714869512, 
0.0744888519907394, 0, 143481723477691, -0.0037336695520489,  -0.0433188747188424, 0, -0.092709398104267, 0, 123481056771021, 0.0394340692454072, 0.0682815074164056, 0, 0.0209496263115372, 0, 0, 130053128248198, 0, 183478294921779, 0, 0, 141323855167744, -0.033855636939339, -0.0422003544903764)), row.names = c(NA, -27L), class = "data.frame")

【讨论】:

    猜你喜欢
    • 1970-01-01
    • 2020-06-14
    • 2018-02-28
    • 1970-01-01
    • 2018-11-21
    • 1970-01-01
    • 2019-03-04
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多