【发布时间】:2020-10-06 07:54:15
【问题描述】:
我正在尝试对按股票分组的数据集进行滚动多元回归。数据集的示例如下所示。它从 1991 年到 2019 年,包含股票回报等信息。我打算使用 36 个月的滚动回归窗口对每只股票的 EPU_Paper 上的因变量 ExcessReturn 进行回归。我还希望股票在 36 个月的滚动窗口中至少有 18 个月的收益观察。在这种情况下,ISIN 是每只股票的标识符。我还想在回归中包含 SIZE、INVEST、BM 和 OP 作为对照。数据集不包含任何 NA,除了 beta_monthly 列是所有 NA。 .
这是我尝试运行的代码。我能够让 for 循环正常工作,但没有得到想要的结果。理想情况下,我希望将 EPU_Paper 的系数附加到正确行的 df_Final 中。
我对我当前尝试的全新解决方案和变体持开放态度。
# Create date sequence
date <- seq(as.Date("1991-01-01"),as.Date("2019-12-31"), by = "month")
## Model
v <- 36 # No. of observations in rolling regression
w <- 18 # observations of stocks requred in period
df_Final$beta_monthly <- NA
for (i in 1:(length(date)-v)) {
beta.tab <- df_Final %>% filter(Date >= date[i] & Date < date[i+v]) %>%
group_by(ISIN) %>% filter(n() >= w) %>%
do(ols.model = lm(formula = ExcessReturn ~ EPU_Paper + SIZE, .)) %>%
mutate(beta_monthly = coefficients(ols.model)[2],
Date = date[v+1])
}
这是数据帧 df_Final。它包含我需要的所有数据。
structure(list(Year = c(2002, 2004, 2011, 2011, 2012, 1993, 2005,
2019, 2005, 1998), Month = c(5, 4, 12, 11, 4, 11, 7, 2, 12, 12
), ISIN = c("NO0003172207", "NO0003072803", "NO0010001118", "NO0010096985",
"NO0010052350", "NO0004031303", "NO0003733800", "NO0003049405",
"NO0003028904", "NO0004684408"), SIZE = c(1143750000, 894618192,
257727844.92, 293346266180.2, 104014912.25, 1312826651.5, 51164845865,
535492777.6, 1.2465e+10, 8815671800), BM = c(2.69336652499494e-06,
6.25913195949328e-07, 3.0680673824874e-07, 9.99841307356348e-07,
3.99901247813628e-06, 3.6136784151303e-06, 6.27009692475242e-07,
6.397720392755e-07, 1.985559566787e-07, 9.2518383241951e-07),
OP = c(-0.0259646808923766, 0.197313839816668, 0.136649432305334,
0.594948150836374, -0.0018535993529254, -0.0801364023870418,
0.130539826349566, 0.0244477246423, 0.620295983086681, 0.103857566765579
), INVEST = c(0.129154816408376, 0.0321275661230328, -0.092547902189399,
0.142434794968375, -0.121033439243494, -0.00124744840099796,
-0.240237999927217, 0.0376008757633188, 0.060294968189705,
0.112664489390554), MonthlyReturn = c(-0.039797852179406,
-0.066030013642565, 0.019230769230769, 0.049271412097704,
-0.12516823687752, -0.02219755826859, 0.057851239669421,
-0.043636363636364, 0.05232436939754, 0.32743529766845),
RiskFreeRate = c(0.00558, 0.00163, 0.00209, 0.00251, 0.00163,
0.00467, 0.00181, 0.00086, 0.00208, 0.00726), ShareTurnover = c(69750L,
5250L, 369135L, 183793926L, 54869L, 2879656L, 7957362L, 367551L,
2478662L, 2245928L), MarketExcessReturn = c(-2.7155, -3.0781,
1.0322, -0.3552, -0.9447, -4.9307, 6.0359, 3.8371, 6.932,
-0.7896), ExcessReturn = c(-4.5377852179406, -6.7660013642565,
1.7140769230769, 4.6761412097704, -12.679823687752, -2.686755826859,
5.6041239669421, -4.4496363636364, 5.024436939754, 32.017529766845
), TradeDate = structure(c(11838, 12538, 15338, 15308, 15460,
8734, 12993, 17955, 13147, 10590), class = "Date"), GR_SIZE = structure(c(3L,
2L, 1L, 3L, 1L, 2L, 3L, 1L, 3L, 3L), .Label = c("1", "2",
"3"), class = "factor"), GR_OP = structure(c(1L, 2L, 2L,
3L, 1L, 1L, 2L, 1L, 3L, 1L), .Label = c("1", "2", "3"), class = "factor"),
GR_BM = structure(c(3L, 2L, 1L, 3L, 3L, 3L, 2L, 2L, 1L, 3L
), .Label = c("1", "2", "3"), class = "factor"), GR_INVEST = structure(c(3L,
2L, 1L, 3L, 1L, 1L, 1L, 2L, 2L, 2L), .Label = c("1", "2",
"3"), class = "factor"), SIZE_BM = structure(c(9L, 5L, 1L,
9L, 3L, 6L, 8L, 2L, 7L, 9L), .Label = c("11", "12", "13",
"21", "22", "23", "31", "32", "33"), class = "factor"), SIZE_OP = structure(c(7L,
5L, 2L, 9L, 1L, 4L, 8L, 1L, 9L, 7L), .Label = c("11", "12",
"13", "21", "22", "23", "31", "32", "33"), class = "factor"),
SIZE_INVEST = structure(c(9L, 5L, 1L, 9L, 1L, 4L, 7L, 2L,
8L, 8L), .Label = c("11", "12", "13", "21", "22", "23", "31",
"32", "33"), class = "factor"), Date = structure(c(11808,
12509, 15309, 15279, 15431, 8705, 12965, 17928, 13118, 10561
), class = "Date"), EPU_Paper = c(53.995111032374, 68.0510031873012,
150.261825109363, 124.78265498286, 47.2994312059608, 164.273390295025,
27.168222382902, 181.297305839429, 29.292072793154, 139.423199892468
), beta_monthly = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA
)), row.names = c(NA, -10L), class = "data.frame")
【问题讨论】:
-
您之前好像问过这个问题:Rolling multiple regression panel data
-
正确,但我想详细说明问题,因此发现我可以再次发布它而不是编辑以前的
标签: r linear-regression rolling-computation