【问题标题】:Rolling multiple regression panel data滚动多元回归面板数据
【发布时间】:2021-01-20 04:34:58
【问题描述】:

我正在尝试对过去 36 个月的时间 t 执行滚动回归,以观察其中 18 个月的公司,但我无法使该功能正常工作。我只想要 var1 的系数。 X, y, z 是控制变量。

这是我尝试运行的数据和代码示例。

structure(list(Year = c(2018, 2014, 2008, 2004, 2005, 2002, 2010, 
2008, 2013, 1998), Month = c(6, 12, 4, 6, 4, 8, 12, 11, 3, 3), 
    ISIN = c("NO0004895103", "NO0010571680", "NO0010010473", 
    "NO0003079709", "NO0003117202", "NO0003073801", "NO0010379266", 
    "NO0004913609", "NO0003072407", "NO0003679102"), SIZE = c(3637822300, 
    1.155e+10, 1925631048.5, 519688494.5, 790931587.08, 127597142.4, 
    2892152759.3, 554108244.7, 73484767, 375946560), BM = c(7.47698486077218e-07, 
    7.92202995594714e-07, 1.9900582030005e-07, 3.59385930497676e-07, 
    6.54209144403066e-07, 6.72227955087816e-07, 1.43273711611857e-06, 
    5.15824596832591e-07, 1.23252873661613e-06, 2.79726876987137e-07
    ), OP = c(0.145781283498513, 0.0433749257278669, 0.130477272126991, 
    0.211294037715838, 0.127523990874984, -0.0185266025249259, 
    -0.0822835066759631, 0.128844560922287, -0.0155446815227611, 
    0.416024745410667), INVEST = c(1.58608106515088, 0.00307252384303782, 
    0.33925195328069, -0.0984424241606425, 0.333479950150282, 
    0.13302885162465, -0.035773912311751, 0.408569401011161, 
    -0.206778240645154, -0.177187857233583), MonthlyReturn = c(-0.019509251810137, 
    0.095308641975309, 0.14864864864865, -0.034364261168385, 
    -0.064512147964095, -0.14080459770115, 0.13580244980708, 
    -0.1890214797136, -0.045226130653266, 0.39682527166336), 
    RiskFreeRate = c(0.00064, 0.00117, 0.00532, 0.00163, 0.00166, 
    0.00594, 0.00206, 0.00499, 0.00153, 0.00332), ShareTurnover = c(31649L, 
    907793L, 5318465L, 1831390L, 8956640L, 302000L, 7333090L, 
    516000L, 75553L, 2836550L), MarketExcessReturn = c(1.0813, 
    2.7638, 11.5701, 6.4672, -3.3108, -1.8598, 10.5445, -9.0969, 
    -0.0793, 10.8305), ExcessReturn = c(-2.0149251810137, 9.4138641975309, 
    14.332864864865, -3.5994261168385, -6.6172147964095, -14.674459770115, 
    13.374244980708, -19.40114797136, -4.6756130653266, 39.350527166336
    ), TradeDate = structure(c(17711, 16434, 13999, 12599, 12902, 
    11929, 14973, 14211, 15791, 10316), class = "Date"), GR_SIZE = structure(c(2L, 
    3L, 2L, 2L, 2L, 1L, 3L, 1L, 1L, 1L), .Label = c("1", "2", 
    "3"), class = "factor"), GR_OP = structure(c(2L, 1L, 2L, 
    2L, 1L, 1L, 1L, 2L, 1L, 3L), .Label = c("1", "2", "3"), class = "factor"), 
    GR_BM = structure(c(2L, 2L, 1L, 1L, 2L, 2L, 3L, 2L, 2L, 1L
    ), .Label = c("1", "2", "3"), class = "factor"), GR_INVEST = structure(c(3L, 
    1L, 3L, 1L, 3L, 3L, 2L, 3L, 1L, 1L), .Label = c("1", "2", 
    "3"), class = "factor"), SIZE_BM = structure(c(5L, 8L, 4L, 
    4L, 5L, 2L, 9L, 2L, 2L, 1L), .Label = c("11", "12", "13", 
    "21", "22", "23", "31", "32", "33"), class = "factor"), SIZE_OP = structure(c(5L, 
    7L, 5L, 5L, 4L, 1L, 7L, 2L, 1L, 3L), .Label = c("11", "12", 
    "13", "21", "22", "23", "31", "32", "33"), class = "factor"), 
    SIZE_INVEST = structure(c(6L, 7L, 6L, 4L, 6L, 3L, 8L, 3L, 
    1L, 1L), .Label = c("11", "12", "13", "21", "22", "23", "31", 
    "32", "33"), class = "factor"), Date = structure(c(17683, 
    16405, 13970, 12570, 12874, 11900, 14944, 14184, 15765, 10286
    ), class = "Date"), EPU_Paper = c(197.436482473082, 181.040599101032, 
    58.1799902251583, 62.5412044042803, 57.4799138334861, 52.420100605017, 
    61.1549363311955, 92.1818760618723, 76.2564063202547, 118.390708950295
    )), row.names = c(NA, -10L), class = "data.frame")
#merge dataframes
df_Final <- merge(df_Final, EPU_Paper, by = c("Year", "Month"))

# Create empty dataframe
date <- seq(as.Date("1991-01-31"),as.Date("2019-12-30"), by = "month")

Beta <- data.frame(ISIN = character(), Beta = numeric(), date =
                          as.Date(character()), stringsAsFactors = FALSE)

## Model
Returns_length <- 36 # No. of observations in rolling regression 
Returns_req <- 18 # observations of stocks requred in period 


for (i in 1:(length(date)-Returns_length)) {
  beta.tab <- df_Final %>% filter(TradeDate >= date[i] & TradeDate < date[i+Returns_length]) %>%
    group_by(ISIN) %>% filter(n() >= Returns_req) %>%
    do(ols.model = lm(data = df_Final, formula = ExcessReturn ~ var1 + x + y + z)) %>%
    mutate(Beta = coef(ols.model)[2]) %>% select("ISIN", "Beta") %>%
    mutate(TradeDate = date[Returns_length+i])
  Beta <- rbind(Beta, beta.tab)}

【问题讨论】:

  • 嗨 j_t。您需要包含某种类型的数据样本,以帮助我们帮助您。您可以将dput(df_Final)) 的输出复制粘贴到您的问题中。如果您认为复制粘贴的数据过多,您可以使用dput(sample_n(df_Final, size = 100))
  • 是的,对不起!我现在添加了一个数据集样本

标签: r dplyr regression


【解决方案1】:

问题中似乎有多个未定义的输入df_FinalEU_paper,但假设数据在组内按时间顺序排列,并且我们希望将lm 应用于从当前行到行 w-1 然后在最后使用注释中的一些生成数据,我们定义了一个 coef.x 函数来计算所需的系数和一个滚动函数 roll 使用它(这里宽度为 2),然后通过群g

library(zoo)

w <- 2
nr <- nrow(DF)
coef.x <- function(ix) coef(lm(y ~ x + x1, DF, subset = ix))[["x"]]
roll <- function(ix) rollapplyr(ix, w, coef.x, fill = NA)
transform(DF, coef.x = ave(1:nr, g, FUN = roll))

给予:

  y              x            x1 g        coef.x
1 1 -0.56047564655  0.4609162060 1            NA
2 2 -0.23017748948 -1.2650612346 1  3.0275676040
3 3  1.55870831415 -0.6868528519 1  0.5590071753
4 4  0.07050839142 -0.4456619701 2            NA
5 5  0.12928773516  1.2240817974 2 17.0127792594
6 6  1.71506498688  0.3598138271 2  0.6306055904

这也可以使用 dplyr 编写,其中 wrollcoef.x 来自上方:

library(dplyr, exclude = c("filter", "lag"))
library(zoo)

DF %>%
  group_by(g) %>%
  mutate(coef.x = roll(cur_group_rows())) %>%
  ungroup

注意

# test data
set.seed(123)
DF <- data.frame(y = 1:6, x = rnorm(6), x1 = rnorm(6), 
  g = c(1, 1, 1, 2, 2, 2))

【讨论】:

  • 我在这里制定了一个更详细的新帖子:stackoverflow.com/questions/64221542/… 不知何故,我根本无法执行您的建议
  • 启动一个新的 R 实例,将输入复制并粘贴到最后的注释中,然后将代码复制并粘贴到答案的正文中。您可以看到它有效,因为答案显示了我们这样做时得到的 R 的输出。
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 2019-02-18
  • 2021-03-04
  • 2017-03-12
  • 1970-01-01
  • 1970-01-01
  • 2021-08-25
  • 2021-08-05
相关资源
最近更新 更多