【问题标题】:factor season has new levels 4 , when performing Arima by group in R当在 R 中按组执行 Arima 时,因子季节有新的级别 4
【发布时间】:2026-01-19 13:35:01
【问题描述】:

这里是我的数据集示例

ts=structure(list(Data = structure(c(10L, 14L, 18L, 22L, 26L, 29L, 
32L, 35L, 38L, 1L, 4L, 7L, 11L, 15L, 19L, 23L, 27L, 30L, 33L, 
36L, 39L, 2L, 5L, 8L, 12L, 16L, 20L, 24L, 28L, 31L, 34L, 37L, 
40L, 3L, 6L, 9L, 13L, 17L, 21L, 25L), .Label = c("01.01.2018", 
"01.01.2019", "01.01.2020", "01.02.2018", "01.02.2019", "01.02.2020", 
"01.03.2018", "01.03.2019", "01.03.2020", "01.04.2017", "01.04.2018", 
"01.04.2019", "01.04.2020", "01.05.2017", "01.05.2018", "01.05.2019", 
"01.05.2020", "01.06.2017", "01.06.2018", "01.06.2019", "01.06.2020", 
"01.07.2017", "01.07.2018", "01.07.2019", "01.07.2020", "01.08.2017", 
"01.08.2018", "01.08.2019", "01.09.2017", "01.09.2018", "01.09.2019", 
"01.10.2017", "01.10.2018", "01.10.2019", "01.11.2017", "01.11.2018", 
"01.11.2019", "01.12.2017", "01.12.2018", "01.12.2019"), class = "factor"), 
    client = structure(c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L), .Label = c("Horns", "Kornev"), class = "factor"), stuff = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L), .Label = c("chickens", 
    "hooves", "Oysters"), class = "factor"), Sales = c(374L, 
    12L, 120L, 242L, 227L, 268L, 280L, 419L, 12L, 172L, 336L, 
    117L, 108L, 150L, 90L, 117L, 116L, 146L, 120L, 211L, 213L, 
    67L, 146L, 118L, 152L, 122L, 201L, 497L, 522L, 65L, 268L, 
    441L, 247L, 348L, 445L, 477L, 62L, 226L, 476L, 306L)), .Names = c("Data", 
"client", "stuff", "Sales"), class = "data.frame", row.names = c(NA, 
-40L))

我想按组使用 Arima 模型执行时间序列

#if using dummy
fun_tslm <- function(x, start = "2017-01-04", freq = 12){
  tsw <- ts(x[["Sales"]], start = decimal_date(as.Date(start)), frequency = freq)
  #View(tsw)
  mytslm <- tslm(tsw ~ trend + season)
  mytslm
}

fun_forecast <- function(x, h = 14){
  residarima1 <- auto.arima(x[["residuals"]])
  residualsArimaForecast <- forecast(residarima1, h = h)
  residualsF <- as.numeric(residualsArimaForecast$mean)
  regressionForecast <- forecast(x, h = h)
  regressionF <- as.numeric(regressionForecast$mean)
  forecastR <- regressionF + residualsF
  forecastR
}

tslm_list <- lapply(group_list, fun_tslm)
fore_list <- lapply(tslm_list, fun_forecast)

当我运行这个脚本时 我得到了错误

model.frame.default 中的错误(条款,新数据,na.action = na.action, xlev = object$xlevels) : 因子季节有新的 4 级

但我确实想在可以看到的地方获得带有 Arima 指标的输出 1.预测初始值

2. 使用 CI 预测 14 个月

初始值和预测值的输出应位于两个不同的data.frame 中。 怎么做?

【问题讨论】:

  • 您似乎使用了一些外国软件包,您可以将它们添加到您的脚本中吗?此外,您的最后几行暗示使用 group_list ,之前没有出现(也许是 ts?)。

标签: r dplyr time-series forecasting arima


【解决方案1】:

你的脚本和数据中有些部分不是太清楚,所以我可以尝试给你一个部分答案,看看如何得到你想要的结果:

# I called your dataset in this way, because ts is a function
timeseries

现在,想法是将您的数据框转换为列表,列表的每个组件都是一个组,即一个时间序列。我想象每个组都是客户端 + 东西,但我们可以用不同的方式管理它:

# first the grouping variable
timeseries$group <- paste0(timeseries$client,timeseries$stuff)

# EDIT here you convert the Data class as class(date)
library(lubridate)
timeseries$Data <- dmy(timeseries$Data)

# now the list
listed <- split(timeseries,timeseries$group)

现在我们必须将列表的每个组件定义为时间序列,使用 lapplyts 函数:

 # I do not understand why all your ts start with "2017-01-04", when in the example they are not (probably because it's an example)

 # EDIT: convert the start date
 listed_ts <- lapply(listed,
                     function(x) ts(x[["Sales"]], start = ymd("2017-01-04"), frequency = 12)  ) 

    listed_ts
$`Hornschickens`
      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov
17170 374  12 120 242 227 268 280 419  12 172 336

$Hornshooves
      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 497 522  65 268 441 247 348 445 477  62 226 476
17171 306                                            

$KornevOysters
      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
17170 117 108 150  90 117 116 146 120 211 213  67 146
17171 118 152 122 201       

下一步是auto.arima 每个时间序列,使用lapply 逻辑:

library(forecast)
listed_arima <- lapply(listed_ts,function(x) auto.arima(x) )
# partial result
> listed_arima
$`Hornschickens`
Series: x 
ARIMA(0,0,0) with non-zero mean 

Coefficients:
          mean
      223.8182
s.e.   38.7707

sigma^2 estimated as 18188:  log likelihood=-69.03
AIC=142.06   AICc=143.56   BIC=142.86
...

现在每个有马的预测:

listed_forecast <- lapply(listed_arima,function(x) forecast(x,1) )

如果您需要将其扁平化为 data.frame,do.callrbind 帮助:

do.call(rbind,listed_forecast)

              method                            model   level     mean     lower     upper     x          series fitted     residuals 
Hornschickens "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 223.8182 Numeric,2 Numeric,2 Integer,11 "x"    Numeric,11 Numeric,11
Hornshooves   "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 336.9231 Numeric,2 Numeric,2 Integer,13 "x"    Numeric,13 Numeric,13
KornevOysters "ARIMA(0,0,0) with non-zero mean" List,18 Numeric,2 137.125  Numeric,2 Numeric,2 Integer,16 "x"    Numeric,16 Numeric,16

我认为你可以多扭曲一下以获得更好的结果。如果你想知道为什么对于这个例子,如果你在auto.arima函数中放入超过1来预测,但结果是一个常数,答案是here,同样由method一栏指出输出。

【讨论】:

  • 好答案,但是任何问题。listed_forecast
  • 第二个问题 .listed_forecast
  • ,我想问我如何通过 auto.arima 值对我的初始数据进行预测?好吧,让我们看看我的示例 Sales for 01.04.2017 Horns chickens=374。正确的?如何查看 auto.arima 模型在该日期预测的值?你明白我想要什么吗?
  • 第一个问题呢,我使用 lubridate timeseries$Data=dmy(timeseries$Data)listed_ts
  • 已编辑,要更改的内容有两处,如果不清楚请告诉我。