【发布时间】:2017-10-14 17:45:27
【问题描述】:
请参考数据的输出。您可以直接向下滚动到目标和问题陈述。也许您不需要数据,因为您之前可能遇到过这个问题。
调用所需的库
library(zoo)
library(ggplot2)
library(scales)
library(plotly)
library(ggthemes)
library(forecast)
library(plotly)
library(DescTools)
数据输入
dput(ridership.ts)
structure(c(1709L, 1621L, 1973L, 1812L, 1975L, 1862L, 1940L,
2013L, 1596L, 1725L, 1676L, 1814L, 1615L, 1557L, 1891L, 1956L,
1885L, 1623L, 1903L, 1997L, 1704L, 1810L, 1862L, 1875L, 1705L,
1619L, 1837L, 1957L, 1917L, 1882L, 1933L, 1996L, 1673L, 1753L,
1720L, 1734L, 1563L, 1574L, 1903L, 1834L, 1831L, 1776L, 1868L,
1907L, 1686L, 1779L, 1776L, 1783L, 1548L, 1497L, 1798L, 1733L,
1772L, 1761L, 1792L, 1875L, 1571L, 1647L, 1673L, 1657L, 1382L,
1361L, 1559L, 1608L, 1697L, 1693L, 1836L, 1943L, 1551L, 1687L,
1576L, 1700L, 1397L, 1372L, 1708L, 1655L, 1763L, 1776L, 1934L,
2008L, 1616L, 1774L, 1732L, 1797L, 1570L, 1413L, 1755L, 1825L,
1843L, 1826L, 1968L, 1922L, 1670L, 1791L, 1817L, 1847L, 1599L,
1549L, 1832L, 1840L, 1846L, 1865L, 1966L, 1949L, 1607L, 1804L,
1850L, 1836L, 1542L, 1617L, 1920L, 1971L, 1992L, 2010L, 2054L,
2097L, 1824L, 1977L, 1981L, 2000L, 1683L, 1663L, 2008L, 2024L,
2047L, 2073L, 2127L, 2203L, 1708L, 1951L, 1974L, 1985L, 1760L,
1771L, 2020L, 2048L, 2069L, 1994L, 2075L, 2027L, 1734L, 1917L,
1858L, 1996L, 1778L, 1749L, 2066L, 2099L, 2105L, 2130L, 2223L,
2174L, 1931L, 2121L, 2076L, 2141L, 1832L, 1838L, 2132L), .Tsp = c(1991,
2004.16666666667, 12), class = "ts")
创建ts对象的数据框以使用ggplot
tsd = data.frame(time = as.Date(ridership.ts),
value = as.matrix(ridership.ts))
建立线性模型
ridership.lm <- tslm(ridership.ts ~ trend + I(trend^2))
向现有数据框 tsd 添加新列
tsd$linear_fit = as.matrix(ridership.lm$fitted.values)
定义验证和训练周期的长度
nValid = 36
nTrain = length(ridership.ts) - nValid
训练数据
train.ts = window(ridership.ts,
start = c(1991, 1),
end = c(1991, nTrain))
验证数据
valid.ts = window(ridership.ts,
start = c(1991, nTrain + 1),
end = c(1991, nTrain + nValid))
建筑模型
ridership.lm = tslm(train.ts ~ trend + I(trend^2))
使用我们的构建模型进行预测
ridership.lm.pred = forecast(ridership.lm, h = nValid, level = 0)
为拟合的模型值制作数据框
tsd_train_model = data.frame(time = as.Date(train.ts),
lm_fit_train = as.matrix(ridership.lm$fitted.values))
为绘图目的制作数据框
forecast_df = data.frame(time = as.Date(valid.ts),
value = as.matrix(ridership.lm.pred$mean))
使用 ggplot 创建绘图
p1 = ggplot(data = tsd,
aes(x = time, y = value)) +
geom_line(color = 'blue') +
ylim(1300, 2300) +
geom_line(data = tsd_train_model,
aes(x = time, y = lm_fit_train),
color = 'red')
p2 = p1 +
geom_line(data = forecast_df,
aes(x = time, y = value),
col = 'red', linetype = 'dotted') +
scale_x_date(breaks = date_breaks('1 years'),
labels = date_format('%b-%y')) +
geom_vline(xintercept = as.numeric(c(tsd_train_model[NROW(tsd_train_model), ]$time, #last date of training period
forecast_df[NROW(forecast_df), ]$time))) #last date of testing period
p3 = p2 +
annotate('text',
x = c(tsd_train_model[NROW(tsd_train_model)/2, ]$time,
forecast_df[NROW(forecast_df) / 2,]$time),
y = 2250,
label = c('Training Period', 'Validation Period'))
目的:我想在预测线的两边(图中红色虚线)加上5个百分位和95个百分位的预测误差,并对区域进行阴影处理。
我使用分位数来生成预测范围
q = quantile(ridership.lm.pred$residuals, c(.05, .95))
percentile_5 = as.numeric(q[1])
percentile_95 = as.numeric(q[2])
为预测数据添加 5 个百分位和 95 个百分位
yl = forecast_df$value + percentile_5
ym = forecast_df$value + percentile_95
问题:如果我使用下面的命令,那么它不会在整个验证期内显示阴影区域。
p3 + geom_ribbon(data = forecast_df,
aes(ymin = yl,
ymax = ym),
fill="gray30")
NROW(yl)
[1]36
sum(is.na(yl))
[1] 0
NROW(ym)
[1] 36
sum(is.na(ym))
[1] 0
尝试过的事情:如果我将 ymin 和 ymax 的值替换为任何其他值 例如,如果我使用下面的命令,那么我会得到命令正下方显示的图
p3 + geom_ribbon(data = forecast_df,
aes(ymin = rep(1750,36),
ymax = rep(2000,36),
fill="gray30"))
我的问题:
谁能告诉我图 2 中输出背后的原因?为什么 R 会给出如图 2 所示的奇怪输出?
谁能帮我用 ggplot 为整个区域着色?
【问题讨论】:
-
请在代码中注明您使用的所有包。我不认为
tslm是基本包的一部分。与将ts对象转换为Date对象相同。这将有助于其他人重现您的问题以进行故障排除。
标签: r ggplot2 time-series timeserieschart desctools