【问题标题】:Add the average value of a column as a point in a line plot将列的平均值添加为折线图中的一个点
【发布时间】:2020-10-02 22:41:27
【问题描述】:

我想以特定间隔将列的平均值添加为点/文本。

我有这张图:

我的数据框包含以下数据:

# Example data (looks like dput() output): a data.frame with numeric columns
# optz, e_val and Ctime, an integer column step (values 20/30/40) and a factor
# column method with levels "QV" and "VN"; row names run from 2 to 61.
# NOTE(review): the snippets further down refer to this object as `df`, so the
# expression presumably has to be assigned to `df` before they are run.
structure(list(optz = c(1067, 1067, 1067, 1067, 1067, 1067, 1067, 
1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 
1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 
1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 
1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 
1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067, 1067), e_val = c(6.1565, 
5.4915, 4.647, 3.7871, 5.2397, 5.8252, 4.647, 5.8252, 4.5617, 
4.3906, 5.0711, 5.9082, 4.7321, 4.9866, 4.8171, 3.7003, 5.4078, 
4.0467, 4.7321, 4.647, 4.647, 5.0711, 4.0467, 4.9019, 5.3238, 
4.9866, 5.2397, 4.0467, 5.2397, 5.3238, 4.7321, 3.5262, 6.1565, 
3.4389, 5.6587, 5.742, 4.7321, 5.3238, 4.7321, 4.647, 4.0467, 
5.0711, 4.4762, 4.5617, 4.9019, 4.3906, 3.2638, 4.8171, 3.2638, 
4.647, 4.3049, 3.5262, 4.3906, 4.1329, 4.4762, 4.0467, 5.1555, 
5.1555, 3.3514, 3.3514), Ctime = c(100.8656, 105.7428, 113.8558, 
108.7743, 103.1432, 111.9795, 108.8254, 107.0829, 99.2327, 100.002, 
88.4304, 105.3633, 106.6242, 98.2432, 102.5163, 110.1165, 95.3913, 
95.3288, 108.5169, 114.2813, 152.393, 164.0396, 135.2379, 161.8865, 
162.6032, 139.7406, 163.0824, 167.3038, 155.7465, 166.0113, 159.9243, 
147.9156, 153.5766, 160.7025, 168.5825, 138.6539, 140.4861, 135.3083, 
153.4059, 152.83, 214.338, 214.0513, 192.4158, 203.7792, 216.6077, 
202.8908, 205.3991, 216.3706, 215.6574, 210.9652, 190.4783, 182.6495, 
205.7483, 199.6231, 194.0155, 203.9086, 196.9656, 205.351, 186.1295, 
173.8961), step = c(20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 
20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 20L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 30L, 
30L, 30L, 30L, 30L, 30L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 
40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L, 40L), 
    method = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L), .Label = c("QV", "VN"), class = "factor")), row.names = 2:61, class = "data.frame")

我的代码是:

# Smoothed e_val against Ctime, one curve per method, with an 80% confidence
# band; the title and both axis labels are set in a single labs() call.
ggplot(df, aes(Ctime, e_val, group = method, colour = method)) +
  stat_smooth(level = 0.8) +
  labs(title = 'M1 vs M2 _ time', x = 'Ctime', y = 'e_val')

我想将step 列的平均值添加到轴x 上(动态)出现的间隔上。 例如对于轴 x 上显示的自动生成的间隔 {120, 160, 200}

# Mean of `step` per method over the rows where Ctime - Ctime %% b == 0,
# i.e. Ctime floors to 0 at the break b (for the positive Ctime values in this
# data: the rows lying below that break). The trailing comments give the
# values reported for b = 120, 160 and 200.
with(df, mean(step[method == 'QV' & (Ctime - Ctime %% 120 == 0)])) # 20
with(df, mean(step[method == 'QV' & (Ctime - Ctime %% 160 == 0)])) # 24.44
with(df, mean(step[method == 'QV' & (Ctime - Ctime %% 200 == 0)])) # 28.88

with(df, mean(step[method == 'VN' & (Ctime - Ctime %% 120 == 0)])) # 20
with(df, mean(step[method == 'VN' & (Ctime - Ctime %% 160 == 0)])) # 22.85
with(df, mean(step[method == 'VN' & (Ctime - Ctime %% 200 == 0)])) # 25.71

所以想要的图应该是这样的:

【问题讨论】:

  • 你试过annotate功能吗?

标签: r ggplot2 plot


【解决方案1】:

尝试这种方法计算 loess() 以获得所需的点,并使用这两个选项中的任何一个添加它们。关键是loess() 值。代码如下:

library(tidyverse)
# Base plot: smoothed e_val over Ctime, one curve per method, 80% band
p1 <- ggplot(df, aes(Ctime, e_val, group = method, colour = method)) +
  stat_smooth(level = 0.8) +
  labs(title = 'M1 vs M2 _ time', x = 'Ctime', y = 'e_val')
# Label values: mean `step` per method over the rows where Ctime floors to 0
# at the break (v* = method QV, w* = method VN; breaks 120, 160, 200)
v1 <- with(df, mean(step[method == 'QV' & (Ctime - Ctime %% 120 == 0)])) # 20
v2 <- with(df, mean(step[method == 'QV' & (Ctime - Ctime %% 160 == 0)])) # 24.44
v3 <- with(df, mean(step[method == 'QV' & (Ctime - Ctime %% 200 == 0)])) # 28.88
w1 <- with(df, mean(step[method == 'VN' & (Ctime - Ctime %% 120 == 0)])) # 20
w2 <- with(df, mean(step[method == 'VN' & (Ctime - Ctime %% 160 == 0)])) # 22.85
w3 <- with(df, mean(step[method == 'VN' & (Ctime - Ctime %% 200 == 0)])) # 25.71

一个基本步骤是每个组的loess() 模型:

#Models and labels
m1 <- loess(e_val~Ctime,data=df[df$method=='VN',])
m2 <- loess(e_val~Ctime,data=df[df$method=='QV',])
pred1 <- predict(m1,data.frame(Ctime=c(120,160,200)))
pred2 <- predict(m2,data.frame(Ctime=c(120,160,200)))

这样,我们计算位置和标签:

#Labels and positions 1
df1 <- data.frame(x=c(120,160,200),
                  y=pred1,
                  lab=c(v1,v2,v3))
df2 <- data.frame(x=c(120,160,200),
                  y=pred2,
                  lab=c(w1,w2,w3))

#Labels and positions 2
df1 <- data.frame(x=c(120,160,200),
                  y=pred1,
                  lab=c(v1,v2,v3),
                  method='VN')
df2 <- data.frame(x=c(120,160,200),
                  y=pred2,
                  lab=c(w1,w2,w3),
                  method='QV')
df3 <- rbind(df1,df2)

现在第一个选项:

#Option 1
q1 <- p1+annotate(geom = 'point',x=df1$x,y=df1$y,color='blue',size=3,)+
  annotate(geom = 'point',x=df2$x,y=df2$y,color='red',size=3,)+
  annotate(geom = 'text',x=df1$x,y=df1$y,label=round(df1$lab,2),
           size=3,hjust=-0.5,color='cyan4',fontface='bold')+
  annotate(geom = 'text',x=df2$x,y=df2$y,label=round(df2$lab,2),
           size=3,hjust=-0.5,color='tomato',fontface='bold')

输出:

第二个选项:

#Option 2
q1 <- p1+geom_point(data=df3,aes(x=x,y=y,group=method,color=method),size=3,show.legend = F)+
  geom_text(data=df3,aes(x=x,y=y,group=method,label=round(lab,2)),
            hjust=-0.5,size=3,fontface='bold',show.legend = F)

输出:

更新:您可以用一个向量(例如 mybreaks)来定义 x 轴的刻度(breaks)位置

library(tidyverse)
#Code
# x positions shared by the axis breaks, the label values and the marked
# points; change this one vector to move all three together.
mybreaks <- c(100, 150, 200)
#Plot
p1 <- ggplot(data = df, aes(x = Ctime, y = e_val, group = method, color = method)) +
  stat_smooth(data = df, level = 0.8) +
  scale_x_continuous(breaks = mybreaks) +
  ggtitle('M1 vs M2 _ time') +
  ylab('e_val') +
  xlab('Ctime')
#Create labels
# Mean `step` of one method over the rows where Ctime floors to 0 at `brk`
# (Ctime - Ctime %% brk == 0), i.e. the same rule as the hard-coded
# 120 / 160 / 200 expressions, but evaluated at an arbitrary break.
step_mean_at <- function(meth, brk) {
  keep <- df$method == meth & (df$Ctime - df$Ctime %% brk == 0)
  mean(df$step[keep])
}
# One label per break and method, so labels always match `mybreaks`
lab_qv <- vapply(mybreaks, function(b) step_mean_at('QV', b), numeric(1))
lab_vn <- vapply(mybreaks, function(b) step_mean_at('VN', b), numeric(1))
#Models and predictions
# One loess fit per method (m1 = VN, m2 = QV), evaluated at the breaks to get
# the y position of each point
m1 <- loess(e_val ~ Ctime, data = df[df$method == 'VN', ])
m2 <- loess(e_val ~ Ctime, data = df[df$method == 'QV', ])
pred1 <- predict(m1, data.frame(Ctime = mybreaks))
pred2 <- predict(m2, data.frame(Ctime = mybreaks))
#Labels and positions
# Each method's predicted positions are paired with that same method's means:
# df1 = VN (pred1 + lab_vn), df2 = QV (pred2 + lab_qv). The `method` column is
# only needed by Option 2; Option 1 reads x, y and lab.
df1 <- data.frame(x = mybreaks,
                  y = pred1,
                  lab = lab_vn,
                  method = 'VN')
df2 <- data.frame(x = mybreaks,
                  y = pred2,
                  lab = lab_qv,
                  method = 'QV')
df3 <- rbind(df1, df2)
#Option 1
# Fixed-colour points and labels added with annotate()
q1 <- p1 +
  annotate(geom = 'point', x = df1$x, y = df1$y, color = 'blue', size = 3) +
  annotate(geom = 'point', x = df2$x, y = df2$y, color = 'red', size = 3) +
  annotate(geom = 'text', x = df1$x, y = df1$y, label = round(df1$lab, 2),
           size = 3, hjust = -0.5, color = 'cyan4', fontface = 'bold') +
  annotate(geom = 'text', x = df2$x, y = df2$y, label = round(df2$lab, 2),
           size = 3, hjust = -0.5, color = 'tomato', fontface = 'bold')
#Option 2
# Alternative to Option 1 (overwrites q1): layers drawn from df3 so the colour
# follows the `method` mapping of the plot; kept out of the legend
q1 <- p1 +
  geom_point(data = df3, aes(x = x, y = y, group = method, color = method),
             size = 3, show.legend = FALSE) +
  geom_text(data = df3, aes(x = x, y = y, group = method, label = round(lab, 2)),
            hjust = -0.5, size = 3, fontface = 'bold', show.legend = FALSE)

输出:

【讨论】:

  • 非常感谢,这会很好用,但问题是 x 轴上的点从一开始就未知。这些是从 R 动态生成的。有没有办法从 ggplot 获取该信息并添加标签并计算点数?
  • @PanosKal。我认为您可以使用scale_y_continuous() 中的 limits 和 breaks 参数来设置自己的刻度,然后将这些刻度值输入到 loess 模型中。这样你就可以随心所欲地改变范围了!
  • @PanosKal。抱歉,它是 scale_x_continuous(),您可以将 breaks 与定义的向量一起使用。我会添加一个更新。
  • @PanosKal。总是很开心!只是一个题外话。您个人资料中的图片,该图片来自哪里?这是一个非常好的里昂!
  • 这是一本旧书,我没看过,不记得书名了,只是觉得图片不错:)
猜你喜欢
  • 1970-01-01
  • 1970-01-01
  • 2021-10-02
  • 1970-01-01
  • 1970-01-01
  • 2014-08-20
  • 1970-01-01
  • 2017-12-13
  • 2016-11-30
相关资源
最近更新 更多