ggplot 和 geom_text()答案

【问题标题】：ggplot and the geom_text()（ggplot 和 geom_text()）
【发布时间】：2015-11-27 17:18:29
【问题描述】：

我是 R 和 ggplot2 的新手。我对此进行了很多搜索，但找不到解决方案。

Sample  observation1    observation2    observation3    percentage
sample1_A   163453473   131232689   61984186    30.6236955883
Sample1_B   170151351   137202212   59242536    26.8866816109
sample2_A   194102849   162112484   89158170    40.4183031852
sample2_B   170642240   141888123   79925652    41.7493687378
sample3_A   192858504   161227348   90532447    41.8068248626
sample3_B   177174787   147412720   81523935    40.5463120438
sample4_A   199232380   174656081   118115358   55.6409038531
sample4_B   211128931   186848929   123552556   54.7201927527
sample5_A   186039420   152618196   87012356    40.9656544833
sample5_B   145855252   118225865   66265976    39.5744515254
sample6_A   211165202   186625116   112710053   48.5457722338
sample6_B   220522502   193191927   114882014   47.238670909

我打算用 ggplot2 绘制一个条形图。我想把三个 observation 列绘制成并列（position = "dodge"）的条形，并在 observation3 的条形上标注对应的百分比。我可以像下面这样画出条形图，但无法使用 geom_text() 添加标签。

library(ggplot2)
library(reshape2)

# Read the whitespace-separated table shown above; the first line holds the
# column names.
data1 <- read.table("readStats.txt", header = TRUE)

# Reshape to long format, stacking only the three observation columns.
# Naming the measure columns explicitly replaces the fragile `[1:36, ]` slice,
# which relied on melt() guessing the id/measure split and on row order.
data1.long <- melt(
  data1,
  id.vars = "Sample",
  measure.vars = c("observation1", "observation2", "observation3")
)

# Refer to columns by bare name inside aes(); passing `data1.long$...` vectors
# bypasses the `data` argument and produces unreadable axis/legend titles.
ggplot(data1.long, aes(x = Sample, y = value, fill = variable)) +
  geom_bar(stat = "identity", width = 0.5, position = "dodge")

【问题讨论】：

标签： r ggplot2 bar-chart

【解决方案1】：

将 data1 转换为长格式：observation 列作为度量变量，Sample 和 percentage 列作为 id 变量。计算所有观测值中的最大值 mx，用来确定百分比标签的纵向位置，然后绘图。请注意 geom_bar 使用 data1.long，而 geom_text 使用 data1。我们给文本着色，使百分比与 observation3 条的颜色相同。（有关如何指定默认颜色，请参阅 this post。）两个图层都继承 aes(x = Sample)，但使用不同的 y 和其他美学映射。我们通过删除样本名中的所有小写字母和下划线来清理 X 轴标签（可选）。

library(ggplot2)
library(reshape2)

# `data1` must already exist: it is the wide data frame with columns Sample,
# observation1..observation3 and percentage.

# Long format: one row per (Sample, observation) pair. Sample and percentage
# are kept as id variables. Argument names are spelled out in full instead of
# relying on partial matching (`measure`) and column positions (`2:4`).
data1.long <- melt(
  data1,
  id.vars = c("Sample", "percentage"),
  measure.vars = c("observation1", "observation2", "observation3")
)

# Tallest bar overall; every percentage label is drawn at this height.
mx <- max(data1.long$value)

ggplot(data1.long, aes(x = Sample, y = value)) +
  geom_bar(aes(fill = variable), stat = "identity", width = 0.5,
           position = "dodge") +
  # The labels come from the wide data (one row per sample), so each
  # percentage is drawn once; "#619CFF" is the colour given to the
  # observation3 bars.
  geom_text(aes(y = mx, label = paste0(round(percentage), "%")), data = data1,
            colour = "#619CFF", vjust = -0.5) +
  # Optional: shorten the tick labels by dropping lower-case letters and
  # underscores. A function is applied to the axis breaks themselves, so the
  # labels stay correct whatever order the samples are drawn in (a plain
  # vector would be matched to the breaks by position).
  scale_x_discrete(labels = function(x) gsub("[a-z_]", "", x))

（点击图表放大）

注意：我们使用了下面这些数据。请注意，原始数据中唯一以大写 S 开头的样本名 Sample1_B 已改为小写 s 开头的 sample1_B：

# Sample read statistics embedded as whitespace-delimited text so the example
# is reproducible without an external file. The first line is the header.
sample_text <- "Sample  observation1    observation2    observation3    percentage
sample1_A   163453473   131232689   61984186    30.6236955883
sample1_B   170151351   137202212   59242536    26.8866816109
sample2_A   194102849   162112484   89158170    40.4183031852
sample2_B   170642240   141888123   79925652    41.7493687378
sample3_A   192858504   161227348   90532447    41.8068248626
sample3_B   177174787   147412720   81523935    40.5463120438
sample4_A   199232380   174656081   118115358   55.6409038531
sample4_B   211128931   186848929   123552556   54.7201927527
sample5_A   186039420   152618196   87012356    40.9656544833
sample5_B   145855252   118225865   66265976    39.5744515254
sample6_A   211165202   186625116   112710053   48.5457722338
sample6_B   220522502   193191927   114882014   47.238670909"

# Parse the embedded text into the wide data frame used by the plotting code.
data1 <- read.table(header = TRUE, text = sample_text)

更新：小改进

【讨论】：

【解决方案2】：

G. Grothendieck's answer 可能是一个更好的解决方案，但这是我的建议（代码如下）

# install.packages("ggplot2", dependencies = TRUE)
# library() stops with an error when the package is missing; require() would
# only warn and return FALSE, letting the script fail later at ggplot().
library(ggplot2)

# Wide-format sample data: one row per sample, three integer observation
# counts and a numeric percentage. Sample is a factor whose levels follow the
# row order (note the capital "S" in "Sample1_B").
df <- data.frame(
  Sample = factor(
    1:12,
    labels = c(
      "sample1_A", "Sample1_B", "sample2_A", "sample2_B",
      "sample3_A", "sample3_B", "sample4_A", "sample4_B",
      "sample5_A", "sample5_B", "sample6_A", "sample6_B"
    )
  ),
  observation1 = c(
    163453473L, 170151351L, 194102849L, 170642240L, 192858504L, 177174787L,
    199232380L, 211128931L, 186039420L, 145855252L, 211165202L, 220522502L
  ),
  observation2 = c(
    131232689L, 137202212L, 162112484L, 141888123L, 161227348L, 147412720L,
    174656081L, 186848929L, 152618196L, 118225865L, 186625116L, 193191927L
  ),
  observation3 = c(
    61984186L, 59242536L, 89158170L, 79925652L, 90532447L, 81523935L,
    118115358L, 123552556L, 87012356L, 66265976L, 112710053L, 114882014L
  ),
  percentage = c(
    30.6236955883, 26.8866816109, 40.4183031852, 41.7493687378,
    41.8068248626, 40.5463120438, 55.6409038531, 54.7201927527,
    40.9656544833, 39.5744515254, 48.5457722338, 47.238670909
  )
)

# install.packages("reshape2", dependencies = TRUE)
# library() fails immediately if reshape2 is not installed (require() would
# only warn and return FALSE).
library(reshape2)

# Reshape to long format. `percentage` has to be listed as an id variable:
# melt() drops every column that is neither an id nor a measure variable, and
# the label step below needs it.
data1.long <- melt(
  df,
  id.vars = c("Sample", "percentage"),
  measure.vars = c("observation1", "observation2", "observation3")
)

# Turn percentage into the label text. Only the observation3 rows get a label;
# the observation1/observation2 rows get an empty string so nothing is drawn
# for them. The column is addressed by name rather than by position.
data1.long$percentage <- ifelse(
  data1.long$variable == "observation3",
  paste0(round(data1.long$percentage, 2), "%"),
  ""
)

ggplot(data1.long, aes(x = Sample, y = value, fill = variable)) +
  geom_bar(stat = "identity", width = 0.5, position = "dodge") +
  # Labels are rotated by 90 degrees; vjust/hjust are the original
  # hand-tuned offsets.
  geom_text(aes(label = percentage), vjust = 2.10, size = 2, hjust = -0.06,
            angle = 90)

【讨论】：