ggplot2 - 在大小图例中显示多个键（形状）答案

【问题标题】：ggplot2 - Show multiple keys (shapes) in size legend（ggplot2 - 在大小图例中显示多个键（形状））
【发布时间】：2020-10-13 22:13:00
【问题描述】：

我遇到了一个与图例显示相关的问题。我甚至不确定这是否可以用 ggplot 解决，但由于我只是 R 的初级用户，我想这很可能只是因为我自己了解得不够。

背景

我试图把来自两个不同数据集的两组点绘制在同一组美学映射（aesthetics）上：x、y、size。两组点的区别在于沿 y 轴方向的垂直微移（nudge）以及点的形状。

问题

图按预期绘制出来了（参见代码下方的图片）。我唯一无法解决的问题与图例有关。具体来说，我希望尺寸（size）图例能同时显示我在图中使用的两种形状（在本例中为圆形和三角形）。有什么办法可以做到吗？我在网上搜索过，但没有找到与此相关的问题。

数据和代码

在这里，我将我使用的数据作为可重现的示例（通过dput() 获得）。

# Packages 
library(tidyverse)
library(ggtext)
library(janitor)
library(delabj)  
library(wesanderson) 
library(forcats)

# Basic data ----
# Reproducible example data, pasted as dput() output: a 14-row data.frame in
# which every row is a measure recorded for Argentina (iso "ARG").
# Several columns still carry the `label` / `format.stata` attributes they
# were dumped with; `assessment`, `any_food` and `any_medical` additionally
# carry a `labels` attribute naming their numeric codes
# (assessment: liberalising = 1, restrictive = 2).
# `start_date` is a Date column and `type` is a factor with the two levels
# "Liberalizig" and "Restrictive" (the first level is spelled that way in the
# data and is kept verbatim because it is a runtime string).
basedata<-structure(list(country = structure(c("Argentina", "Argentina", 
"Argentina", "Argentina", "Argentina", "Argentina", "Argentina", 
"Argentina", "Argentina", "Argentina", "Argentina", "Argentina", 
"Argentina", "Argentina"), format.stata = "%44s"), iso = structure(c("ARG", 
"ARG", "ARG", "ARG", "ARG", "ARG", "ARG", "ARG", "ARG", "ARG", 
"ARG", "ARG", "ARG", "ARG"), label = "iso_3", format.stata = "%9s"), 
    region = structure(c("Latin America & Caribbean", "Latin America & Caribbean", 
    "Latin America & Caribbean", "Latin America & Caribbean", 
    "Latin America & Caribbean", "Latin America & Caribbean", 
    "Latin America & Caribbean", "Latin America & Caribbean", 
    "Latin America & Caribbean", "Latin America & Caribbean", 
    "Latin America & Caribbean", "Latin America & Caribbean", 
    "Latin America & Caribbean", "Latin America & Caribbean"), label = "Region", format.stata = "%26s"), 
    income_group = structure(c("Upper middle income", "Upper middle income", 
    "Upper middle income", "Upper middle income", "Upper middle income", 
    "Upper middle income", "Upper middle income", "Upper middle income", 
    "Upper middle income", "Upper middle income", "Upper middle income", 
    "Upper middle income", "Upper middle income", "Upper middle income"
    ), label = "Income group", format.stata = "%19s"), gdp = structure(c(519871519807.795, 
    519871519807.795, 519871519807.795, 519871519807.795, 519871519807.795, 
    519871519807.795, 519871519807.795, 519871519807.795, 519871519807.795, 
    519871519807.795, 519871519807.795, 519871519807.795, 519871519807.795, 
    519871519807.795), label = "(first) gdp_o", format.stata = "%9.0g"), 
    assessment = structure(c(2, 2, 1, 1, 2, 1, 2, 1, 1, 1, 1, 
    1, 2, 1), label = "Initial assessment", format.stata = "%12.0g", labels = c(liberalising = 1, 
    restrictive = 2)), start_date = structure(c(18341, 18349, 
    18354, 18366, 18393, 18393, 18270, 18270, 18339, 18354, 18354, 
    18393, 18393, 18351), label = "Announcement date", class = "Date", format.stata = "%td"), 
    duration = structure(c(357, 349, 344, 63, 33, 305, 156, 156, 
    87, 62, 344, 305, 33, 359), format.stata = "%9.0g"), GTAinterventiontype = structure(c("Export licensing requirement", 
    "Export licensing requirement", "Import Internal taxation", 
    "Import Internal taxation", "Import Internal taxation", "Import Internal taxation", 
    "Import licensing requirement", "Import licensing requirement", 
    "Import licensing requirement", "Import licensing requirement", 
    "Import tariff", "Import tariff", "Import tariff", "Import-related, Unknown measure"
    ), label = "GTA intervention type", format.stata = "%38s"), 
    any_food = structure(c(0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 
    0, 0), label = "Product: Food", format.stata = "%8.0g", labels = c(`FALSE` = 0, 
    `TRUE` = 1)), any_medical = structure(c(1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1), label = "Product: Any medical product", format.stata = "%8.0g", labels = c(`FALSE` = 0, 
    `TRUE` = 1)), food_position = structure(c("Net Exporter", 
    "Net Exporter", "Net Exporter", "Net Exporter", "Net Exporter", 
    "Net Exporter", "Net Exporter", "Net Exporter", "Net Exporter", 
    "Net Exporter", "Net Exporter", "Net Exporter", "Net Exporter", 
    "Net Exporter"), format.stata = "%12s"), meds_position = structure(c("Net Importer", 
    "Net Importer", "Net Importer", "Net Importer", "Net Importer", 
    "Net Importer", "Net Importer", "Net Importer", "Net Importer", 
    "Net Importer", "Net Importer", "Net Importer", "Net Importer", 
    "Net Importer"), format.stata = "%12s"), month = c(3, 3, 
    4, 4, 5, 5, 1, 1, 3, 4, 4, 5, 5, 3), Announcement = c("March", 
    "March", "April", "April", "May", "May", "January", "January", 
    "March", "April", "April", "May", "May", "March"), Domain = c("Medical", 
    "Medical", "Medical", "Medical", "Medical", "Medical", "Food & Meds", 
    "Food & Meds", "Medical", "Medical", "Medical", "Medical", 
    "Medical", "Medical"), gdp_group_avg = c("Low Income", "Low Income", 
    "Low Income", "Low Income", "Low Income", "Low Income", "Low Income", 
    "Low Income", "Low Income", "Low Income", "Low Income", "Low Income", 
    "Low Income", "Low Income"), gdp_group_med = c("High Income", 
    "High Income", "High Income", "High Income", "High Income", 
    "High Income", "High Income", "High Income", "High Income", 
    "High Income", "High Income", "High Income", "High Income", 
    "High Income"), shp_point = c(23, 23, 21, 21, 23, 21, 23, 
    21, 21, 21, 21, 21, 23, 21), length_cat = c("More than 3 months", 
    "More than 3 months", "More than 3 months", "[1-3] months", 
    "[1-3] months", "More than 3 months", "More than 3 months", 
    "More than 3 months", "[1-3] months", "[1-3] months", "More than 3 months", 
    "More than 3 months", "[1-3] months", "More than 3 months"
    ), type = structure(c(2L, 2L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 
    1L, 1L, 1L, 2L, 1L), .Label = c("Liberalizig", "Restrictive"
    ), class = "factor")), row.names = c(NA, -14L), class = "data.frame")

# Subset 1 - assessment == 1 ----
# The 9 rows whose `assessment` is 1, pasted as dput() output. Every row has
# `type` level 1 ("Liberalizig") and `shp_point` 21. The row names
# (3, 4, 6, 8, 9, 10, 11, 12, 14) are not renumbered, and unlike `basedata`
# the columns here carry no `label` / `format.stata` attributes.

prova1<-structure(list(country = c("Argentina", "Argentina", "Argentina", 
"Argentina", "Argentina", "Argentina", "Argentina", "Argentina", 
"Argentina"), iso = c("ARG", "ARG", "ARG", "ARG", "ARG", "ARG", 
"ARG", "ARG", "ARG"), region = c("Latin America & Caribbean", 
"Latin America & Caribbean", "Latin America & Caribbean", "Latin America & Caribbean", 
"Latin America & Caribbean", "Latin America & Caribbean", "Latin America & Caribbean", 
"Latin America & Caribbean", "Latin America & Caribbean"), income_group = c("Upper middle income", 
"Upper middle income", "Upper middle income", "Upper middle income", 
"Upper middle income", "Upper middle income", "Upper middle income", 
"Upper middle income", "Upper middle income"), gdp = c(519871519807.795, 
519871519807.795, 519871519807.795, 519871519807.795, 519871519807.795, 
519871519807.795, 519871519807.795, 519871519807.795, 519871519807.795
), assessment = c(1, 1, 1, 1, 1, 1, 1, 1, 1), start_date = structure(c(18354, 
18366, 18393, 18270, 18339, 18354, 18354, 18393, 18351), class = "Date"), 
    duration = c(344, 63, 305, 156, 87, 62, 344, 305, 359), GTAinterventiontype = c("Import Internal taxation", 
    "Import Internal taxation", "Import Internal taxation", "Import licensing requirement", 
    "Import licensing requirement", "Import licensing requirement", 
    "Import tariff", "Import tariff", "Import-related, Unknown measure"
    ), any_food = c(0, 0, 0, 1, 0, 0, 0, 0, 0), any_medical = c(1, 
    1, 1, 1, 1, 1, 1, 1, 1), food_position = c("Net Exporter", 
    "Net Exporter", "Net Exporter", "Net Exporter", "Net Exporter", 
    "Net Exporter", "Net Exporter", "Net Exporter", "Net Exporter"
    ), meds_position = c("Net Importer", "Net Importer", "Net Importer", 
    "Net Importer", "Net Importer", "Net Importer", "Net Importer", 
    "Net Importer", "Net Importer"), month = c(4, 4, 5, 1, 3, 
    4, 4, 5, 3), Announcement = c("April", "April", "May", "January", 
    "March", "April", "April", "May", "March"), Domain = c("Medical", 
    "Medical", "Medical", "Food & Meds", "Medical", "Medical", 
    "Medical", "Medical", "Medical"), gdp_group_avg = c("Low Income", 
    "Low Income", "Low Income", "Low Income", "Low Income", "Low Income", 
    "Low Income", "Low Income", "Low Income"), gdp_group_med = c("High Income", 
    "High Income", "High Income", "High Income", "High Income", 
    "High Income", "High Income", "High Income", "High Income"
    ), shp_point = c(21, 21, 21, 21, 21, 21, 21, 21, 21), length_cat = c("More than 3 months", 
    "[1-3] months", "More than 3 months", "More than 3 months", 
    "[1-3] months", "[1-3] months", "More than 3 months", "More than 3 months", 
    "More than 3 months"), type = structure(c(1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("Liberalizig", "Restrictive"
    ), class = "factor")), row.names = c(3L, 4L, 6L, 8L, 9L, 
10L, 11L, 12L, 14L), class = "data.frame")

# Subset 2 - assessment == 2 ----
# The 5 rows whose `assessment` is 2, pasted as dput() output. Every row has
# `type` level 2 ("Restrictive") and `shp_point` 23. The row names
# (1, 2, 5, 7, 13) are not renumbered, and the columns carry no
# `label` / `format.stata` attributes.

prova2<-structure(list(country = c("Argentina", "Argentina", "Argentina", 
"Argentina", "Argentina"), iso = c("ARG", "ARG", "ARG", "ARG", 
"ARG"), region = c("Latin America & Caribbean", "Latin America & Caribbean", 
"Latin America & Caribbean", "Latin America & Caribbean", "Latin America & Caribbean"
), income_group = c("Upper middle income", "Upper middle income", 
"Upper middle income", "Upper middle income", "Upper middle income"
), gdp = c(519871519807.795, 519871519807.795, 519871519807.795, 
519871519807.795, 519871519807.795), assessment = c(2, 2, 2, 
2, 2), start_date = structure(c(18341, 18349, 18393, 18270, 18393
), class = "Date"), duration = c(357, 349, 33, 156, 33), GTAinterventiontype = c("Export licensing requirement", 
"Export licensing requirement", "Import Internal taxation", "Import licensing requirement", 
"Import tariff"), any_food = c(0, 0, 0, 1, 0), any_medical = c(1, 
1, 1, 1, 1), food_position = c("Net Exporter", "Net Exporter", 
"Net Exporter", "Net Exporter", "Net Exporter"), meds_position = c("Net Importer", 
"Net Importer", "Net Importer", "Net Importer", "Net Importer"
), month = c(3, 3, 5, 1, 5), Announcement = c("March", "March", 
"May", "January", "May"), Domain = c("Medical", "Medical", "Medical", 
"Food & Meds", "Medical"), gdp_group_avg = c("Low Income", "Low Income", 
"Low Income", "Low Income", "Low Income"), gdp_group_med = c("High Income", 
"High Income", "High Income", "High Income", "High Income"), 
    shp_point = c(23, 23, 23, 23, 23), length_cat = c("More than 3 months", 
    "More than 3 months", "[1-3] months", "More than 3 months", 
    "[1-3] months"), type = structure(c(2L, 2L, 2L, 2L, 2L), .Label = c("Liberalizig", 
    "Restrictive"), class = "factor")), row.names = c(1L, 2L, 
5L, 7L, 13L), class = "data.frame")

注意 prova1 和 prova2 只是 basedata 的两个子集。绘图代码如下：

# Draw both subsets on shared x / y / size aesthetics. `prova1` is nudged up
# and `prova2` down by 0.05 on the y axis so that points sharing a date and an
# intervention type do not sit exactly on top of each other.
# The pipeline starts from `basedata`: the example never defines
# `countrydata`, so the original call could not run as posted.
basedata %>%
  ggplot(aes(x = start_date, y = fct_rev(GTAinterventiontype), shape = type)) +
  geom_point(
    data = prova1,
    aes(
      color = fct_rev(GTAinterventiontype),
      size = duration,
      shape = fct_rev(type)
    ),
    alpha = 0.65,
    position = position_nudge(y = +0.05)
  ) +
  geom_point(
    data = prova2,
    aes(
      color = fct_rev(GTAinterventiontype),
      size = duration,
      shape = fct_rev(type)
    ),
    alpha = 0.65,
    position = position_nudge(y = -0.05)
  ) +
  # drop = FALSE keeps unused factor levels in the shape scale.
  scale_shape(drop = FALSE) +
  # "none" hides the colour legend; it replaces the deprecated
  # `color = FALSE` spelling, which emits a warning in current ggplot2.
  guides(
    color = "none",
    shape = guide_legend(order = 1, nrow = 2, ncol = 1),
    size = guide_legend(order = 2)
  ) +
  delabj::theme_delabj() +
  delabj::scale_color_delabj() +
  # delabj::legend_none() +
  labs(
    shape = "Type",
    size = "Duration",
    x = "",
    y = "",
    title = paste("ARG", "Med-related Measures by Announcement date", sep = ": "),
    subtitle = "Bubbles are proportional to expected duration of the measure",
    caption = ""
  )

结果图就是这个。

任何想法、建议，甚至不可行的警告（如果确实如此）都非常受欢迎！

【问题讨论】：

标签： r ggplot2 data-visualization legend legend-properties

【解决方案1】：

试试这个。基本思路是把尺寸图例的断点（breaks）和符号各复制一份；第二步再通过 guide_legend 调整符号。也许并不完美，但这是我尝试了几种方法之后能想到的最好办法。

library(tidyverse)
library(ggtext)
library(janitor)
library(delabj)
library(wesanderson)
library(forcats)

# Size-legend layout ----
# Each size break is listed twice so the size legend can draw it once per
# point shape; the plot below reads `breaks2`, `labels` and `shapes`.
breaks <- c(100, 200, 300)
n_breaks <- length(breaks)
# Two copies of the breaks, one per shape.
breaks2 <- rep(breaks, times = 2)
# pch 16 (solid circle) for the first copy, pch 17 (solid triangle) for the
# second.
shapes <- rep(c(16, 17), each = n_breaks)
# Only the first copy is labelled; the second copy gets empty strings so each
# value is printed once. c() coerces the numeric breaks to character here.
labels <- c(breaks, character(n_breaks))

# Same plot as in the question, plus a size scale whose legend shows every
# break with both point shapes. `breaks2`, `labels` and `shapes` come from the
# setup statements preceding this call.
basedata %>%
  ggplot(aes(x = start_date, y = fct_rev(GTAinterventiontype), shape = type)) +
  geom_point(
    data = prova1,
    aes(
      color = fct_rev(GTAinterventiontype),
      size = duration,
      shape = fct_rev(type)
    ),
    alpha = 0.65,
    position = position_nudge(y = +0.05)
  ) +
  geom_point(
    data = prova2,
    aes(
      color = fct_rev(GTAinterventiontype),
      size = duration,
      shape = fct_rev(type)
    ),
    alpha = 0.65,
    position = position_nudge(y = -0.05)
  ) +
  # The duplicated breaks give the size legend two keys per value.
  # override.aes assigns a shape to each key, and nrow = 2 with byrow = TRUE
  # lays the keys out row by row, so the first (labelled) copy of the breaks
  # forms the top row and the second (unlabelled) copy the bottom row.
  scale_size_continuous(
    breaks = breaks2,
    labels = labels,
    guide = guide_legend(
      order = 2,
      nrow = 2,
      byrow = TRUE,
      override.aes = list(shape = shapes),
      direction = "horizontal",
      label.vjust = -.5
    )
  ) +
  # drop = FALSE keeps unused factor levels in the shape scale.
  scale_shape(drop = FALSE) +
  # "none" hides the colour legend; it replaces the deprecated
  # `color = FALSE` spelling, which emits a warning in current ggplot2.
  guides(
    color = "none",
    shape = guide_legend(order = 1, nrow = 2, ncol = 1)
  ) +
  delabj::theme_delabj() +
  delabj::scale_color_delabj() +
  # delabj::legend_none() +
  labs(
    shape = "Type",
    size = "Duration",
    x = "",
    y = "",
    title = paste("ARG", "Med-related Measures by Announcement date", sep = ": "),
    subtitle = "Bubbles are proportional to expected duration of the measure",
    caption = ""
  )

【讨论】：

谢谢@stefan！从图形上看，这正是我想要的。我只有一个问题要问你。我在问题中省略的是，我想在大量国家/地区进行循环（这里我只提供了阿根廷的数据）。问题是，相对大小会根据我正在考虑的国家/地区而变化。你认为有什么方法可以让scale_size_continuous 元素适应循环每一步所考虑的数据吗？顺便说一句，这正是我想要的！
嗨 @FilippoSanti。一般来说，这没什么大不了的，取决于你想如何设置断点（breaks）。我调整了我的代码，向你展示一种让代码更通用的方法。
非常感谢。你是对的，这没什么大不了的，但不幸的是，我只有在你调整代码后才意识到！再次感谢您的帮助！