【问题标题】:Pyramid plot in RR中的金字塔图
【发布时间】:2015-11-20 16:55:33
【问题描述】:

对于示例数据集,我按国家/地区创建了一个金字塔图,显示人口中超重男性和女性的水平 (%)。

library(plotrix)
xy.males.overweight<-c(23.2,33.5,43.6,33.6,43.5,43.5,43.9,33.7,53.9,43.5,43.2,42.8,22.2,51.8,
          41.5,31.3,60.7,50.4)
    xx.females.overweight<-c(13.2,9.4,13.5,13.5,13.5,23.7,8,3.18,3.9,3.16,23.2,22.5,22,12.7,12.5,
              12.3,10,0.8)
    agelabels<-c("uk","scotland","france","ireland","germany","sweden","norway",
                     "iceland","portugal","austria","switzerland","australia","new zealand","dubai","south africa",
                     "finland","italy","morocco")

    par(mar=pyramid.plot(xy.males.overweight,xx.females.overweight,labels=agelabels,
                                 gap=9))

我在这里使用“plottrix”找到了这种方法: https://stats.stackexchange.com/questions/2455/how-to-make-age-pyramid-like-plot-in-r

我希望创建一个更详细的金字塔图,并在两侧添加一个堆叠条形图,显示男性和女性的超重和肥胖百分比(最好是不同深浅的红色/蓝色)。下面列出了“肥胖”的示例数据值:

xx.females.obese<-c(23.2,33.5,43.6,33.6,43.5,23.5,33.9,33.7,23.9,43.5,18.2,22.8,22.2,31.8,
                       25.5,25.3,31.7,28.4)
xy.males.obese<-c(13.2,9.4,13.5,13.5,13.5,23.7,8,3.18,3.9,3.16,23.2,22.5,22,12.7,12.5,
                  12.3,10,0.8)

此外,如果图表上的“年龄”可以更改(到国家/地区),那将很有帮助。

非常感谢您提供任何帮助/建议。我愿意酌情使用 plotrix 或 ggplot2。

【问题讨论】:

    标签: r plot ggplot2


    【解决方案1】:

    Plotrix 可能更简单,但可以拆解 ggplot 图表,并将它们排列为金字塔图。使用@eipi10 的数据(谢谢),并从drawing-pyramid-plot-using-r-and-ggplot2 改编代码,我为“男性”、“女性”和“国家”标签绘制了单独的图。另外,我从其中一个情节中抓住了一个传说。诀窍是让左侧图表的刻度线出现在图表的右侧 - 我改编了来自mirroring-axis-ticks-in-ggplot2 的代码。使用 gtable 函数将四个位(“女性”图、国家标签、“男性图”和图例)放在一起。

    小修改:更新到 ggplot2 2.2.1

    # Packages
    library(plyr)
    library(ggplot2)
    library(scales)
    library(gtable)
    library(stringr)
    library(grid)
    
    # Data
    mov <-c(23.2,33.5,43.6,33.6,43.5,43.5,43.9,33.7,53.9,43.5,43.2,42.8,22.2,51.8,
                               41.5,31.3,60.7,50.4)
    
    fov<-c(13.2,9.4,13.5,13.5,13.5,23.7,8,3.18,3.9,3.16,23.2,22.5,22,12.7,12.5,
                             12.3,10,0.8)
    fob<-c(23.2,33.5,43.6,33.6,43.5,23.5,33.9,33.7,23.9,43.5,18.2,22.8,22.2,31.8,
                        25.5,25.3,31.7,28.4)
    mob<-c(13.2,9.4,13.5,13.5,13.5,23.7,8,3.18,3.9,3.16,23.2,22.5,22,12.7,12.5,
                      12.3,10,0.8)
    labs<-c("uk","scotland","france","ireland","germany","sweden","norway",
                 "iceland","portugal","austria","switzerland","australia",
                 "new zealand","dubai","south africa",
                 "finland","italy","morocco")
    
    df = data.frame(labs=rep(labs,4), values=c(mov, mob, fov, fob), 
                    sex=rep(c("Male", "Female"), each=2*length(fov)),
                    bmi = rep(rep(c("Overweight", "Obese"), each=length(fov)),2))
    
    # Order countries by overall percent overweight/obese
    labs.order = ddply(df, .(labs), summarise, sum=sum(values))
    labs.order = labs.order$labs[order(labs.order$sum)]
    df$labs = factor(df$labs, levels=labs.order)
    
    
    # Common theme
    theme = theme(panel.grid.minor = element_blank(),
             panel.grid.major = element_blank(), 
             axis.text.y = element_blank(), 
             axis.title.y = element_blank(),
             plot.title = element_text(size = 10, hjust = 0.5))
    
    
    #### 1. "male" plot - to appear on the right
    ggM <- ggplot(data = subset(df, sex == 'Male'), aes(x=labs)) +
       geom_bar(aes(y = values/100, fill = bmi), stat = "identity") +
       scale_y_continuous('', labels = percent, limits = c(0, 1), expand = c(0,0)) + 
       labs(x = NULL) +
       ggtitle("Male") +
       coord_flip() + theme +
       theme(plot.margin= unit(c(1, 0, 0, 0), "lines"))
    
    # get ggplot grob
    gtM <- ggplotGrob(ggM)
    
    
    #### 4. Get the legend
    leg = gtM$grobs[[which(gtM$layout$name == "guide-box")]]
    
    
    #### 1. back to "male" plot - to appear on the right
    # remove legend
    legPos = gtM$layout$l[grepl("guide", gtM$layout$name)]  # legend's position
    gtM = gtM[, -c(legPos-1,legPos)] 
    
    
    #### 2. "female" plot - to appear on the left - 
    # reverse the 'Percent' axis using trans = "reverse"
    ggF <- ggplot(data = subset(df, sex == 'Female'), aes(x=labs)) +
       geom_bar(aes(y = values/100, fill = bmi), stat = "identity") +
       scale_y_continuous('', labels = percent, trans = 'reverse', 
          limits = c(1, 0), expand = c(0,0)) + 
       labs(x = NULL) +
       ggtitle("Female") +
       coord_flip() + theme +
       theme(plot.margin= unit(c(1, 0, 0, 1), "lines"))
    
    # get ggplot grob
    gtF <- ggplotGrob(ggF)
    
    # remove legend
    
    gtF = gtF[, -c(legPos-1,legPos)]
    
    
    ## Swap the tick marks to the right side of the plot panel
    # Get the row number of the left axis in the layout
    rn <- which(gtF$layout$name == "axis-l")
    
    # Extract the axis (tick marks and axis text)
    axis.grob <- gtF$grobs[[rn]]
    axisl <- axis.grob$children[[2]]  # Two children - get the second
    # axisl  # Note: two grobs -  text and tick marks
    
    # Get the tick marks - NOTE: tick marks are second
    yaxis = axisl$grobs[[2]] 
    yaxis$x = yaxis$x - unit(1, "npc") + unit(2.75, "pt") # Reverse them
    
    # Add them to the right side of the panel
    # Add a column to the gtable
    panelPos = gtF$layout[grepl("panel", gtF$layout$name), c('t','l')]
    gtF <- gtable_add_cols(gtF, gtF$widths[3], panelPos$l)
    # Add the grob
    gtF <-  gtable_add_grob(gtF, yaxis, t = panelPos$t, l = panelPos$l+1)
    
    # Remove original left axis
    gtF = gtF[, -c(2,3)] 
    
    
    #### 3. country labels - create a plot using geom_text - to appear down the middle
    fontsize = 3
    ggC <- ggplot(data = subset(df, sex == 'Male'), aes(x=labs)) +
       geom_bar(stat = "identity", aes(y = 0)) +
       geom_text(aes(y = 0,  label = labs), size = fontsize) +
       ggtitle("Country") +
       coord_flip() + theme_bw() + theme +
       theme(panel.border = element_rect(colour = NA))
    
    # get ggplot grob
    gtC <- ggplotGrob(ggC)
    
    # Get the title
    Title = gtC$grobs[[which(gtC$layout$name == "title")]]
    
    # Get the plot panel
    gtC = gtC$grobs[[which(gtC$layout$name == "panel")]]
    
    
    #### Arrange the components
    ## First, combine "female" and "male" plots
    gt = cbind(gtF, gtM, size = "first")
    
    ## Second, add the labels (gtC) down the middle
    # add column to gtable
    maxlab = labs[which(str_length(labs) == max(str_length(labs)))]
    gt = gtable_add_cols(gt, sum(unit(1, "grobwidth", textGrob(maxlab, gp = gpar(fontsize = fontsize*72.27/25.4))), unit(5, "mm")), 
               pos = length(gtF$widths))
    
    # add the grob
    gt = gtable_add_grob(gt, gtC, t = panelPos$t, l = length(gtF$widths) + 1)
    
    # add the title; ie the label 'country' 
    titlePos = gtF$layout$l[which(gtF$layout$name == "title")]
    gt = gtable_add_grob(gt, Title, t = titlePos, l = length(gtF$widths) + 1)
    
    
    ## Third, add the legend to the right
    gt = gtable_add_cols(gt, sum(leg$width), -1)
    gt = gtable_add_grob(gt, leg, t = panelPos$t, l = length(gt$widths))
    
    # draw the plot
    grid.newpage()
    grid.draw(gt)
    

    【讨论】:

    • 向你致敬,桑迪抽出时间提取了中间的国家名称(我在回答中采取了懒惰的路线)。
    • 感谢两位的帮助 - 非常感谢。由于中间的国家标签,我只选择了第二个答案。
    【解决方案2】:

    使用ggplot2 并改编来自this SO answer 的代码:

    library(plyr)
    library(ggplot2)
    
    # Data
    mov <-c(23.2,33.5,43.6,33.6,43.5,43.5,43.9,33.7,53.9,43.5,43.2,42.8,22.2,51.8,
                               41.5,31.3,60.7,50.4)
    
    fov<-c(13.2,9.4,13.5,13.5,13.5,23.7,8,3.18,3.9,3.16,23.2,22.5,22,12.7,12.5,
                             12.3,10,0.8)
    fob<-c(23.2,33.5,43.6,33.6,43.5,23.5,33.9,33.7,23.9,43.5,18.2,22.8,22.2,31.8,
                        25.5,25.3,31.7,28.4)
    mob<-c(13.2,9.4,13.5,13.5,13.5,23.7,8,3.18,3.9,3.16,23.2,22.5,22,12.7,12.5,
                      12.3,10,0.8)
    labs<-c("uk","scotland","france","ireland","germany","sweden","norway",
                 "iceland","portugal","austria","switzerland","australia",
                 "new zealand","dubai","south africa",
                 "finland","italy","morocco")
    
    df = data.frame(labs=rep(labs,4), values=c(mov, mob, fov, fob), 
                    sex=rep(c("Male", "Female"), each=2*length(fov)),
                    bmi = rep(rep(c("Overweight", "Obese"), each=length(fov)),2))
    
    # Order countries by overall percent overweight/obese
    labs.order = ddply(df, .(labs), summarise, sum=sum(values))
    labs.order = labs.order$labs[order(labs.order$sum)]
    df$labs = factor(df$labs, levels=labs.order)
    

    绘制单独的男性和女性子集以获得金字塔图:

    ggplot(df, aes(x=labs)) +
      geom_bar(data=df[df$sex=="Male",], aes(y=values, fill=bmi), stat="identity") +
      geom_bar(data=df[df$sex=="Female",], aes(y=-values, fill=bmi), stat="identity") +
      geom_hline(yintercept=0, colour="white", lwd=1) +
      coord_flip(ylim=c(-101,101)) + 
      scale_y_continuous(breaks=seq(-100,100,50), labels=c(100,50,0,50,100)) +
      labs(y="Percent", x="Country") +
      ggtitle("Female                                                 Male")
    

    【讨论】:

    • 我尝试使用此代码(将其复制粘贴到 R)并不断收到错误:未知参数:子集
    • 是的,subset 不再是 ggplot2 的一部分。只需将subset=.(sex=="Male") 替换为df[df$sex=="Male", ] 并同样替换为Female
    • 现在好像还有一个问题,Error: ggplot2 doesn't know how to deal with class uneval 的数据
    • 抱歉,您需要明确包含 data 参数。请参阅我的答案中的更新代码。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 2021-08-20
    • 2016-11-05
    • 2013-03-26
    • 1970-01-01
    • 1970-01-01
    • 2016-08-20
    相关资源
    最近更新 更多