所以这是一个小技巧,但它可能对你有用。
我在图表中引入第三列来保存我原来帖子中的标签。
我对您的数据进行了预处理,以尝试将第三列中的标签分散在 Tissue 变量周围,以使它们不会完全重叠。
我的预处理写得比较丑,但可以正常工作。请注意,根据您的评论,我只处理了每个组织最多 4 种 cell.type 的情况;如果某个组织的 cell.type 超过 4 种,其标签不会被分散,仍会重叠在一起。
它给了我这个图表:
我的代码:
# Example input: one row per (tissue, cell type) measurement.
data <- data.frame(
  Tissue = c("Adrenal gland", "Appendix", "Appendix"),
  protein.expression = c("No detect", "No detect", "Medium"),
  cell.type = c("Glandular cells", "Lymphoid tissu", "Glandular cells")
)
# Pre-processing section ----
# Builds `results`: a copy of `data` with a numeric x position per tissue
# (`positionTissue`) and a per-label x position (`spreader`) that fans the
# cell-type labels of one tissue out around that tissue's position so they do
# not sit exactly on top of each other.

# Offsets (in x-axis units) for the 1st, 2nd, ... distinct cell type of a
# tissue that has `n` distinct cell types. Only 2, 3 and 4 cell types are
# spread; any other count gets no offset, i.e. the labels stay at the tissue
# position.
label_offsets <- function(n) {
  switch(
    as.character(n),
    "2" = c(-0.1, 0.1),
    "3" = c(-0.15, 0, 0.15),
    "4" = c(-0.2, -0.1, 0.1, 0.2),
    rep(0, n)
  )
}

# Step 1: find out the n of cell.types per tissue type
counters <- data %>%
  group_by(Tissue) %>%
  summarise(count = n())

# Step 2: Join n back to original data. Transform protein.expression to an
# ordered factor whose levels follow first appearance in the data.
data <- data %>%
  inner_join(counters, by = "Tissue") %>%
  mutate(
    # as.numeric() only yields a usable position for a factor. Since R 4.0.0
    # data.frame() keeps strings as character, where as.numeric() returns NA,
    # so convert explicitly. An existing factor is left untouched.
    Tissue = as.factor(Tissue),
    protein = factor(
      protein.expression,
      levels = unique(protein.expression),
      ordered = TRUE
    ),
    positionTissue = as.numeric(Tissue)
  )

# Step 3: Spread the cell.type labels around the position of the Tissue.
# Within each tissue, the k-th distinct cell type (in order of appearance)
# receives the k-th offset from label_offsets().
results <- data %>%
  group_by(Tissue) %>%
  mutate(
    spreader = positionTissue +
      label_offsets(length(unique(cell.type)))[
        match(cell.type, unique(cell.type))
      ]
  ) %>%
  ungroup() %>%
  as.data.frame()
# Draw the chart. Points mark the expression level per tissue; the cell-type
# labels are drawn at the dummy "zzz" category, horizontally placed at the
# pre-computed `spreader` position.
ggplot(results, aes(x = positionTissue, y = protein, label = cell.type)) +
  geom_point(aes(colour = protein.expression), size = 12) +
  # Tissue names are written into the plot area because the numeric axis
  # text is hidden in theme() below.
  geom_text(
    aes(y = 0.5, label = Tissue),
    size = 8, fontface = "bold", angle = 90
  ) +
  geom_label(aes(x = spreader, y = "zzz", fill = protein), colour = "white") +
  theme_classic() +
  # Leave half a unit of padding on both ends of the tissue axis.
  scale_x_continuous(
    limits = range(as.numeric(data$Tissue)) + c(-0.5, 0.5)
  ) +
  # Listing only the real categories hides the dummy "zzz" tick label.
  scale_y_discrete(breaks = c("Medium", "No detect")) +
  labs(
    title = "Protein Atlas",
    x = "",
    y = "Cell types measured per tissue"
  ) +
  guides(fill = guide_legend(title = "Protein expression")) +
  # ylim(1, 4) +
  coord_flip() +
  theme(
    legend.position = "none",
    legend.title = element_text(size = 30),
    legend.text = element_text(size = 25),
    legend.key.size = unit(2, "cm"),
    axis.text.x = element_text(size = 25),
    axis.text.y = element_text(colour = NA),
    axis.title.x = element_text(size = 30),
    axis.title.y = element_text(
      size = 30,
      margin = margin(t = 0, r = 20, b = 0, l = 0)
    ),
    axis.ticks.length = unit(.01, "cm"),
    strip.text.y = element_text(angle = 0)
  )
编辑#2:
通过创建 n 个位置来更新以保留标签颜色,其中 n 是 cell.types 的数量:
# Give every cell type its own label position: "z " followed by the cell type.
data <- mutate(data, position = paste("z", cell.type))
然后您可以使用这个新的位置变量来代替我在原始帖子中建议的静态“zzz”。您的标签将具有正确的颜色,但如果有很多 cell.types,您的图表会看起来很奇怪。
# Label layer for the ggplot chain: places each label at its own `position`
# value instead of the constant "zzz".
geom_label(aes(y = position, label = cell.type)) +
编辑 #1:通过将 cell.types 分组到每个组织的单个标签来更新以避免重叠标签。
创建一个新的标签字段,用于连接每种组织类型的各个标签:
# Build one combined label per tissue by joining all of its cell types with
# "; ". Every row of a tissue receives the same combined label.
data <- data %>%
  group_by(Tissue) %>%
  mutate(label = paste(cell.type, collapse = "; ")) %>%
  # Drop the grouping so later operations on `data` are not silently
  # performed per tissue.
  ungroup()
并修改 ggplot 调用以使用这个新字段而不是现有的 cell.type 字段:
# Text layer for the ggplot chain: draws the combined per-tissue `label` at
# the dummy "zzz" position.
geom_text(
  aes(y = "zzz", label = label),
  size = 6, fontface = "bold", colour = "white"
) +
或:
# Alternative label layer for the ggplot chain: draws the combined per-tissue
# `label` in a box at the dummy "zzz" position.
geom_label(aes(y = "zzz", label = label)) +
原帖:
您可以将标签绘制在第三个位置(例如“zzz”),然后使用 scale_y_discrete(breaks=c("Medium","No detect")) 只为真实的类别保留刻度,从而在轴标签中隐藏该位置。
# Original approach: draw the cell-type labels at a dummy third y category
# ("zzz") next to the real expression levels, and list only the real levels
# in scale_y_discrete(breaks = ...) so the dummy category gets no axis label.
ggplot(
  data,
  aes(
    x = Tissue,
    # Ordered factor whose levels follow first appearance in the data.
    y = factor(
      protein.expression,
      levels = unique(protein.expression),
      ordered = TRUE
    ),
    fill = protein.expression,
    label = cell.type
  )
) +
  geom_point(aes(colour = protein.expression), size = 12) +
  geom_text(aes(y = "zzz"), size = 6, fontface = "bold", colour = "white") +
  geom_label(aes(y = "zzz")) +
  # facet_grid(cell.type ~ ., scales = "free", space = "free") +
  # scale_fill_manual(values = myPalette, drop = FALSE) +
  # scale_color_manual(values = myPalette, drop = FALSE) +
  theme_classic() +
  scale_y_discrete(breaks = c("Medium", "No detect")) +
  labs(title = "Protein Atlas") +
  guides(fill = guide_legend(title = "Protein expression")) +
  ylab("Cell types measured per tissue") +
  # ylim(1, 4) +
  coord_flip() +
  theme(
    axis.text.x = element_text(size = 25, vjust = 0.5, hjust = .9),
    axis.text.y = element_text(size = 25),
    legend.position = "none",
    axis.title.x = element_text(size = 30),
    axis.title.y = element_text(
      size = 30,
      margin = margin(t = 0, r = 20, b = 0, l = 0)
    ),
    legend.title = element_text(size = 30),
    legend.text = element_text(size = 25),
    legend.key.size = unit(2, "cm"),
    axis.ticks.length = unit(.01, "cm"),
    strip.text.y = element_text(angle = 0)
  )