【问题标题】:How to place odds ratio of Y value at one standard deviation above and below mean of X to a ggplot (or other R plot)如何在 ggplot(或其他 R 图)中添加 X 平均值上下一个标准差处 Y 值的优势比
【发布时间】:2021-06-02 14:13:00
【问题描述】:

我的数据看起来像这样。

# Binary outcome: 9 ones followed by 8 zeros, with that pattern repeated twice
# (34 observations in total).
Probability <- rep(rep(c(1, 0), times = c(9, 8)), times = 2)
# Predictor score for each of the 34 observations, grouped to line up with
# the runs of ones and zeros in `Probability`.
Score2 <- c(
  2, 3, 4, 2, 3, 4, 2, 3, 4,
  4, 3, 2, 3, 2, 3, 2, 3,
  2, 3, 4, 2, 3, 4, 2, 3, 3,
  4, 3, 2, 3, 2, 3, 2, 3
)
# Combine both vectors into the data frame used by the models and plots.
Data2 <- data.frame(Probability = Probability, Score2 = Score2)

Data2
   Probability Score2
1            1      2
2            1      3
3            1      4
4            1      2
5            1      3
6            1      4
7            1      2
8            1      3
9            1      4
10           0      4
11           0      3
12           0      2
13           0      3
14           0      2
15           0      3
16           0      2
17           0      3
18           1      2
19           1      3
20           1      4
21           1      2
22           1      3
23           1      4
24           1      2
25           1      3
26           1      3
27           0      4
28           0      3
29           0      2
30           0      3
31           0      2
32           0      3
33           0      2
34           0      3

我不仅需要绘制相关图,还需要在图中添加优势比(及相应的 p 值),用于比较 X 高于其平均值 1 个标准差与低于其平均值 1 个标准差时 Y 的优势。

除了 +/- 1 SD 的优势比及其 p 值之外,我得到了所有信息。

# Plot Probability against Score2 with a linear fit and a Pearson
# correlation label placed in the top-left corner of the panel.
ggplot(Data2, aes(x = Score2, y = Probability)) +
  geom_smooth(method = "lm", alpha = 0.3, color = "black") +
  stat_cor(
    method = "pearson",
    label.x.npc = "left",
    label.y.npc = "top",
    # Newline separator between the parts of the correlation label.
    label.sep = "\n"
  ) +
  scale_colour_grey(start = 0.6, end = 0.2) +
  scale_fill_grey(start = 0.6, end = 1) +
  theme_classic() +
  # Restrict the y axis to the probability range [0, 1].
  scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), limits = c(0, 1))

问题

如何添加优势比(比较高于和低于 X 平均值 1 SD 的概率)?

【问题讨论】:

    标签: r ggplot2 statistics regression logistic-regression


    【解决方案1】:

    您也可以使用线性概率模型的对比包来估计 CI。

    library(ggpubr)
    
    # Mean and standard deviation of the predictor. They are read from the
    # data frame (the same source the models use) so this snippet does not
    # depend on a separate global `Score2` vector being present.
    mean_x <- mean(Data2$Score2)
    sd_x <- sd(Data2$Score2)
    

    线性概率模型

    # Linear probability model: ordinary least squares on the 0/1 outcome.
    lm_prob <- lm(Probability ~ Score2, data = Data2)

    # Predict at one SD below (element 1) and one SD above (element 2) the mean.
    sd_points <- data.frame(Score2 = mean_x + c(-1, 1) * sd_x)
    pred_probs <- predict(lm_prob, newdata = sd_points)
    # Convert the predicted probabilities to odds, then take the ratio of the
    # odds at +1 SD to the odds at -1 SD.
    pred_odds <- pred_probs / (1 - pred_probs)
    or_pred <- pred_odds[2] / pred_odds[1]
    

    逻辑回归

    # Logistic regression of the 0/1 outcome on Score2.
    glm_prob <- glm(Probability ~ Score2, data = Data2, family = binomial())

    # Predicted probabilities (response scale, with standard errors) at one SD
    # below (element 1) and one SD above (element 2) the mean of Score2.
    sd_points <- data.frame(Score2 = mean_x + c(-1, 1) * sd_x)
    glm_pred_probs <- predict(
      glm_prob,
      newdata = sd_points,
      type = "response",
      se.fit = TRUE
    )
    # Odds at each point, then the ratio of the +1 SD odds to the -1 SD odds.
    glm_odds <- glm_pred_probs$fit / (1 - glm_pred_probs$fit)
    glm_or_pred <- glm_odds[2] / glm_odds[1]
    

    对比语句可以用 mean_x + sd_x 和 mean_x - sd_x 更新。它不会改变结果。

    library(contrast)
    # Contrast the fitted logistic model at Score2 = +sd_x versus
    # Score2 = -sd_x, i.e. two points that are 2 SD apart.
    glm_contrast <- contrast(
      glm_prob,
      list(Score2 = sd_x),
      list(Score2 = -sd_x)
    )
    print(glm_contrast, X = TRUE)

    # Build the plot label: the contrast estimate and its confidence limits
    # are exponentiated to give the odds ratio and its interval, followed by
    # the p-value. A space follows "95% CI:" so the label does not render as
    # "CI:0.25".
    or_ci <- paste0(
      round(exp(glm_contrast$Contrast), 2),
      ", 95% CI: ",
      round(exp(glm_contrast$Lower), 2),
      ", ",
      round(exp(glm_contrast$Upper), 2),
      ", p = ",
      round(glm_contrast$Pvalue, 3)
    )
    

    绘图

    # Plot Probability against Score2 with a linear fit, a Pearson correlation
    # label, and a text annotation carrying the odds-ratio summary.
    ggplot(Data2, aes(Score2, Probability)) +
      geom_smooth(method = "lm", alpha = .3, color = "black") +
      stat_cor(
        method = "pearson",
        label.x.npc = "left",
        label.y.npc = "top",
        # Use the "\n" escape: a literal line break inside the quotes would
        # also pull the code's indentation into the separator string.
        label.sep = "\n"
      ) +
      # Odds ratio, 95% CI and p-value assembled earlier in `or_ci`.
      annotate("text", x = 3.0, y = 0.9, label = paste0("OR = ", or_ci)) +
      scale_colour_grey(start = .6, end = .2) +
      scale_fill_grey(start = 0.6, end = 1) +
      theme_classic() +
      # Restrict the y axis to the probability range [0, 1].
      scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1), limits = c(0, 1))
    

    【讨论】:

      【解决方案2】:

      @jvargh7 的回答让我基本得到了这个解决方案(只差标准差括号),见 here

      library(ggpubr)

      ### @jVargh7's code ###    
      library(ggplot2)  
      library(ggpubr)
        
      # Mean and standard deviation of the predictor. They are read from the
      # data frame (the same source the models use) so this snippet does not
      # depend on a separate global `Score2` vector being present.
      mean_x <- mean(Data2$Score2)
      sd_x <- sd(Data2$Score2)
      
      # Linear probability model: ordinary least squares on the 0/1 outcome.
      lm_prob <- lm(Probability ~ Score2, data = Data2)

      # Predict at one SD below (element 1) and one SD above (element 2) the
      # mean of Score2.
      sd_points <- data.frame(Score2 = mean_x + c(-1, 1) * sd_x)
      pred_probs <- predict(lm_prob, newdata = sd_points)
      # Convert the predicted probabilities to odds, then take the ratio of
      # the odds at +1 SD to the odds at -1 SD.
      pred_odds <- pred_probs / (1 - pred_probs)
      or_pred <- pred_odds[2] / pred_odds[1]
      
      # Logistic regression of the 0/1 outcome on Score2.
      glm_prob <- glm(Probability ~ Score2, data = Data2, family = binomial())

      # Predicted probabilities (response scale, with standard errors) at one
      # SD below (element 1) and one SD above (element 2) the mean of Score2.
      sd_points <- data.frame(Score2 = mean_x + c(-1, 1) * sd_x)
      glm_pred_probs <- predict(
        glm_prob,
        newdata = sd_points,
        type = "response",
        se.fit = TRUE
      )
      # Odds at each point, then the ratio of the +1 SD odds to the -1 SD odds.
      glm_odds <- glm_pred_probs$fit / (1 - glm_pred_probs$fit)
      glm_or_pred <- glm_odds[2] / glm_odds[1]
      
      # Install 'contrast' only when it is missing, so re-running the script
      # does not download and reinstall the package every time.
      if (!requireNamespace("contrast", quietly = TRUE)) {
        install.packages("contrast")
      }
      library(contrast)

      # Contrast the fitted logistic model at Score2 = +sd_x versus
      # Score2 = -sd_x, i.e. two points that are 2 SD apart.
      glm_contrast <- contrast(
        glm_prob,
        list(Score2 = sd_x),
        list(Score2 = -sd_x)
      )
      print(glm_contrast, X = TRUE)

      # Build the plot label: the contrast estimate and its confidence limits
      # are exponentiated to give the odds ratio and its interval, followed by
      # the p-value.
      or_ci <- paste0(
        round(exp(glm_contrast$Contrast), 2),
        ", 95% CI: ",
        round(exp(glm_contrast$Lower), 2),
        ", ",
        round(exp(glm_contrast$Upper), 2),
        ", p = ",
        round(glm_contrast$Pvalue, 3)
      )
      
      # Plot Probability against Score2 with a linear fit and a Pearson
      # correlation label at fixed data coordinates, then add a bracket that
      # spans mean - SD to mean + SD and carries the odds-ratio label.
      ggplot(Data2, aes(x = Score2, y = Probability)) +
        geom_smooth(method = "lm", alpha = 0.3, color = "black") +
        stat_cor(
          method = "pearson",
          label.x = 2.75,
          label.y = 0.6,
          label.sep = ","
        ) +
        scale_colour_grey(start = 0.6, end = 0.2) +
        scale_fill_grey(start = 0.6, end = 1) +
        theme_classic() +
        # Restrict the y axis to the probability range [0, 1].
        scale_y_continuous(
          breaks = c(0, 0.25, 0.5, 0.75, 1),
          limits = c(0, 1)
        ) +
        # Addition on top of the earlier answer: the +/- 1 SD bracket.
        geom_bracket(
          xmin = mean(Score2) - sd(Score2),
          xmax = mean(Score2) + sd(Score2),
          y.position = 0.9,
          label = paste0("OR = ", or_ci),
          tip.length = c(0.42, 0.29),
          vjust = -1
        )
      

      下面的示例输出

      再次感谢 @jvargh7 帮我完成了此解决方案的大部分,见 here

      【讨论】:

      • 更新了对比度估计的解决方案。
      猜你喜欢
      • 2017-02-18
      • 1970-01-01
      • 2020-05-05
      • 1970-01-01
      • 1970-01-01
      • 2021-12-05
      • 1970-01-01
      • 1970-01-01
      • 2022-09-27
      相关资源
      最近更新 更多