【问题标题】:How to calculate several slopes from linear regressions in tidyverse如何从 tidyverse 中的线性回归计算几个斜率
【发布时间】:2020-11-14 05:48:59
【问题描述】:

随着时间的推移,我测量了土壤孵化(装有土壤的封闭罐子)中的甲烷浓度。为了计算甲烷产生率,我需要将二阶多项式回归模型拟合到甲烷浓度 (ch4_umol) 和时间 (stamp) 之间的关系。我想为我的数据集创建两个新列:回归线斜率的值和 Rsquared 值。我想为每个“jar_camp”计算这两个值。

有人可以帮忙吗?那太棒了!

免责声明:我是新手,主要使用 tidyverse。

我的数据如下所示:

structure(list(jar_camp = c("1_pf1.1", "1_pf1.1", "1_pf1.1", 
"1_pf1.1", "1_pf1.1", "1_pf1.1", "2_pf1.1", "2_pf1.1", "2_pf1.1", 
"2_pf1.1", "2_pf1.1", "2_pf1.1", "3_pf1.1", "3_pf1.1", "3_pf1.1", 
"3_pf1.1", "3_pf1.1", "1_pf2.1", "1_pf2.1", "1_pf2.1", "1_pf2.1", 
"1_pf2.1", "1_pf2.1", "2_pf2.1", "2_pf2.1", "2_pf2.1", "2_pf2.1", 
"2_pf2.1", "2_pf2.1", "3_pf2.1", "3_pf2.1", "3_pf2.1", "3_pf2.1", 
"3_pf2.1"), jar = c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 
3, 3, 3, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3), 
    campaign = c("pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", 
    "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", 
    "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf2.1", "pf2.1", 
    "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", 
    "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", 
    "pf2.1"), stamp = structure(c(1546688646, 1546688647, 1546688649, 
    1546688651, 1546688653, 1546688654, 1546689321, 1546689323, 
    1546689324, 1546689326, 1546689328, 1546689329, 1546689877, 
    1546689878, 1546689880, 1546689882, 1546689884, 1547031076, 
    1547031077, 1547031079, 1547031081, 1547031083, 1547031084, 
    1547032136, 1547032137, 1547032139, 1547032141, 1547032143, 
    1547032144, 1547033073, 1547033075, 1547033076, 1547033078, 
    1547033080), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
    ch4_umol = c(74.982885373, 74.315864696, 75.405874095, 73.876607177, 
    74.153176726, 74.429746275, 159.645704961, 159.661973758, 
    159.678242555, 159.694511352, 159.710780149, 159.75958654, 
    134.673101566, 135.779379762, 135.584154198, 135.600422995, 
    136.6578948, 455.542584797, 455.656466376, 455.998111113, 
    455.998111113, 455.623928782, 455.591391188, 461.838609236, 
    461.887415627, 461.985028409, 461.789802845, 461.627114875, 
    461.789802845, 441.356193813, 440.982011482, 441.20977464, 
    441.112161858, 441.112161858)), class = c("tbl_df", "tbl", 
"data.frame"), row.names = c(NA, -34L))

【问题讨论】:

    标签: r tidyverse


    【解决方案1】:

    与 tidyverse / purrr:

    data <-structure(list(jar_camp = c("1_pf1.1", "1_pf1.1", "1_pf1.1", 
                                       "1_pf1.1", "1_pf1.1", "1_pf1.1", "2_pf1.1", "2_pf1.1", "2_pf1.1", 
                                       "2_pf1.1", "2_pf1.1", "2_pf1.1", "3_pf1.1", "3_pf1.1", "3_pf1.1", 
                                       "3_pf1.1", "3_pf1.1", "1_pf2.1", "1_pf2.1", "1_pf2.1", "1_pf2.1", 
                                       "1_pf2.1", "1_pf2.1", "2_pf2.1", "2_pf2.1", "2_pf2.1", "2_pf2.1", 
                                       "2_pf2.1", "2_pf2.1", "3_pf2.1", "3_pf2.1", "3_pf2.1", "3_pf2.1", 
                                       "3_pf2.1"), jar = c(1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 
                                                           3, 3, 3, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3), 
                          campaign = c("pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", 
                                       "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", 
                                       "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf1.1", "pf2.1", "pf2.1", 
                                       "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", 
                                       "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", "pf2.1", 
                                       "pf2.1"), stamp = structure(c(1546688646, 1546688647, 1546688649, 
                                                                     1546688651, 1546688653, 1546688654, 1546689321, 1546689323, 
                                                                     1546689324, 1546689326, 1546689328, 1546689329, 1546689877, 
                                                                     1546689878, 1546689880, 1546689882, 1546689884, 1547031076, 
                                                                     1547031077, 1547031079, 1547031081, 1547031083, 1547031084, 
                                                                     1547032136, 1547032137, 1547032139, 1547032141, 1547032143, 
                                                                     1547032144, 1547033073, 1547033075, 1547033076, 1547033078, 
                                                                     1547033080), class = c("POSIXct", "POSIXt"), tzone = "UTC"), 
                          ch4_umol = c(74.982885373, 74.315864696, 75.405874095, 73.876607177, 
                                       74.153176726, 74.429746275, 159.645704961, 159.661973758, 
                                       159.678242555, 159.694511352, 159.710780149, 159.75958654, 
                                       134.673101566, 135.779379762, 135.584154198, 135.600422995, 
                                       136.6578948, 455.542584797, 455.656466376, 455.998111113, 
                                       455.998111113, 455.623928782, 455.591391188, 461.838609236, 
                                       461.887415627, 461.985028409, 461.789802845, 461.627114875, 
                                       461.789802845, 441.356193813, 440.982011482, 441.20977464, 
                                       441.112161858, 441.112161858)), class = c("tbl_df", "tbl", 
                                                                                 "data.frame"), row.names = c(NA, -34L))
    library(tidyverse)
    data <- data %>% group_by(campaign,jar_camp) %>% summarize(ch4_umol,dt=as.numeric(difftime(stamp,min(stamp)))) %>% ungroup()
    
    calclm <- function(data) {
      campaign = data$campaign[[1]]
      lm <-lm(formula = ch4_umol ~ dt , data = data)
      lm.summary = summary(lm)
      list(campaign = campaign,intercept=lm$coefficients[[1]],slope=lm$coefficients[[2]] ,r.squared = lm.summary$r.squared)
    }
    
    
    res <- data %>% split(.$jar_camp) %>% map(~calclm(.x)) %>% bind_rows(.id="jar_camp")
    res
    
    jar_camp campaign intercept    slope r.squared
      <chr>    <chr>        <dbl>    <dbl>     <dbl>
    1 1_pf1.1  pf1.1         74.9 -0.0813   0.215   
    2 1_pf2.1  pf2.1        456.   0.00188  0.000854
    3 2_pf1.1  pf1.1        160.   0.0126   0.906   
    4 2_pf2.1  pf2.1        462.  -0.0225   0.371   
    5 3_pf1.1  pf1.1        135.   0.201    0.665   
    6 3_pf2.1  pf2.1        441.  -0.0235   0.209   
    
    

    我将stamp 转换为从组开始的秒数 (dt),以便回归正常工作。

    【讨论】:

    • lm.summary$coefficients 包含每个系数的估计值、标准误差、t 值和 p 值。如果你只是想提取系数,lm$coefficients 可以。
    • 感谢@Darren Tsai,我简化了这部分。
    • @Magnus,我在结果中包含了摘要
    • @Magnus,如果你输出例如第一个结果:res[[1]]$summary$coefficients,你会得到 3 个 coef,截取到 y 轴,coef1 代表 dt,coef2 代表 dt2 = dt^2,这给出了我理解你的问题的二次回归:intercept + coef1 * dt1 + coef2 * dt^2。二次模型的斜率是导数:coef1 + 2 * coef2 * dt。您是在寻找二次回归还是线性回归?
    • r.squared 在结果中也可用,例如对于第一个回归:res[[1]]$summary$r.squared
    猜你喜欢
    • 1970-01-01
    • 2016-06-07
    • 2021-06-22
    • 2017-03-10
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2021-02-25
    • 2016-07-23
    相关资源
    最近更新 更多