【问题标题】:Case When Sequential Character Values and Time Greater Than Specified Interval连续字符值和时间大于指定间隔的情况
【发布时间】:2021-03-22 20:28:55
【问题描述】:

Reprex

data
# A tibble: 82 x 12
# Groups:   SUBJ_ID, READER [2]
   SUBJ_ID VISIT DOS        MOS_DUR READER   SPD SPD_NADIR PCT_DIFF_NADIR SPD_BL PCT_DIFF_BL
   <chr>   <chr> <date>       <dbl> <fct>  <dbl>     <dbl>          <dbl>  <dbl>       <dbl>
 1 1002-3~ 1 Sc~ 2012-06-13   0     2       275.      275.         NA       275.      NA    
 2 1002-3~ 1 We~ 2012-07-12   0.967 2       358.      275.          0.302   275.       0.302
 3 1002-3~ 1 We~ 2012-08-09   1.87  2       439.      275.          0.594   275.       0.594
 4 1002-3~ 1 We~ 2012-09-18   3.17  2       528.      275.          0.919   275.       0.919
 5 1002-3~ Unsc~ 2012-10-25   4.39  2       584.      275.          1.12    275.       1.12 
 6 1002-3~ Unsc~ 2012-12-20   6.23  2       573.      275.          1.08    275.       1.08 
 7 1002-3~ Unsc~ 2013-02-13   8     2       516.      275.          0.872   275.       0.872
 8 1002-3~ Unsc~ 2013-03-29   9.52  2       532.      275.          0.930   275.       0.930
 9 1002-3~ Unsc~ 2013-05-14  11.0   2       534.      275.          0.941   275.       0.941
10 1002-3~ Unsc~ 2013-08-20  14.2   2       419.      275.          0.522   275.       0.522
# ... with 72 more rows, and 2 more variables: PROG <chr>, PRGMOS <dbl>
dput(data)
structure(list(SUBJ_ID = c("1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169", "1002-31169", 
"1002-31169", "1002-31169", "1002-31169", "1002-31169"), VISIT = c("1 Screening", 
"1 Week 04", "1 Week 08", "1 Week 16", "Unscheduled 01", "Unscheduled 02", 
"Unscheduled 03", "Unscheduled 04", "Unscheduled 05", "Unscheduled 06", 
"Unscheduled 07", "Unscheduled 08", "Unscheduled 09", "Unscheduled 10", 
"Unscheduled 11", "Unscheduled 12", "Unscheduled 13", "Unscheduled 14", 
"Unscheduled 15", "Unscheduled 16", "Unscheduled 17", "2 Screening", 
"2 Week 04", "2 Week 08", "2 Week 16", "Unscheduled 18", "Unscheduled 19", 
"Unscheduled 20", "Unscheduled 21", "Unscheduled 22", "Unscheduled 23", 
"3 Screening", "3 Week 04", "3 Week 08", "3 Week 16", "Unscheduled 24", 
"Unscheduled 25", "Unscheduled 26", "Unscheduled 27", "Unscheduled 28", 
"Unscheduled 29", "1 Screening", "1 Week 04", "1 Week 08", "1 Week 16", 
"Unscheduled 01", "Unscheduled 02", "Unscheduled 03", "Unscheduled 04", 
"Unscheduled 05", "Unscheduled 06", "Unscheduled 07", "Unscheduled 08", 
"Unscheduled 09", "Unscheduled 10", "Unscheduled 11", "Unscheduled 12", 
"Unscheduled 13", "Unscheduled 14", "Unscheduled 15", "Unscheduled 16", 
"Unscheduled 17", "2 Screening", "2 Week 04", "2 Week 08", "2 Week 16", 
"Unscheduled 18", "Unscheduled 19", "Unscheduled 20", "Unscheduled 21", 
"Unscheduled 22", "Unscheduled 23", "3 Screening", "3 Week 04", 
"3 Week 08", "3 Week 16", "Unscheduled 24", "Unscheduled 25", 
"Unscheduled 26", "Unscheduled 27", "Unscheduled 28", "Unscheduled 29"
), DOS = structure(c(15504, 15533, 15561, 15601, 15638, 15694, 
15749, 15793, 15839, 15937, 16027, 16153, 16272, 16398, 16552, 
16680, 16909, 16972, 17028, 17119, 17252, 17316, 17343, 17371, 
17427, 17490, 17553, 17609, 17665, 17756, 17840, 17862, 17896, 
17924, 17980, 17988, 18036, 18092, 18148, 18231, 18326, 15504, 
15533, 15561, 15601, 15638, 15694, 15749, 15793, 15839, 15937, 
16027, 16153, 16272, 16398, 16552, 16680, 16909, 16972, 17028, 
17119, 17252, 17316, 17343, 17371, 17427, 17490, 17553, 17609, 
17665, 17756, 17840, 17862, 17896, 17924, 17980, 17988, 18036, 
18092, 18148, 18231, 18326), class = "Date"), MOS_DUR = c(0, 
0.966666666666667, 1.87096774193548, 3.16666666666667, 4.38709677419355, 
6.2258064516129, 8, 9.51612903225806, 11.0322580645161, 14.2258064516129, 
17.1666666666667, 21.3548387096774, 25.258064516129, 29.3666666666667, 
34.4666666666667, 38.6451612903226, 46.1666666666667, 48.2333333333333, 
50.0645161290323, 53.0333333333333, 57.4516129032258, 59.5483870967742, 
60.4333333333333, 61.3548387096774, 63.1666666666667, 65.2333333333333, 
67.2903225806452, 69.1935483870968, 71.0322580645161, 74, 76.741935483871, 
77.4666666666667, 78.5806451612903, 79.4838709677419, 81.3870967741936, 
81.6451612903226, 83.2258064516129, 85.0645161290323, 86.8709677419355, 
89.6, 92.7241379310345, 0, 0.966666666666667, 1.87096774193548, 
3.16666666666667, 4.38709677419355, 6.2258064516129, 8, 9.51612903225806, 
11.0322580645161, 14.2258064516129, 17.1666666666667, 21.3548387096774, 
25.258064516129, 29.3666666666667, 34.4666666666667, 38.6451612903226, 
46.1666666666667, 48.2333333333333, 50.0645161290323, 53.0333333333333, 
57.4516129032258, 59.5483870967742, 60.4333333333333, 61.3548387096774, 
63.1666666666667, 65.2333333333333, 67.2903225806452, 69.1935483870968, 
71.0322580645161, 74, 76.741935483871, 77.4666666666667, 78.5806451612903, 
79.4838709677419, 81.3870967741936, 81.6451612903226, 83.2258064516129, 
85.0645161290323, 86.8709677419355, 89.6, 92.7241379310345), 
    READER = structure(c(2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L), .Label = c("1", 
    "2", "3", "4", "5"), class = "factor"), SPD = c(275.32, 358.49, 
    438.99, 528.38, 583.55, 572.93, 515.5, 531.5, 534.4, 419.07, 
    328.02, 145.58, 146.78, 165.25, 136.49, 200.55, 170.55, 271.51, 
    243.32, 356.41, 447.68, 526.89, 586.47, 619.36, 666.5, 625.72, 
    679.19, 639.81, 638.95, 686.85, 920.76, 992.07, 1131.35, 
    1092.71, 1405.81, 1692.02, 1553.67, 2074.41, 2056.49, 1281.96, 
    1944.41, 361.13, 488.18, 581.76, 623.45, 730.45, 639.17, 
    545.33, 564.94, 633.28, 491.78, 434.64, 203.66, 154.57, 229.18, 
    148.35, 132.52, 152.16, 288.91, 334.22, 420.01, 688.19, 645.57, 
    630.8, 683.63, 758.01, 642.23, 636.91, 691.11, 708.32, 579.91, 
    884.35, 1193.26, 1377.71, 1208.79, 1555.86, 1737.75, 1616, 
    2003.85, 2190.7, 1353.92, 2239.47), SPD_NADIR = c(275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 145.58, 145.58, 145.58, 136.49, 136.49, 136.49, 
    136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 
    136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 
    136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 136.49, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 203.66, 154.57, 154.57, 148.35, 132.52, 
    132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 
    132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 
    132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 132.52, 
    132.52), PCT_DIFF_NADIR = c(NA, 0.302084846723812, 0.594471887258463, 
    0.919148627052157, 1.11953363359, 1.08096033706233, 0.872366700566613, 
    0.930480894958594, 0.94101409269214, 0.522119715240448, 0.191413627778585, 
    -0.47123347377597, 0.00824289050693769, 0.135114713559555, 
    -0.0624398955900536, 0.469338413070555, 0.249542090995677, 
    0.989229980218331, 0.782694702908638, 1.61125357169023, 2.2799472488827, 
    2.86028280460107, 3.29679830024178, 3.53776833467653, 3.88314162209686, 
    3.58436515495641, 3.97611546633453, 3.68759616089091, 3.68129533299143, 
    4.03223679390431, 5.74598871712213, 6.26844457469412, 7.28888563264708, 
    7.00578796981464, 9.29972891786944, 11.3966590959045, 10.3830317239358, 
    14.1982562825115, 14.0669646127921, 8.39233643490366, 13.2458055535204, 
    NA, 0.35181236673774, 0.610943427574558, 0.726386619776812, 
    1.02267881372359, 0.769916650513665, 0.510065627336416, 0.564367402320494, 
    0.753606734416969, 0.36178107606679, 0.203555506327361, -0.436047960568216, 
    -0.241038986546204, 0.482693925082487, -0.040240667658666, 
    -0.106707111560499, 0.148204044672502, 1.18012375490492, 
    1.52203440990039, 2.16940839118624, 4.19310292785995, 3.87149109568367, 
    3.76003622094778, 4.15869302746755, 4.71996679746453, 3.8462873528524, 
    3.80614246906127, 4.21513733776034, 4.34500452761847, 3.37601871415635, 
    5.67333232719589, 8.00437669785693, 9.39624207666767, 8.12156655599155, 
    10.7405674615152, 12.1131150015092, 11.1943857530939, 14.1211137941443, 
    15.5310896468458, 9.21672200422578, 15.899109568367), SPD_BL = c(275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 275.32, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 361.13, 
    361.13), PCT_DIFF_BL = c(NA, 0.302084846723812, 0.594471887258463, 
    0.919148627052157, 1.11953363359, 1.08096033706233, 0.872366700566613, 
    0.930480894958594, 0.94101409269214, 0.522119715240448, 0.191413627778585, 
    -0.47123347377597, -0.466874909196571, -0.399789336045329, 
    -0.504249600464914, -0.271574894668023, -0.380539009152986, 
    -0.0138384425395903, -0.11622838878396, 0.294530001452855, 
    0.626035159087607, 0.913736742699404, 1.13013947406654, 1.24960046491356, 
    1.42081941014093, 1.27270085718437, 1.46691123056807, 1.3238776696208, 
    1.32075403167224, 1.49473340113323, 2.34432660177248, 2.60333430190324, 
    3.10921836408543, 2.9688725846288, 4.10609472613686, 5.1456486996949, 
    4.64314252506175, 6.53454162429173, 6.46945372657271, 3.65625454017144, 
    6.06236379485689, NA, 0.35181236673774, 0.610943427574558, 
    0.726386619776812, 1.02267881372359, 0.769916650513665, 0.510065627336416, 
    0.564367402320494, 0.753606734416969, 0.36178107606679, 0.203555506327361, 
    -0.436047960568216, -0.571982388613519, -0.365380887768947, 
    -0.589206103065378, -0.633040733253953, -0.578655885692133, 
    -0.199983385484451, -0.0745161022346523, 0.16304377924847, 
    0.905657242544236, 0.787638800431978, 0.746739401323623, 
    0.893030210727439, 1.09899482180932, 0.778390053443358, 0.763658516323761, 
    0.913742973444466, 0.961398942209177, 0.605820618613796, 
    1.4488411375405, 2.30423947055077, 2.81499736936837, 2.34724337496192, 
    3.30831002686013, 3.81197906571041, 3.47484285437377, 4.54883283028272, 
    5.06623653531969, 2.7491208152189, 5.20128485586908), PROG = c(NA, 
    "PDu", "PDu", "PDu", "PDu", "PDc", "PDc", "PDc", "PDc", "PDc", 
    NA, NA, NA, NA, NA, "PDc", NA, "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", NA, "PDu", "PDu", "PDu", "PDu", "PDc", "PDc", 
    "PDc", "PDc", "PDc", NA, NA, NA, "PDc", NA, NA, NA, "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", "PDc", 
    "PDc", "PDc", "PDc", "PDc", "PDc"), PRGMOS = c(NA, NA, NA, 
    NA, 4.38709677419355, 6.2258064516129, 8, 9.51612903225806, 
    11.0322580645161, 14.2258064516129, NA, NA, NA, NA, NA, 38.6451612903226, 
    NA, 48.2333333333333, 50.0645161290323, 53.0333333333333, 
    57.4516129032258, 59.5483870967742, 60.4333333333333, 61.3548387096774, 
    63.1666666666667, 65.2333333333333, 67.2903225806452, 69.1935483870968, 
    71.0322580645161, 74, 76.741935483871, 77.4666666666667, 
    78.5806451612903, 79.4838709677419, 81.3870967741936, 81.6451612903226, 
    83.2258064516129, 85.0645161290323, 86.8709677419355, 89.6, 
    92.7241379310345, NA, NA, NA, NA, 4.38709677419355, 6.2258064516129, 
    8, 9.51612903225806, 11.0322580645161, 14.2258064516129, 
    NA, NA, NA, 29.3666666666667, NA, NA, NA, 48.2333333333333, 
    50.0645161290323, 53.0333333333333, 57.4516129032258, 59.5483870967742, 
    60.4333333333333, 61.3548387096774, 63.1666666666667, 65.2333333333333, 
    67.2903225806452, 69.1935483870968, 71.0322580645161, 74, 
    76.741935483871, 77.4666666666667, 78.5806451612903, 79.4838709677419, 
    81.3870967741936, 81.6451612903226, 83.2258064516129, 85.0645161290323, 
    86.8709677419355, 89.6, 92.7241379310345)), row.names = c(NA, 
-82L), groups = structure(list(SUBJ_ID = c("1002-31169", "1002-31169"
), READER = structure(c(2L, 4L), .Label = c("1", "2", "3", "4", 
"5"), class = "factor"), .rows = structure(list(1:41, 42:82), ptype = integer(0), class = c("vctrs_list_of", 
"vctrs_vctr", "list"))), row.names = c(NA, -2L), class = c("tbl_df", 
"tbl", "data.frame"), .drop = TRUE), class = c("grouped_df", 
"tbl_df", "tbl", "data.frame"))

问题:当出现连续的 PDu 值,并且第一次出现与最后一次出现之间的持续时间(MOS_DUR 的差值)>= 3 时,我需要将 PDu 值设置为 PDc。PDu 只能在 MOS_DUR <= 6 时出现。在给出的示例中,第二次扫描(第 2 行)的 PROG 值将从 PDu 重置为 PDc。

编辑 1:向 reprex 添加了一个 PRGMOS 变量,以提供更清晰的说明。本质上,如果存在一个序列满足 last(PRGMOS) - first(PRGMOS) > 3 & PROG == "PDu",则 PDu 将变为 PDc。

这是我当前的代码,但它没有做我想要的。

# Asker's attempt (reported in the question as not giving the desired result).
# Step 1: within each SUBJ_ID/READER group, label PROG from PCT_DIFF_NADIR and
# MOS_DUR; rows matching neither condition get NA (case_when() default).
data <- data %>%
  group_by(SUBJ_ID, READER) %>%
  mutate(PROG = case_when(
           PCT_DIFF_NADIR >= 0.25 & MOS_DUR <= 6 ~ "PDu", 
           PCT_DIFF_NADIR >= 0.25 & MOS_DUR > 6 ~ "PDc"
          )) 

# Step 2: derive PRGMOS from MOS_DUR on selected "PDu" rows, then a PRGFLG
# flag from the spread of PRGMOS within the group.
data <- data %>%
  group_by(SUBJ_ID, READER) %>%
  mutate(PRGMOS = case_when(
    # Keep MOS_DUR on a "PDu" row whose next row in the group is "PDc"/"PDu".
    PROG == "PDu" & lead(PROG) == "PDc" ~ MOS_DUR,
    PROG == "PDu" & lead(PROG) == "PDu" ~ MOS_DUR,
    # NOTE(review): this case has no `~ value` right-hand side; case_when()
    # expects two-sided formulas, so this line looks incomplete.
    PROG == "PDc"
  ))  %>%
  mutate(PRGFLG = case_when(
    # NOTE(review): max()/min() are called without na.rm = TRUE, so any NA in
    # the group's PRGMOS makes the comparison NA and PRGFLG stays NA.
    max(PRGMOS) - min(PRGMOS) > 3 ~ 1,
  ))

编辑 2*:下面的预期输出示例:

> output
# A tibble: 33 x 5
   SUBJ_ID    READER MOS_DUR PROG  PROG2
   <chr>      <chr>    <dbl> <chr> <chr>
 1 1001-31169 4        0     NA    NA   
 2 1001-31169 4        0.903 PDu   PDc  
 3 1001-31169 4        2     PDu   PDc  
 4 1001-31169 4        3.58  PDu   PDc  
 5 1001-31169 4        5.42  PDu   PDc  
 6 1001-31169 4        7.26  NA    NA   
 7 1001-31169 4        9.07  NA    NA   
 8 1001-31169 4       11.0   NA    NA   
 9 1001-31169 4       14.2   NA    NA   
10 1001-31169 4       17.7   NA    NA   
# ... with 23 more rows

如您在上面的示例中所见,PDu 已转换为 PDc,因为 5.42 - 0.903 > 3,并且所有 PDu 都是连续出现的。

【问题讨论】:

  • 不太清楚你的条件。可以试试:library(dplyr); library(data.table); df1 %>% group_by(SUBJ_ID, grp = rleid(PROG)) %>% mutate(PROG2 = case_when(all(PROG == 'PDu') & (last(MOS_DUR) - first(MOS_DUR)) >= 3 ~ 'PDc', TRUE ~ PROG)) %>% ungroup
  • 如果序列中第一个连续的 PDu 和最后一个 PDu 之间的持续时间(MOS_DUR)> 3,我想将 PROG 的值更改为 PDc。
  • @akrun 添加了一个示例输出。希望这会增加更多的清晰度。
  • 在最初的“数据”中,第 6 行以后是 PDc,您是否要在预期输出中把它们转换为 NA?
  • 如果不满足条件,我想将它们留空。我将使用coalesce 用不同的响应变量来填补缺失。

标签: r


【解决方案1】:

基于@akrun 代码,这是我使用的解决方案:

# Solution adapted from @akrun's comment. Assumes dplyr is attached (for %>%,
# group_by(), mutate(), case_when(), first(), last()) and that data.table is
# installed.

# Step 1: within each SUBJ_ID/READER group, label every row. A
# PCT_DIFF_NADIR of at least 0.25 is "PDu" while MOS_DUR <= 6 and "PDc" once
# MOS_DUR > 6; rows matching neither condition stay NA (case_when() default).
data <- data %>%
  group_by(SUBJ_ID, READER) %>%
  mutate(
    PROG = case_when(
      PCT_DIFF_NADIR >= 0.25 & MOS_DUR <= 6 ~ "PDu",
      PCT_DIFF_NADIR >= 0.25 & MOS_DUR > 6 ~ "PDc"
    )
  ) %>%
  ungroup()

# Step 2: split each SUBJ_ID/READER series into runs of identical consecutive
# PROG values and promote a whole "PDu" run to "PDc" when the run spans at
# least 3 units of MOS_DUR. Every other row keeps its PROG value (including
# NA). The helper column `grp` is left in the result.
data <- data %>%
  # rleid() comes from data.table, not dplyr; the namespace is spelled out so
  # the snippet does not fail with "could not find function" when only dplyr
  # is attached, and so data.table's first()/last() do not mask dplyr's.
  # Grouping by SUBJ_ID and READER as well keeps a run from spanning two
  # subjects or readers.
  group_by(SUBJ_ID, READER, grp = data.table::rleid(PROG)) %>%
  mutate(
    PROG2 = case_when(
      # An all-NA run yields NA here, which case_when() treats as no match.
      all(PROG == "PDu") & (last(MOS_DUR) - first(MOS_DUR)) >= 3 ~ "PDc",
      TRUE ~ PROG
    )
  ) %>%
  ungroup()

除了其他变量之外,我还需要group_by(READER)。再次感谢@akrun!

【讨论】:

    猜你喜欢
    • 2017-07-15
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2018-01-10
    • 1970-01-01
    • 1970-01-01
    相关资源
    最近更新 更多