【问题标题】:R: comparing a column with values in another column present as list object(R:将一列与另一列中以列表对象形式保存的值进行比较)
【发布时间】:2019-07-05 12:54:16
【问题描述】:

DF1 是我用 for 循环创建的(或者是别人给我的)。我想检查 DF2 中的值是否出现在 DF1 对应日期的值之中(这些值以列表形式保存,或拼接在一个字符串里)。我经常遇到这种情况。代码如下。

library(dplyr)
library(magrittr)
# Sample data. DF1 holds, per date, candidate numbers joined by "|";
# DF2 holds one number (`no`) per date. Each pipeline ends in `print`,
# so both frames are echoed as they are created.
DF1 <- data.frame(det = as.Date(c('2013-02-02','2018-01-11','2011-07-10')),nos = c("1|3","4|2|1","3|4")) %>% print
DF2 <- data.frame(det1 = as.Date(c('2013-02-02','2018-01-11')),no = c(1,3)) %>% print
# Join DF1's `nos` onto DF2's rows by date, then split `nos` on the literal
# "|" into a list column. DF1 is overwritten with the joined result.
# NOTE(review): strsplit() requires a character vector; if `nos` is a factor
# (the data.frame() default before R 4.0) this needs as.character(nos).
DF1 = left_join(DF2,DF1,by=c('det1'='det')) %>%
  mutate(list_column = strsplit(nos,split = "\\|")) %>% 
  print
# DF1 after the join (list_column not shown):
#         det1 no   nos
# 1 2013-02-02  1   1|3
# 2 2018-01-11  3 4|2|1
for(i_ in 1:nrow(DF1)){
  # i_ = 1  (left over for stepping through a single row by hand)
  temp = DF1[i_,]
  # Flatten this row's list cell into a numeric vector (and print it).
  list_vals = temp$list_column %>% as.vector() %>% unlist() %>% as.numeric() %>% print
  # TRUE when this row's `no` is one of the numbers split out of its `nos`.
  DF1$present[i_] = temp$no %in% list_vals
    }
#R>DF1
#        det1 no   nos list_column present
#1 2013-02-02  1   1|3        1, 3    TRUE
#2 2018-01-11  3 4|2|1     4, 2, 1   FALSE

如果 no 是 nos 中的某一个值,那么创建另一个逻辑列来标记这一点的最佳方法是什么?如何实现我想要做的事情,或者有没有更好的方式得到我最终想要的结果? 我欢迎任何解决方案:base、tidyverse 或 data.table 均可。

EDIT-1

我希望能够去掉 for 循环。

【问题讨论】:

    标签: r data.table tidyverse


    【解决方案1】:

    ** 代码的第一部分 **

    library(dplyr)
    # Candidate numbers per date, kept as "|"-separated strings.
    DF1 <- data.frame(
      det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
      nos = c("1|3", "4|2|1", "3|4")
    )
    print(DF1)
    # The single number to look up for each date.
    DF2 <- data.frame(
      det1 = as.Date(c("2013-02-02", "2018-01-11")),
      no = c(1, 3)
    )
    print(DF2)
    # Attach each date's `nos` string to DF2's rows. The string is matched
    # as-is by the regex below, so there is no need to strsplit() it.
    DF1 <- left_join(DF2, DF1, by = c("det1" = "det"))
    

    使用 grepl 配合 apply 系列函数,只需一条语句:

    # Flag rows whose `no` occurs as a whole number inside the "|"-separated
    # `nos` string. The lookarounds (?<!\d) and (?!\d) keep 1 from matching
    # inside 11 or 21.
    # The columns are read by name instead of through apply(DF1, 1, ...):
    # apply() first converts the data frame with as.matrix(), which format()s
    # numeric columns to a common width, so a `no` of 1 sitting next to a 10
    # would arrive as " 1" and never match.
    DF1$present <- vapply(
      seq_len(nrow(DF1)),
      function(i) {
        pattern <- paste0("(?<!\\d)", DF1$no[i], "(?!\\d)")
        grepl(pattern, DF1$nos[i], perl = TRUE)
      },
      logical(1)
    )
    

    结果:

             det1 no   nos  present
    1: 2013-02-02  1   1|3  TRUE
    2: 2018-01-11  3 4|2|1 FALSE
    

    此解决方案可“移植”到data.table,例如:

    library(data.table)
    # Convert DF1 to a data.table in place (by reference).
    data.table::setDT(DF1)
    # `:=` adds the `present` column by reference. Inside j the columns `no`
    # and `nos` are visible as plain vectors, so each row is tested directly;
    # going through apply(DF1, 1, ...) would coerce the table with as.matrix()
    # and format()-pad the numeric `no`, breaking the regex for mixed widths.
    # The lookarounds make the number match only as a whole field.
    DF1[, present := vapply(
      seq_len(.N),
      function(i) {
        pattern <- paste0("(?<!\\d)", no[i], "(?!\\d)")
        grepl(pattern, nos[i], perl = TRUE)
      },
      logical(1)
    )]
    

    【讨论】:

      【解决方案2】:

      我们可以使用 map2 遍历 'list_column',并检查 intersect 得到的交集元素的 length

      library(tidyverse)
      # For every row, intersect its split values (.x) with that row's own `no`
      # (.y); a non-empty intersection means `no` is present.
      # `no` is taken from the piped frame rather than DF2$no so the comparison
      # stays row-aligned even if the join reordered or duplicated rows, and
      # map2_lgl() returns a plain logical vector instead of a list.
      DF1 %>%
        mutate(
          present = map2_lgl(list_column, no, ~ length(intersect(.x, .y)) > 0)
        )
      #        det1 no   nos list_column present
      #1 2013-02-02  1   1|3        1, 3    TRUE
      #2 2018-01-11  3 4|2|1     4, 2, 1   FALSE
      

      或者不使用匿名函数,写得更紧凑一些:

      # Same check without an anonymous function: intersect each row's split
      # values with its `no`, then test the length of each intersection.
      # `no` comes from the piped frame (not DF2$no) so rows stay aligned.
      DF1 %>%
        mutate(present = lengths(map2(list_column, no, intersect)) > 0)
      

      【讨论】:

        【解决方案3】:
        library(data.table)
        # Turn DF1 into a data.table by reference.
        setDT(DF1)
        # Group by row number so each group is exactly one row; list_column[[1]]
        # is then that row's vector of split values. `no` is compared as text
        # because strsplit() produced strings. The trailing [] prints the result.
        DF1[
          ,
          present := is.element(as.character(no), list_column[[1]]),
          by = seq_len(nrow(DF1))
        ][]
        
                 det1 no   nos list_column present
        1: 2013-02-02  1   1|3         1,3    TRUE
        2: 2018-01-11  3 4|2|1       4,2,1   FALSE
        

        数据(只需在一处添加 as.character())

        # Candidate numbers per date, kept as "|"-separated strings.
        DF1 <- data.frame(
          det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
          nos = c("1|3", "4|2|1", "3|4")
        )
        print(DF1)
        # The single number to look up for each date.
        DF2 <- data.frame(
          det1 = as.Date(c("2013-02-02", "2018-01-11")),
          no = c(1, 3)
        )
        print(DF2)
        # Join by date, then split `nos` on the literal "|" into a list column.
        # as.character() is what lets strsplit() work when `nos` is a factor.
        DF1 <- mutate(
          left_join(DF2, DF1, by = c("det1" = "det")),
          list_column = strsplit(as.character(nos), split = "\\|")
        )
        

        【讨论】:

          【解决方案4】:

          我发现grepl 在这种情况下很有用。

          # Attach each date's "|"-separated `nos` string to DF2's rows.
          DF3 <- left_join(DF2, DF1, by = c("det1" = "det"))
          # Test every row's `no` against its own `nos`. The pattern is anchored
          # on the string ends or a literal "|" so the number must match a whole
          # field: a bare grepl(1, "10|3") would wrongly report TRUE.
          # vapply() over seq_len() replaces the explicit loop, always yields a
          # logical vector, and is safe when DF3 has zero rows.
          DF3$present <- vapply(
            seq_len(nrow(DF3)),
            function(i) {
              pattern <- paste0("(^|\\|)", DF3$no[i], "(\\||$)")
              grepl(pattern, DF3$nos[i])
            },
            logical(1)
          )
          
          > DF3
                  det1 no   nos present
          1 2013-02-02  1   1|3    TRUE
          2 2018-01-11  3 4|2|1   FALSE
          

          数据(添加 stringsAsFactors = FALSE,使 nos 保持为字符向量):

          # Sample data. stringsAsFactors = FALSE keeps `nos` a character vector
          # (not a factor) so it can be passed straight to grepl(). FALSE is
          # spelled out because `F` is an ordinary variable that can be reassigned.
          DF1 <- data.frame(
            det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
            nos = c("1|3", "4|2|1", "3|4"),
            stringsAsFactors = FALSE
          )
          DF2 <- data.frame(
            det1 = as.Date(c("2013-02-02", "2018-01-11")),
            no = c(1, 3),
            stringsAsFactors = FALSE
          )
          

          【讨论】:

            【解决方案5】:

            另一个使用data.table::tstrsplit的选项:

            library(data.table)
            # Convert both inputs to data.tables by reference.
            setDT(DF1)
            setDT(DF2)
            # Long form of DF1: one row per (date, number) after splitting `nos`
            # on the literal "|".
            df1 <- DF1[
              ,
              .(no = as.integer(unlist(tstrsplit(nos, "\\|")))),
              by = .(det)
            ]
            # Start with every row marked absent, then use an update join to flip
            # the rows of DF2 whose (date, no) pair exists in the long table.
            DF2[, present := FALSE]
            DF2[df1, on = c(det1 = "det", no = "no"), present := !is.na(i.no)]
            

            输出:

                     det1 no present
            1: 2013-02-02  1    TRUE
            2: 2018-01-11  3   FALSE
            

            数据:

            # Candidate numbers per date, kept as "|"-separated strings.
            DF1 <- data.frame(
              det = as.Date(c("2013-02-02", "2018-01-11", "2011-07-10")),
              nos = c("1|3", "4|2|1", "3|4")
            )
            # The number to look up per date, stored as integer so it can be
            # joined against the integer values split out of `nos`.
            DF2 <- data.frame(
              det1 = as.Date(c("2013-02-02", "2018-01-11")),
              no = c(1L, 3L)
            )
            

            【讨论】:

              猜你喜欢
              • 1970-01-01
              • 1970-01-01
              • 2018-04-06
              • 1970-01-01
              • 1970-01-01
              • 1970-01-01
              • 1970-01-01
              • 1970-01-01
              • 1970-01-01
              相关资源
              最近更新 更多