【问题标题】:Filtering dates for time series plot using dplyr(使用 dplyr 过滤时间序列图的日期)
【发布时间】:2018-11-01 10:08:21
【问题描述】:

我正在尝试过滤我的数据集,使其仅包含 1996-2015 年的行,并且仅包含 orgid、stdate、locid、charnam 这几列。整个数据集包含从 1988 年到 2015 年的数据以及大量其他列。我最近刚刚了解了 dplyr 包,认为用它来做比较合适。但是,我不断收到下面这个错误,而且不明白原因:`Error in is_character(x) : object 'rlang_is_character' not found`

这是我目前的代码:

    ########## download necessary packages to make script run #########################################################################
# Install pacman the first time the script runs.
# NOTE(review): require() returns FALSE instead of raising an error when the
# package is missing, which is what this guard relies on.
if (!require(pacman)) {
  install.packages('pacman')

}

# Attach the tidyverse through pacman; the pipeline below uses %>%, select(),
# filter() and between().
pacman::p_load("tidyverse")
#### Read in the necessary data ######
# Tab-separated file with a header row; fill pads rows that have too few
# fields, and character columns are kept as character instead of factors.
# NOTE(review): T / F are ordinary variables that can be reassigned;
# TRUE / FALSE would be the safe spelling.
roadsalt_data<-read.table("QADportaldata_1988-2015.tsv",header=T,sep="\t",fill=T,stringsAsFactors = F)
#Convert date column from a character class to a date class so ggplot can  display as a continuous variable ###
roadsalt_data$stdate <- as.Date(roadsalt_data$stdate)
## Filter dataset to only contain years 1996-2015 ########
# Keeps the four columns of interest, then filters rows by date.
# NOTE(review): between() is handed the whole data frame `roadsalt_data`
# rather than the `stdate` column; the intended call is presumably
# between(stdate, ...).
# NOTE(review): the upper bound is 2015-07-01, so dates in the second half of
# 2015 are excluded even though the heading says "years 1996-2015" - confirm
# which is intended.
roadsalt_data_sub<-roadsalt_data %>%
                     select(orgid,stdate,locid,charnam) %>%
                     filter(between(roadsalt_data, as.Date("1996-01-01"), as.Date("2015-07-01")))

这是数据集的预览:

structure(list(orgid = c("USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", 
"USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ", "USGS-NJ"
), actid = c("nwisnj.01.01300274", "nwisnj.01.01300273", "nwisnj.01.01300247", 
"nwisnj.01.01300242", "nwisnj.01.01300238", "nwisnj.01.01300237", 
"nwisnj.01.01300189", "nwisnj.01.01300189", "nwisnj.01.01300189", 
"nwisnj.01.01300190"), actyp = c("Sample-Routine", "Sample-Routine", 
"Sample-Routine", "Sample-Routine", "Sample-Routine", "Sample-Routine", 
"Sample-Routine", "Sample-Routine", "Sample-Routine", "Sample-Routine"
), stdate = structure(c(15755, 15755, 15748, 15748, 15748, 15748, 
15740, 15740, 15740, 15740), class = "Date"), sttime = c("09:30:00", 
"11:00:00", "10:30:00", "12:00:00", "11:00:00", "11:30:00", "09:25:00", 
"09:25:00", "09:25:00", "09:30:00"), actdep = c(NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_), actdepun = c("", "", "", "", "", "", "", "", "", ""
), locid = c("USGS-01407760", "USGS-01445030", "USGS-01380075", 
"USGS-01368820", "USGS-01409815", "USGS-01411400", "USGS-01458570", 
"USGS-01458570", "USGS-01458570", "USGS-01445160"), actcom = c("A-0520044 TPCN = 64mL filtered", 
"A-0520046 TPCN = 124mL filtered", "A-0460036 TPCN = 124mL filtered L-0460036 Received February 14, 2013", 
"A-0460025 TPCN = 125mL filtered L-0460025 Received February 14, 2013", 
"A-0460027 TPCN = 64mL filtered.  ATTN: H.Ardourel, LL ANC and LL pH L-0460027 Received February 14, 2013", 
"A-0460028 TPCN = 125mL filtered. L-0460028 Received February 14, 2013", 
"A-0370012 TPCN = 125mL filtered", "A-0370012 TPCN = 125mL filtered", 
"A-0370012 TPCN = 125mL filtered", "A-0370011 TPCN = 125mL filtered"
), hydcond = c("Stable, normal stage", "Stable, normal stage", 
"Stable, normal stage", "Stable, normal stage", "Stable, high stage", 
"Falling stage", "Stable, normal stage", "Stable, normal stage", 
"Stable, normal stage", "Stable, high stage"), hydev = c("Routine sample", 
"Routine sample", "Routine sample", "Routine sample", "Routine sample", 
"Routine sample", "Routine sample", "Routine sample", "Routine sample", 
"Routine sample"), metcont = c("USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398", "USGS parameter code 82398", 
"USGS parameter code 82398", "USGS parameter code 82398"), metnam = c("Multiple verticals", 
"Multiple verticals", "Multiple verticals", "Multiple verticals", 
"Single vertical", "Multiple verticals", "Grab sample  (dip)", 
"Grab sample  (dip)", "Grab sample  (dip)", "Multiple verticals"
), detcond = c("", "", "", "", "Not Detected", "", "", "", "", 
""), charnam = c("Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)", 
"Inorganic nitrogen (nitrate and nitrite)", "Phosphorus", "Phosphorus", 
"Kjeldahl nitrogen"), samfrac = c("Dissolved", "Dissolved", "Dissolved", 
"Dissolved", "Dissolved", "Dissolved", "Dissolved", "Total", 
"Dissolved", "Dissolved"), val = c("0.84", "1.2", "0.46", "0.28", 
"", "0.66", "3.10", "0.032", "0.028", "0.21"), valunit = c("mg/l", 
"mg/l", "mg/l", "mg/l", "", "mg/l", "mg/l as N", "mg/l as P", 
"mg/l as P", "mg/l as N"), valqual = c("", "", "", "", "", "", 
"", "", "", ""), valstat = c("Accepted", "Accepted", "Accepted", 
"Accepted", "Accepted", "Accepted", "Accepted", "Accepted", "Accepted", 
"Accepted"), statcode = c("", "", "", "", "", "", "", "", "", 
""), valtype = c("Actual", "Actual", "Actual", "Actual", "Actual", 
"Actual", "Actual", "Actual", "Actual", "Actual"), precval = c(NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_), valcom = c("", "", "", "", "", "", "Report level code updated Oct., Nov. 2015. Reference: NWQL Rapi-Note 2011-21 (RLC: IRL => LT-MDL)", 
"", "", ""), valdep = c(NA_real_, NA_real_, NA_real_, NA_real_, 
NA_real_, NA_real_, NA_real_, NA_real_, NA_real_, NA_real_), 
    valdepun = c("", "", "", "", "", "", "", "", "", ""), valmetnam = c("Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "Computation by NWIS algorithm", 
    "Computation by NWIS algorithm", "NO3+NO2, wf, FCC,NaR, DA", 
    "P, wu, WCA, persulfate CF color", "P, wf, FCA, persulfate CF color", 
    "NH3+org-N, wf, FCA, Kjeldahl, CF"), metdesc = c("NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "NWIS User's Manual, QW System, Section 3.6.7", 
    "NWIS User's Manual, QW System, Section 3.6.7", "", "", "", 
    "USGS OF 00-170"), labcom = c("", "", "", "", "", "", "", 
    "", "", ""), detlimnam = c("", "", "", "", "Historical Lower Reporting Limit", 
    "", "Long Term Method Detection Level", "Long Term Method Detection Level", 
    "Long Term Method Detection Level", "Long Term Method Detection Level"
    ), detlimval = c("", "", "", "", "0.23", "", "0.04", "0.004", 
    "0.0040", "0.07"), detlimun = c("", "", "", "", "mg/l", "", 
    "mg/l as N", "mg/l as P", "mg/l as P", "mg/l as N"), V63 = c("NWIS", 
    "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", "NWIS", 
    "NWIS")), .Names = c("orgid", "actid", "actyp", "stdate", 
"sttime", "actdep", "actdepun", "locid", "actcom", "hydcond", 
"hydev", "metcont", "metnam", "detcond", "charnam", "samfrac", 
"val", "valunit", "valqual", "valstat", "statcode", "valtype", 
"precval", "valcom", "valdep", "valdepun", "valmetnam", "metdesc", 
"labcom", "detlimnam", "detlimval", "detlimun", "V63"), row.names = c(NA, 
10L), class = "data.frame")

任何帮助将不胜感激!提前致谢!

【问题讨论】:

  • 您拥有哪个版本的rlang?重装rlang到新版本后试试
  • 我有版本'0.2.0.9001' @akrun
  • 在过滤步骤中,您使用的是整个数据集而不是列?应该是filter(between(stdate, as.Date("1996-01-01"), as.Date("2015-07-01")))
  • 我不确定是否应该建议通过升级 R 版本来解决依赖性问题:R 3.5.0 发布的时间还不长,有些软件包在向 CRAN 发布兼容 3.5.0 的更新版本时遇到了问题。兼容性问题存在于 dplyr / rlang 工具链中,因为我使用 R 3.4.3、dplyr 0.7.4 和 rlang 0.2.0.9000 运行 @akrun 的 dplyr 代码时,它可以正常工作。
  • 我总是安装单独的包而不是组合包

标签: r date dplyr tidyverse


【解决方案1】:

如果我没记错的话,between() 曾经有一段时间不能用于 Date 对象,即使 left 和 right 参数已经用 as.Date() 转换过。

下面是一些替代方案。由于您的示例数据全部落在指定的年份范围之内,这些示例改为过滤 2013-02-04 到 2013-02-12 之间的日期,请根据实际需要相应调整。

library(dplyr)

# Optional step: converting to a tibble only makes the console output tidier.
roadsalt_data <- roadsalt_data %>% as_tibble()

# Keep the four columns of interest, then keep the rows whose date is on or
# after 2013-02-04 and on or before 2013-02-12 (both conditions must hold).
roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  filter(stdate >= "2013-02-04" & stdate <= "2013-02-12")

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# Same date window as above, written with dplyr's between() and Date bounds.
roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  filter(
    dplyr::between(
      stdate,
      as.Date("2013-02-04"),
      as.Date("2013-02-12")
    )
  )

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# How I would've done it
library(lubridate)

# Filter on one calendar component of the date instead of the full date.
# Here that is the day of the month (4 through 12); month and year are not
# looked at, so adjust to the year() variant for a multi-year range.
roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  # filter(between(year(stdate), 1996, 2015)) # for years instead of days
  filter(between(mday(stdate), 4, 12))

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# If {lubridate} isn't installed, these base-R helpers do what day() and
# year() do. They must not call tz(): that function comes from {lubridate},
# so using it would defeat the purpose of the fallback. as.POSIXlt() already
# handles Date input and honours the time zone stored on a POSIXct value.
#
# date: a Date or date-time vector.
# Returns an integer vector with the day of the month for each element.
get_day <- function(date) as.POSIXlt(date)$mday
# get_year <- function(date) as.POSIXlt(date)$year + 1900 # for years instead of days

# Same day-of-month filter as the lubridate version, using the get_day()
# helper defined above in place of day().
roadsalt_data %>%
  select(orgid, stdate, locid, charnam) %>%
  # filter(between(get_year(stdate), 1996, 2015)) # for years instead of days
  filter(get_day(stdate) %>% between(4, 12))

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

# Base R
# Subset without overwriting `roadsalt_data`, so the full set of columns stays
# available to any code that runs afterwards.
keep_cols <- c("orgid", "stdate", "locid", "charnam")
in_window <- roadsalt_data$stdate >= as.Date("2013-02-04") &
  roadsalt_data$stdate <= as.Date("2013-02-12")
# which() discards NA comparisons. Indexing rows with a logical vector that
# contains NA would return all-NA rows, whereas dplyr::filter() drops them.
roadsalt_data[which(in_window), keep_cols]

#> # A tibble: 8 x 4
#>   orgid   stdate     locid         charnam                                
#>   <chr>   <date>     <chr>         <chr>                                  
#> 1 USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), or~
#> 2 USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), or~
#> 3 USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), or~
#> 4 USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), or~
#> 5 USGS-NJ 2013-02-04 USGS-01458570 Inorganic nitrogen (nitrate and nitrit~
#> 6 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 7 USGS-NJ 2013-02-04 USGS-01458570 Phosphorus                             
#> 8 USGS-NJ 2013-02-04 USGS-01445160 Kjeldahl nitrogen

由 reprex 包 (v0.2.0) 于 2018-05-23 创建。

================================================ ===============

如果这些方法都不起作用,那么问题完全出在别的地方。

【讨论】:

    【解决方案2】:

    如果你厌倦了与 tidyverse 依赖地狱作斗争,你可以试试 data.table 代替:

    library(data.table)
    
    ## Turn the existing data frame into a data.table in place (by reference)
    setDT(roadsalt_data)

    ## i: rows whose date lies in the inclusive range; j: the four columns to return
    roadsalt_data[
      stdate %between% c(as.Date("1996-01-01"), as.Date("2015-07-01")),
      list(orgid, stdate, locid, charnam)
    ]
    
    #       orgid     stdate         locid                                                      charnam
    #  1: USGS-NJ 2013-02-19 USGS-01407760 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
    #  2: USGS-NJ 2013-02-19 USGS-01445030 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
    #  3: USGS-NJ 2013-02-12 USGS-01380075 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
    #  4: USGS-NJ 2013-02-12 USGS-01368820 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
    #  5: USGS-NJ 2013-02-12 USGS-01409815 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
    #  6: USGS-NJ 2013-02-12 USGS-01411400 Nitrogen, mixed forms (NH3), (NH4), organic, (NO2) and (NO3)
    #  7: USGS-NJ 2013-02-04 USGS-01458570                     Inorganic nitrogen (nitrate and nitrite)
    #  8: USGS-NJ 2013-02-04 USGS-01458570                                                   Phosphorus
    #  9: USGS-NJ 2013-02-04 USGS-01458570                                                   Phosphorus
    # 10: USGS-NJ 2013-02-04 USGS-01445160                                            Kjeldahl nitrogen
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2012-03-03
      • 2016-03-17
      • 2019-11-07
      • 1970-01-01
      • 1970-01-01
      • 2017-09-06
      相关资源
      最近更新 更多