【问题标题】:Convert data frame from wide to long with 2 variables(使用 2 个变量将数据框从宽格式转换为长格式)
【发布时间】:2014-02-27 15:29:51
【问题描述】:

我有以下宽数据框(mydf.wide):

DAY JAN F1  FEB F2  MAR F3  APR F4  MAY F5  JUN F6  JUL F7  AUG F8  SEP F9  OCT F10 NOV F11 DEC F12
1   169 0   296 0   1095    0   599 0   1361    0   1746    0   2411    0   2516    0   1614    0   908 0   488 0   209 0
2   193 0   554 0   1085    0   1820    0   1723    0   2787    0   2548    0   1402    0   1633    0   897 0   411 0   250 0
3   246 0   533 0   1111    0   1817    0   2238    0   2747    0   1575    0   1912    0   705 0   813 0   156 0   164 0
4   222 0   547 0   1125    0   1789    0   2181    0   2309    0   1569    0   1798    0   1463    0   878 0   241 0   230 0

我想把它转换成下面这种“半长”格式:

DAY variable_month value_month value_F
1 JAN 169 0

我试过了:

# Asker's attempt: melt only the twelve month columns into long format.
# The F1..F12 columns are not listed in measure.vars, so they are left out.
# NOTE(review): "YEAR" is not among the columns of the sample data shown
# above -- confirm it exists in the real data before using it as an id.
library(reshape2)
mydf.long <- melt(mydf.wide, id.vars=c("YEAR","DAY"), measure.vars=c("JAN","FEB","MAR","APR","MAY","JUN","JUL","AUG","SEP","OCT","NOV","DEC"))

但这跳过了F变量,我不知道如何处理两个变量...

【问题讨论】:

    标签: r reshape2 melt


    【解决方案1】:

    这是基础 R(base R)中的 reshape(...) 更合适的情况之一。

    # Reshape wide -> long with base R: every month column and its matching
    # Fn column become one row per (DAY, month).
    # Columns are addressed by name rather than by position, and the flag
    # columns are not stored in a variable called `F`, which would shadow the
    # built-in alias for FALSE.
    month_cols <- toupper(month.abb)                    # "JAN", ..., "DEC"
    flag_cols  <- paste0("F", seq_along(month_cols))    # "F1", ..., "F12"
    mydf.long <- reshape(
      mydf.wide,
      idvar     = "DAY",              # reshape() expects idvar as a column name
      timevar   = "variable_month",   # name the month column directly
      times     = month_cols,
      varying   = list(month_cols, flag_cols),
      v.names   = c("value_month", "value_F"),
      direction = "long"
    )
    head(mydf.long)
    #       DAY variable_month value_month value_F
    # 1.JAN   1            JAN         169       0
    # 2.JAN   2            JAN         193       0
    # 3.JAN   3            JAN         246       0
    # 4.JAN   4            JAN         222       0
    # 1.FEB   1            FEB         296       0
    # 2.FEB   2            FEB         554       0
    

    你也可以通过两次调用 melt(...) 来做到这一点:

    library(reshape2)
    # Melt the month columns and the Fn columns separately, then put the two
    # value columns side by side.
    # Columns are addressed by name, arguments are spelled out in full (no
    # partial matching), and `F` is not used as a variable name because it
    # shadows the built-in alias for FALSE.
    month_cols <- toupper(month.abb)                    # "JAN", ..., "DEC"
    flag_cols  <- paste0("F", seq_along(month_cols))    # "F1", ..., "F12"
    z.1 <- melt(mydf.wide, id.vars = "DAY", measure.vars = month_cols,
                variable.name = "variable_month", value.name = "value_month")
    z.2 <- melt(mydf.wide, id.vars = "DAY", measure.vars = flag_cols,
                value.name = "value_F")
    # cbind() pairs rows purely by position, so make sure both melted frames
    # list the DAY values in the same order before combining them.
    stopifnot(identical(z.1$DAY, z.2$DAY))
    mydf.long <- cbind(z.1, value_F = z.2$value_F)
    head(mydf.long)
    #   DAY variable_month value_month value_F
    # 1   1            JAN         169       0
    # 2   2            JAN         193       0
    # 3   3            JAN         246       0
    # 4   4            JAN         222       0
    # 5   1            FEB         296       0
    # 6   2            FEB         554       0
    

    【讨论】:

      【解决方案2】:

      melt() 和 dcast() 可从 reshape2 和 data.table 包中获得。data.table 的较新版本允许同时 melt 多组列(melt multiple columns simultaneously)。patterns() 参数可以通过正则表达式指定两组列:

      library(data.table)   # CRAN version 1.10.4 used
      # Upper-case month abbreviations serve both as the column-matching regex
      # and as the new factor labels.
      month_labels <- toupper(month.abb)
      month_regex  <- paste(month_labels, collapse = "|")
      # Convert to a data.table in place so that data.table's melt() is used.
      setDT(mydf.wide)
      mydf.long <- melt(
        mydf.wide,
        measure.vars = patterns(month_regex, "F\\d"),
        value.name   = c("MONTH", "F")
      )
      # Relabel the levels of `variable` with the month abbreviations.
      mydf.long[, variable := forcats::lvls_revalue(variable, month_labels)]
      # Print the result.
      mydf.long[]
      
          DAY variable MONTH F
       1:   1      JAN   169 0
       2:   2      JAN   193 0
       3:   3      JAN   246 0
       4:   4      JAN   222 0
       5:   1      FEB   296 0
      ...
      44:   4      NOV   241 0
      45:   1      DEC   209 0
      46:   2      DEC   250 0
      47:   3      DEC   164 0
      48:   4      DEC   230 0
          DAY variable MONTH F
      

      注意,"F\\d" 在 patterns() 中用作正则表达式。一个简单的 "F" 会同时匹配 FEB 以及 F1、F2 等,从而产生意想不到的结果。

      另请注意,mydf.wide 需要强制转换为 data.table 对象。否则,对 data.frame 对象调用 melt() 会分派到 reshape2::melt(),而它无法识别 patterns()。

      数据

      # Recreate the sample wide data set (DAY plus twelve month/Fn column
      # pairs) from the whitespace-separated text of the question.
      library(data.table)
      mydf.wide <- fread(
      "DAY JAN F1  FEB F2  MAR F3  APR F4  MAY F5  JUN F6  JUL F7  AUG F8  SEP F9  OCT F10 NOV F11 DEC F12
        1   169 0   296 0   1095    0   599 0   1361    0   1746    0   2411    0   2516    0   1614    0   908 0   488 0   209 0
        2   193 0   554 0   1085    0   1820    0   1723    0   2787    0   2548    0   1402    0   1633    0   897 0   411 0   250 0
        3   246 0   533 0   1111    0   1817    0   2238    0   2747    0   1575    0   1912    0   705 0   813 0   156 0   164 0
        4   222 0   547 0   1125    0   1789    0   2181    0   2309    0   1569    0   1798    0   1463    0   878 0   241 0   230 0",
      data.table = FALSE)   # ask fread() for a plain data.frame, not a data.table
      

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 2021-12-21
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2023-03-24
        • 2022-01-24
        • 1970-01-01
        • 2020-08-11
        相关资源
        最近更新 更多