【问题标题】:swapping rows and columns in R在R中交换行和列
【发布时间】:2019-11-16 19:38:31
【问题描述】:

我有一个看起来像这样的表:

> head(test,10)
# A tibble: 10 x 16
   Question_1 Question_2 Question_3 Question_4 Question_5 Question_6 Question_7 Question_8 Question_9
   <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>      <chr>     
 1 B          C          C          E          C          A          C          E          C         
 2 C          C          C          B          C          A          E          D          C         
 3 B          C          C          E          C          A          C          E          C         
 4 C          C          C          D          C          A          C          D          C         
 5 B          B          C          B          A          A          A          D          C         
 6 C          C          C          E          BLANK      A          C          E          C         
 7 C          C          C          E          C          A          E          E          C         
 8 B          C          C          E          C          A          C          D          C         
 9 C          C          C          E          C          A          C          D          C         
10 D          C          E          B          C          A          A          D          C  

我想对它进行转置(重塑),使每个问题占一行,并得到 A、B、C、D、E、BLANK 这 6 个单独的列。

【问题讨论】:

    标签: r datatable count dplyr


    【解决方案1】:

    我们可以先用 gather 转换成 'long' 格式,再对 'key'、'value' 两列做 count 计数,最后用 spread 转换成 'wide' 格式

    library(tidyverse)
    # Reshape to long format (one row per question/answer pair), count each
    # pair, then spread the answers back out into one column per answer.
    # pivot_longer()/pivot_wider() replace the superseded gather()/spread().
    # names_sort = TRUE (tidyr >= 1.1.0) keeps the answer columns in sorted
    # order, as spread() produced; values_fill = 0 puts a zero wherever a
    # question/answer combination never occurs.
    test %>%
        pivot_longer(everything(), names_to = "key", values_to = "value") %>%
        count(key, value) %>%
        pivot_wider(names_from = value, values_from = n,
                    values_fill = 0, names_sort = TRUE)
    

    或使用melt/dcast

    library(data.table)
    # Melt the question columns to long format, then cast back to wide with
    # one row per question and one column per answer, counting occurrences.
    # - as.data.table() works on a copy, so `test` itself is not converted by
    #   reference the way setDT(test) would convert it.
    # - measure.vars is spelled out instead of relying on partial matching.
    # - fun.aggregate/value.var are explicit instead of relying on dcast()'s
    #   fallback to length and its guess of the value column.
    dcast(
        melt(as.data.table(test), measure.vars = patterns("^Question")),
        variable ~ value,
        fun.aggregate = length,
        value.var = "value"
    )
    

    或者在 base R 中,不使用循环:按列复制 'test' 的列名,同时对 'test' 做 unlist,然后用 table 得到列联表

    # Label every cell with the name of its column (column by column, the same
    # order in which unlist() flattens the data) and cross-tabulate the column
    # name against the cell value.
    table(
        rep(names(test), each = nrow(test)),
        unlist(test, use.names = FALSE)
    )
    #              A  B BLANK  C  D  E
    #  Question_1  0  4     0  5  1  0
    #  Question_2  0  1     0  9  0  0
    #  Question_3  0  0     0  9  0  1
    #  Question_4  0  3     0  0  1  6
    #  Question_5  1  0     1  8  0  0
    #  Question_6 10  0     0  0  0  0
    #  Question_7  2  0     0  6  0  2
    #  Question_8  0  0     0  0  6  4
    #  Question_9  0  0     0 10  0  0
    

    注意:没有必要借助循环来实现

    基准测试

    # Build a larger data set for timing by repeating the row indices of
    # `test` 1e5 times.
    df2 <- test[rep(seq_len(nrow(test)), 1e5), ]
    
    # Timing 1: per-column table() over a shared set of factor levels,
    # combined with sapply() and transposed.
    system.time({
    vals <- unique(unlist(df2))
    t(sapply(df2, function(x) table(factor(x, levels = vals))))
    
    
    })
    # user  system elapsed 
     # 6.987   0.367   7.293 
    
    # Timing 2: a single table() call on the replicated column names and the
    # unlisted values.
     system.time({
     table(names(df2)[col(df2)], unlist(df2))  
    
     })
    # user  system elapsed 
    #   6.355   0.407   6.720 
    
    
    # Timing 3: tidyverse gather() / count() / spread() pipeline.
    system.time({
    gather(df2) %>% 
        count(key, value) %>%
        spread(value, n, fill = 0)
    
    })
    # user  system elapsed 
    # 0.567   0.125   0.695
    
    
    # Timing 4: data.table melt() / dcast().
    system.time({
    dcast(melt(setDT(df2), measure = patterns("^Question")), variable ~ value)
    
    })
    #  user  system elapsed 
    #  0.789   0.018   0.195 
    

    数据

    # Reproducible example data: a data.frame with 10 rows and nine character
    # columns (Question_1 ... Question_9) holding the answers "A"-"E" or
    # "BLANK", matching the columns visible in the question's printout.
    test <- structure(list(Question_1 = c("B", "C", "B", "C", "B", "C", "C", 
    "B", "C", "D"), Question_2 = c("C", "C", "C", "C", "B", "C", 
    "C", "C", "C", "C"), Question_3 = c("C", "C", "C", "C", "C", 
    "C", "C", "C", "C", "E"), Question_4 = c("E", "B", "E", "D", 
    "B", "E", "E", "E", "E", "B"), Question_5 = c("C", "C", "C", 
    "C", "A", "BLANK", "C", "C", "C", "C"), Question_6 = c("A", "A", 
    "A", "A", "A", "A", "A", "A", "A", "A"), Question_7 = c("C", 
    "E", "C", "C", "A", "C", "E", "C", "C", "A"), Question_8 = c("E", 
    "D", "E", "D", "D", "E", "E", "D", "D", "D"), Question_9 = c("C", 
    "C", "C", "C", "C", "C", "C", "C", "C", "C")), 
    class = "data.frame", row.names = c("1", 
    "2", "3", "4", "5", "6", "7", "8", "9", "10"))
    

    【讨论】:

      【解决方案2】:

      一种 base R 的做法是:先取出数据框中所有的唯一值,再用 sapply 统计每一列中各个值出现的频数。

      # All distinct values that occur anywhere in the data.
      vals <- unique(unlist(test))
      # Count, per column, how often each value in `vals` occurs; going through
      # factor(levels = vals) makes every column report the same set of values
      # (zero where a value is absent). Binding the per-column counts as rows
      # always yields a questions-by-values matrix, whereas t(sapply(...))
      # collapses to a plain vector (and flips orientation) when `vals` has
      # only one element.
      do.call(rbind, lapply(test, function(x) table(factor(x, levels = vals))))
      
      #           B  C D E  A BLANK
      #Question_1 4  5 1 0  0     0
      #Question_2 1  9 0 0  0     0
      #Question_3 0  9 0 1  0     0
      #Question_4 3  0 1 6  0     0
      #Question_5 0  8 0 0  1     1
      #Question_6 0  0 0 0 10     0
      #Question_7 0  6 0 2  2     0
      #Question_8 0  0 6 4  0     0
      #Question_9 0 10 0 0  0     0
      

      【讨论】:

        猜你喜欢
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 2020-02-19
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        • 1970-01-01
        相关资源
        最近更新 更多