分隔名称并在 R 中插入新列答案

【问题标题】：Separate names and insert in new columns in R分隔名称并在 R 中插入新列
【发布时间】：2017-01-17 16:09:32
【问题描述】：

我想将数据框中的每个名称拆分开，并把各部分填入相应的列中。下面是一个例子，假设有以下数据框：

    # Building blocks for the sample data: each label repeated a fixed number
    # of times. Field labels total 12 values, and so do the Border labels.
    N0H0  <- rep("N0H0", times = 3)
    N1H1  <- rep("N1H1", times = 5)
    N0H05 <- rep("N0H05", times = 4)
    Out20 <- rep("20_Out", times = 2)
    Out10 <- rep("10_Out", times = 4)
    In5   <- rep("5_In", times = 3)
    In15  <- rep("15_In", times = 3)

    # Input frame: two populated label columns plus four NA placeholder
    # columns that the answers below fill in.
    df <- data.frame(
      Field  = c(N0H0, N1H1, N0H05),
      Border = c(Out20, Out10, In5, In15),
      N      = NA,
      H      = NA,
      Number = NA,
      Out.In = NA
    )

并希望获得以下输出：

     # Expected result: Field and Border unchanged, with each label split into
     # its two parts (N/H from Field, Number/Out.In from Border).
     output <- data.frame(
       Field  = c(N0H0, N1H1, N0H05),
       Border = c(Out20, Out10, In5, In15),
       N      = rep(c("N0", "N1", "N0"), times = c(3, 5, 4)),
       H      = rep(c("H0", "H1", "H05"), times = c(3, 5, 4)),
       Number = rep(c("20", "10", "5", "15"), times = c(2, 4, 3, 3)),
       Out.In = rep(c("Out", "In"), times = c(6, 6))
     )

【问题讨论】：

标签： r dataframe split pattern-matching

【解决方案1】：

我们可以使用 tidyr 中的 extract 和 separate 来拆分这些列

library(dplyr)
library(tidyr)
# Split Field into N/H and Border into Number/Out.In using tidyr.
# `output` (the expected result from the question) is used only to fix the
# final column order.
df %>%
  # Keep only the two source columns; N, H, Number and Out.In are rebuilt below.
  select(Field, Border) %>%
  # Group 1: leading non-digits plus the first digit run (e.g. "N0").
  # Group 2: everything after it (e.g. "H05").
  extract(
    Field,
    into = c("N", "H"),
    regex = "^([^0-9]*\\d+)(.*)",
    remove = FALSE
  ) %>%
  # With no `sep` given, separate() splits on runs of non-alphanumeric
  # characters, which here is the underscore.
  separate(Border, into = c("Number", "Out.In"), remove = FALSE) %>%
  # select_() is deprecated; all_of() selects columns from a character vector.
  select(all_of(names(output)))
#    Field Border  N   H Number Out.In
#1   N0H0 20_Out N0  H0     20    Out
#2   N0H0 20_Out N0  H0     20    Out
#3   N0H0 10_Out N0  H0     10    Out
#4   N1H1 10_Out N1  H1     10    Out
#5   N1H1 10_Out N1  H1     10    Out
#6   N1H1 10_Out N1  H1     10    Out
#7   N1H1   5_In N1  H1      5     In
#8   N1H1   5_In N1  H1      5     In
#9  N0H05   5_In N0 H05      5     In
#10 N0H05  15_In N0 H05     15     In
#11 N0H05  15_In N0 H05     15     In
#12 N0H05  15_In N0 H05     15     In

或者使用 base R：用 lapply 遍历前两列，用 sub 在第一段数字之后插入逗号作为分隔符，用 read.csv 读取这些字符串，再用 cbind 合并 list 中的各个元素，并将结果赋值给除前两列之外的其余列

# For each of the two source columns, insert a comma after the first digit run
# (dropping any underscores that follow it), parse the result as two-field CSV,
# then column-bind the pieces into every column of df except the first two.
df[-(1:2)] <- do.call(
  cbind,
  lapply(df[1:2], function(x) {
    read.csv(
      text = sub("(\\d+)_*", "\\1,", x),
      header = FALSE,
      # Read every piece as text: without this, read.csv would type-guess the
      # Number piece to integer (and drop any leading zeros).
      colClasses = "character",
      stringsAsFactors = FALSE
    )
  })
)
df
#   Field Border  N   H Number Out.In
#1   N0H0 20_Out N0  H0     20    Out
#2   N0H0 20_Out N0  H0     20    Out
#3   N0H0 10_Out N0  H0     10    Out
#4   N1H1 10_Out N1  H1     10    Out
#5   N1H1 10_Out N1  H1     10    Out
#6   N1H1 10_Out N1  H1     10    Out
#7   N1H1   5_In N1  H1      5     In
#8   N1H1   5_In N1  H1      5     In
#9  N0H05   5_In N0 H05      5     In
#10 N0H05  15_In N0 H05     15     In
#11 N0H05  15_In N0 H05     15     In
#12 N0H05  15_In N0 H05     15     In

【讨论】：

【解决方案2】：

使用base R函数你可以试试：

# Start from the input frame so Field and Border are carried over unchanged.
output <- df

# Splitting on "H" consumes the delimiter: "N0H05" becomes "N0" and "05".
field_parts <- strsplit(as.character(df$Field), "H")
output[, 3:4] <- do.call(rbind, field_parts)

# "20_Out" becomes "20" and "Out".
border_parts <- strsplit(as.character(df$Border), "_")
output[, 5:6] <- do.call(rbind, border_parts)

# Put back the "H" prefix that the split removed.
output$H <- paste0("H", output$H)
output
   Field Border  N   H Number Out.In
1   N0H0 20_Out N0  H0     20    Out
2   N0H0 20_Out N0  H0     20    Out
3   N0H0 10_Out N0  H0     10    Out
4   N1H1 10_Out N1  H1     10    Out
5   N1H1 10_Out N1  H1     10    Out
6   N1H1 10_Out N1  H1     10    Out
7   N1H1   5_In N1  H1      5     In
8   N1H1   5_In N1  H1      5     In
9  N0H05   5_In N0 H05      5     In
10 N0H05  15_In N0 H05     15     In
11 N0H05  15_In N0 H05     15     In
12 N0H05  15_In N0 H05     15     In

【讨论】：