【发布时间】:2021-03-08 07:50:06
【问题描述】:
我有数百个 *.txt 格式的数据文件,每个文件的 A 列是“基因 ID”,B 列是“计数”。我想按“基因 ID”把所有文件合并成一个 *.csv 文件,并用各 *.txt 文件的文件名来命名合并后对应的计数列(B、C、D 列等)。请帮我解决这个问题。
*.txt 格式的输入文件示例:
Sample_File_1
dput(head(Sample_File_1))
structure(list(`Gene IDs` = c("ENSG00000000003", "ENSG00000000005",
"ENSG00000000419", "ENSG00000000457", "ENSG00000000460", "ENSG00000000938"
), Sample_File_1.counts = c(0L, 0L, 8L, 10L, 1L, 242L)), row.names = c(NA,
6L), class = "data.frame")
Sample_File_2
dput(head(Sample_File_2))
structure(list(`Gene IDs` = c("ENSG00000000003", "ENSG00000000005",
"ENSG00000000419", "ENSG00000000457", "ENSG00000000460", "ENSG00000000938"
), Sample_File_2.counts = c(0L, 0L, 18L, 21L, 3L, 413L)), row.names = c(NA,
6L), class = "data.frame")
Sample_File_3
dput(head(Sample_File_3))
structure(list(`Gene IDs` = c("ENSG00000000003", "ENSG00000000005",
"ENSG00000000419", "ENSG00000000457", "ENSG00000000460", "ENSG00000000938"
), Sample_File_3.counts = c(0L, 0L, 24L, 13L, 2L, 400L)), row.names = c(NA,
6L), class = "data.frame")
Sample_File_4
dput(head(Sample_File_4))
structure(list(`Gene IDs` = c("ENSG00000000003", "ENSG00000000005",
"ENSG00000000419", "ENSG00000000457", "ENSG00000000460", "ENSG00000000938"
), Sample_File_4.counts = c(0L, 0L, 7L, 7L, 0L, 403L)), row.names = c(NA,
6L), class = "data.frame")
输出文件示例:
library(tidyverse)
Combined_inner_join <- list(Sample_File_1, Sample_File_2, Sample_File_3, Sample_File_4) %>% reduce(inner_join, by = "Gene IDs")
dput(head(Combined_inner_join))
structure(list(`Gene IDs` = c("ENSG00000000003", "ENSG00000000005",
"ENSG00000000419", "ENSG00000000457", "ENSG00000000460", "ENSG00000000938"
), Sample_File_1.counts = c(0L, 0L, 8L, 10L, 1L, 242L), Sample_File_2.counts = c(0L,
0L, 18L, 21L, 3L, 413L), Sample_File_3.counts = c(0L, 0L, 24L,
13L, 2L, 400L), Sample_File_4.counts = c(0L, 0L, 7L, 7L, 0L,
403L)), row.names = c(NA, 6L), class = "data.frame")
谢谢,
图菲克
【问题讨论】: