【发布时间】:2020-04-13 18:31:50
【问题描述】:
我正在尝试弄清楚如何使用 dplyr 和 pivot_longer 将政党名称数据集从宽到长重塑。
对于每个Party_ID,附加了许多常量列(Party_Name_Short, Party_Name, Country, Party_in_orig_title) 以及许多时间变化因素:election, Date, Rename, Reason, Party_Title, alliance, member_parties, split, parent_party, merger, child_party, successor, predecessor。每一方的时间变化因子最多记录 11 次,如 colname 中的索引所反映的那样。
为了提供样本,我为每一方选择了前三个时间变化的列和 5 个随机行的样本:
structure(list(Party_Name_Short = c("LZJ-PS", "ZiZi", "MNR",
"MDP", "E200"), Party_Name = c("Lista Zorana Jankovica – Pozitivna Slovenija",
"Živi zid", "Mouvement national républicain", "Movimento Democrático Português",
"Erakond Eesti 200"), Country = c("SVN", "HRV", "FRA", "PRT",
"EST"), Party_ID = c(1987, 2612, 1263, 1281, 2720), Party_in_orig_title = c(0,
0, 0, 0, 0), Date1 = c(2011, NA, 1999, 1987, NA), Rename1 = c("Lista Zorana Jankovica – Pozitivna Slovenija",
NA, "Mouvement national républicain", "ID", NA), Reason1 = c("foundation",
NA, "split from FN", "split", NA), Party_Title1 = c(0, NA, 0,
0, NA), alliance1 = c(0, NA, 0, 0, NA), member_parties1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_),
split1 = c(0, NA, 1, 1, NA), parent_party1 = c(NA, NA, "FN",
"MDP", NA), merger1 = c(0, NA, 0, 0, NA), child_party1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), successor1 = c(0, NA, 0, 0, NA), predecessor1 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), Date2 = c(2012, NA, NA, NA, NA), Rename2 = c("Pozitivna Slovenija",
NA, NA, NA, NA), Reason2 = c("renamed", NA, NA, NA, NA),
Party_Title2 = c(0, NA, NA, NA, NA), alliance2 = c(0, NA,
NA, NA, NA), member_parties2 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), split2 = c(0,
NA, NA, NA, NA), parent_party2 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), merger2 = c(0,
NA, NA, NA, NA), child_party2 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), successor2 = c(0,
NA, NA, NA, NA), predecessor2 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), Date3 = c(2014,
NA, NA, NA, NA), Rename3 = c("ZaAB", NA, NA, NA, NA), Reason3 = c("split",
NA, NA, NA, NA), Party_Title3 = c(0, NA, NA, NA, NA), alliance3 = c(0,
NA, NA, NA, NA), member_parties3 = c(NA_character_, NA_character_,
NA_character_, NA_character_, NA_character_), split3 = c(1,
NA, NA, NA, NA), parent_party3 = c("LZJ-PS", NA, NA, NA,
NA), merger3 = c(0, NA, NA, NA, NA), child_party3 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), successor3 = c(0, NA, NA, NA, NA), predecessor3 = c(NA_character_,
NA_character_, NA_character_, NA_character_, NA_character_
), election1 = structure(c(15309, 16740, 11839, 6390, 17956
), class = "Date"), election2 = structure(c(16252, NA, NA,
NA, NA), class = "Date"), election3 = structure(c(16344,
NA, NA, NA, NA), class = "Date")), row.names = c(NA, -5L), class = c("tbl_df",
"tbl", "data.frame"))
我希望数据遵循“长”结构,其中每个 party_id 和常数因子重复 11 次,并且时间变化因子有单列。按照here 制定的最高评价答案,我尝试了以下命令的不同变体:
pivot_longer(cols = starts_with(c("election", "Date", "Rename", "Reason", "Party_Title",
"alliance", "member_parties", "split", "parent_party",
"merger", "child_party", "successor", "predecessor")),
names_to = c(".value", "election", "Date", "Rename", "Reason", "Party_Title",
"alliance", "member_parties", "split", "parent_party",
"merger", "child_party", "successor", "predecessor"), names_sep = "_") %>%
select(-matches("election[1-9]"), -matches("Date[1-9]"), -matches("Rename[1-9]"),
-matches("Reason[1-9]"), -matches("alliance[1-9]"), -matches("member_parties[1-9]"),
-matches("split[1-9]"), -matches("parent_party[1-9]"), -matches("merger[1-9]"),
-matches("child_party[1-9]"), -matches("successor[1-9]"), -matches("predecessor[1-9]"),
-matches("Party_Title[1-9]"), -matches("election1[0-2]"), -matches("Date1[0-2]"), -matches("Rename1[0-2]"),
-matches("Reason1[0-2]"), -matches("alliance1[0-2]"), -matches("member_parties1[0-2]"),
-matches("split1[0-2]"), -matches("parent_party1[0-2]"), -matches("merger1[0-2]"),
-matches("child_party1[0-2]"), -matches("successor1[0-2]"), -matches("predecessor1[0-2]"),
-matches("Party_Title1[0-2]"))
但是,由于某种原因,我得到了很多缺失值,并且没有达到我想要的数据形状。如果您知道如何执行此操作,我将不胜感激。谢谢!
更新:
我希望最终输出看起来像:
structure(list(Party_Name_Short = c("LZJ-PS", "ZiZi", "MNR",
"MDP", "E200", "LZJ-PS", "ZiZi", "MNR", "MDP", "E200", "LZJ-PS",
"ZiZi", "MNR", "MDP", "E200"), Party_Name = c("Lista Zorana Jankovica – Pozitivna Slovenija",
"Živi zid", "Mouvement national républicain", "Movimento Democrático Português",
"Erakond Eesti 200", "Lista Zorana Jankovica – Pozitivna Slovenija",
"Živi zid", "Mouvement national républicain", "Movimento Democrático Português",
"Erakond Eesti 200", "Lista Zorana Jankovica – Pozitivna Slovenija",
"Živi zid", "Mouvement national républicain", "Movimento Democrático Português",
"Erakond Eesti 200"), Country = c("SVN", "HRV", "FRA", "PRT",
"EST", "SVN", "HRV", "FRA", "PRT", "EST", "SVN", "HRV", "FRA",
"PRT", "EST"), Party_ID = c(1987, 2612, 1263, 1281, 2720, 1987,
2612, 1263, 1281, 2720, 1987, 2612, 1263, 1281, 2720), Party_in_orig_title = c(0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), time = c(1, 1, 1,
1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3), Date = c(2011, NA, 1999,
1987, NA, 2012, NA, NA, NA, NA, 2014, NA, NA, NA, NA), Rename = c("Lista Zorana Jankovica – Pozitivna Slovenija",
NA, "Mouvement national républicain", "ID", NA, "Pozitivna Slovenija",
NA, NA, NA, NA, "ZaAB", NA, NA, NA, NA), Reason = c("foundation",
NA, "split from FN", "split", NA, "renamed", NA, NA, NA, NA,
"split", NA, NA, NA, NA), Party_Title = c(0, NA, 0, 0, NA, 0,
NA, NA, NA, NA, 0, NA, NA, NA, NA), alliance = c(0, NA, 0, 0,
NA, 0, NA, NA, NA, NA, 0, NA, NA, NA, NA), member_parties = c(NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), split = c(0,
NA, 1, 1, NA, 0, NA, NA, NA, NA, 1, NA, NA, NA, NA), parent_party = c(NA,
NA, "FN", "MDP", NA, NA, NA, NA, NA, NA, "LZJ-PS", NA, NA, NA,
NA), merger = c(0, NA, 0, 0, NA, 0, NA, NA, NA, NA, 0, NA, NA,
NA, NA), child_party = c(NA, NA, NA, NA, NA, NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA), successor = c(0, NA, 0, 0, NA, 0, NA,
NA, NA, NA, 0, NA, NA, NA, NA), predecessor = c(NA, NA, NA, NA,
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), election = structure(c(1322697600,
1446336000, 1022889600, 552096000, 1551398400, 1404172800, NA,
NA, NA, NA, 1412121600, NA, NA, NA, NA), class = c("POSIXct",
"POSIXt"), tzone = "UTC")), row.names = c(NA, -15L), class = c("tbl_df",
"tbl", "data.frame"))
注意:新增的time列,注意这只是举例,有3个时间变化因子,而实际上数据中有11个。
【问题讨论】:
-
你能展示你的预期输出吗
-
我现在添加了所需的输出。