【发布时间】:2018-07-18 11:42:30
【问题描述】:
我知道以前有人问过这类问题(将列表转换为数据框),但我遇到的情况比较特殊:我要转换为数据框的是一个由嵌套列表组成的列表。这些数据来自 R 中的一次 API 调用,所以我只能处理这种“列表的列表”的嵌套结构。下面是该 API 返回对象的一个小示例(5 场比赛的数据):
dput(soccer_data)
list(structure(list(id = 1603158L, league_id = 779L, season_id = 914L,
stage_id = 1810L, round_id = 29156L, group_id = NULL, aggregate_id = NULL,
venue_id = 139L, referee_id = 656L, localteam_id = 607L,
visitorteam_id = 3639L, weather_report = NULL, commentaries = TRUE,
attendance = NULL, pitch = NULL, winning_odds_calculated = FALSE,
formations = structure(list(localteam_formation = "4-2-3-1",
visitorteam_formation = "4-1-4-1"), .Names = c("localteam_formation",
"visitorteam_formation")), scores = structure(list(localteam_score = 5L,
visitorteam_score = 1L, localteam_pen_score = 0L, visitorteam_pen_score = 0L,
ht_score = "1-0", ft_score = "5-1", et_score = NULL), .Names = c("localteam_score",
"visitorteam_score", "localteam_pen_score", "visitorteam_pen_score",
"ht_score", "ft_score", "et_score")), time = structure(list(
status = "FT", starting_at = structure(list(date_time = "2017-03-04 05:30:00",
date = "2017-03-04", time = "05:30:00", timestamp = 1488605400L,
timezone = "UTC"), .Names = c("date_time", "date",
"time", "timestamp", "timezone")), minute = 90L, extra_minute = NULL,
injury_time = NULL), .Names = c("status", "starting_at",
"minute", "extra_minute", "injury_time")), coaches = structure(list(
localteam_coach_id = 429924L, visitorteam_coach_id = 429940L), .Names = c("localteam_coach_id",
"visitorteam_coach_id")), standings = structure(list(localteam_position = NULL,
visitorteam_position = NULL), .Names = c("localteam_position",
"visitorteam_position")), deleted = FALSE), .Names = c("id",
"league_id", "season_id", "stage_id", "round_id", "group_id",
"aggregate_id", "venue_id", "referee_id", "localteam_id", "visitorteam_id",
"weather_report", "commentaries", "attendance", "pitch", "winning_odds_calculated",
"formations", "scores", "time", "coaches", "standings", "deleted"
)), structure(list(id = 1603159L, league_id = 779L, season_id = 914L,
stage_id = 1810L, round_id = 29156L, group_id = NULL, aggregate_id = NULL,
venue_id = 113L, referee_id = 3614L, localteam_id = 577L,
visitorteam_id = 75L, weather_report = NULL, commentaries = FALSE,
attendance = NULL, pitch = NULL, winning_odds_calculated = FALSE,
formations = structure(list(localteam_formation = "4-2-3-1",
visitorteam_formation = "4-2-3-1"), .Names = c("localteam_formation",
"visitorteam_formation")), scores = structure(list(localteam_score = 1L,
visitorteam_score = 1L, localteam_pen_score = 0L, visitorteam_pen_score = 0L,
ht_score = "1-0", ft_score = "1-1", et_score = NULL), .Names = c("localteam_score",
"visitorteam_score", "localteam_pen_score", "visitorteam_pen_score",
"ht_score", "ft_score", "et_score")), time = structure(list(
status = "FT", starting_at = structure(list(date_time = "2017-03-04 22:00:00",
date = "2017-03-04", time = "22:00:00", timestamp = 1488664800L,
timezone = "UTC"), .Names = c("date_time", "date",
"time", "timestamp", "timezone")), minute = 90L, extra_minute = NULL,
injury_time = NULL), .Names = c("status", "starting_at",
"minute", "extra_minute", "injury_time")), coaches = structure(list(
localteam_coach_id = 455860L, visitorteam_coach_id = 176760L), .Names = c("localteam_coach_id",
"visitorteam_coach_id")), standings = structure(list(localteam_position = NULL,
visitorteam_position = NULL), .Names = c("localteam_position",
"visitorteam_position")), deleted = FALSE), .Names = c("id",
"league_id", "season_id", "stage_id", "round_id", "group_id",
"aggregate_id", "venue_id", "referee_id", "localteam_id", "visitorteam_id",
"weather_report", "commentaries", "attendance", "pitch", "winning_odds_calculated",
"formations", "scores", "time", "coaches", "standings", "deleted"
)), structure(list(id = 1603160L, league_id = 779L, season_id = 914L,
stage_id = 1810L, round_id = 29156L, group_id = NULL, aggregate_id = NULL,
venue_id = 28L, referee_id = 555L, localteam_id = 413L, visitorteam_id = 583L,
weather_report = NULL, commentaries = FALSE, attendance = 23554L,
pitch = NULL, winning_odds_calculated = FALSE, formations = structure(list(
localteam_formation = "4-4-1-1", visitorteam_formation = "4-4-2"), .Names = c("localteam_formation",
"visitorteam_formation")), scores = structure(list(localteam_score = 1L,
visitorteam_score = 2L, localteam_pen_score = 0L, visitorteam_pen_score = 0L,
ht_score = "0-0", ft_score = "1-2", et_score = NULL), .Names = c("localteam_score",
"visitorteam_score", "localteam_pen_score", "visitorteam_pen_score",
"ht_score", "ft_score", "et_score")), time = structure(list(
status = "FT", starting_at = structure(list(date_time = "2017-03-05 00:00:00",
date = "2017-03-05", time = "00:00:00", timestamp = 1488672000L,
timezone = "UTC"), .Names = c("date_time", "date",
"time", "timestamp", "timezone")), minute = 90L, extra_minute = NULL,
injury_time = NULL), .Names = c("status", "starting_at",
"minute", "extra_minute", "injury_time")), coaches = structure(list(
localteam_coach_id = 429914L, visitorteam_coach_id = 429917L), .Names = c("localteam_coach_id",
"visitorteam_coach_id")), standings = structure(list(localteam_position = NULL,
visitorteam_position = NULL), .Names = c("localteam_position",
"visitorteam_position")), deleted = FALSE), .Names = c("id",
"league_id", "season_id", "stage_id", "round_id", "group_id",
"aggregate_id", "venue_id", "referee_id", "localteam_id", "visitorteam_id",
"weather_report", "commentaries", "attendance", "pitch", "winning_odds_calculated",
"formations", "scores", "time", "coaches", "standings", "deleted"
)), structure(list(id = 1603161L, league_id = 779L, season_id = 914L,
stage_id = 1810L, round_id = 29156L, group_id = NULL, aggregate_id = NULL,
venue_id = 411L, referee_id = 274L, localteam_id = 1062L,
visitorteam_id = 111L, weather_report = NULL, commentaries = FALSE,
attendance = NULL, pitch = NULL, winning_odds_calculated = FALSE,
formations = structure(list(localteam_formation = "4-2-3-1",
visitorteam_formation = "3-5-2"), .Names = c("localteam_formation",
"visitorteam_formation")), scores = structure(list(localteam_score = 0L,
visitorteam_score = 0L, localteam_pen_score = 0L, visitorteam_pen_score = 0L,
ht_score = "0-0", ft_score = "0-0", et_score = NULL), .Names = c("localteam_score",
"visitorteam_score", "localteam_pen_score", "visitorteam_pen_score",
"ht_score", "ft_score", "et_score")), time = structure(list(
status = "FT", starting_at = structure(list(date_time = "2017-03-05 00:30:00",
date = "2017-03-05", time = "00:30:00", timestamp = 1488673800L,
timezone = "UTC"), .Names = c("date_time", "date",
"time", "timestamp", "timezone")), minute = 90L, extra_minute = NULL,
injury_time = NULL), .Names = c("status", "starting_at",
"minute", "extra_minute", "injury_time")), coaches = structure(list(
localteam_coach_id = 456638L, visitorteam_coach_id = 516577L), .Names = c("localteam_coach_id",
"visitorteam_coach_id")), standings = structure(list(localteam_position = NULL,
visitorteam_position = NULL), .Names = c("localteam_position",
"visitorteam_position")), deleted = FALSE), .Names = c("id",
"league_id", "season_id", "stage_id", "round_id", "group_id",
"aggregate_id", "venue_id", "referee_id", "localteam_id", "visitorteam_id",
"weather_report", "commentaries", "attendance", "pitch", "winning_odds_calculated",
"formations", "scores", "time", "coaches", "standings", "deleted"
)), structure(list(id = 1603162L, league_id = 779L, season_id = 914L,
stage_id = 1810L, round_id = 29157L, group_id = NULL, aggregate_id = NULL,
venue_id = 11573L, referee_id = 370L, localteam_id = 179L,
visitorteam_id = 641L, weather_report = NULL, commentaries = FALSE,
attendance = NULL, pitch = NULL, winning_odds_calculated = FALSE,
formations = structure(list(localteam_formation = "4-2-3-1",
visitorteam_formation = "4-3-1-2"), .Names = c("localteam_formation",
"visitorteam_formation")), scores = structure(list(localteam_score = 1L,
visitorteam_score = 0L, localteam_pen_score = 0L, visitorteam_pen_score = 0L,
ht_score = "0-0", ft_score = "1-0", et_score = NULL), .Names = c("localteam_score",
"visitorteam_score", "localteam_pen_score", "visitorteam_pen_score",
"ht_score", "ft_score", "et_score")), time = structure(list(
status = "FT", starting_at = structure(list(date_time = "2017-03-05 02:00:00",
date = "2017-03-05", time = "02:00:00", timestamp = 1488679200L,
timezone = "UTC"), .Names = c("date_time", "date",
"time", "timestamp", "timezone")), minute = 90L, extra_minute = NULL,
injury_time = NULL), .Names = c("status", "starting_at",
"minute", "extra_minute", "injury_time")), coaches = structure(list(
localteam_coach_id = 524071L, visitorteam_coach_id = 261458L), .Names = c("localteam_coach_id",
"visitorteam_coach_id")), standings = structure(list(localteam_position = NULL,
visitorteam_position = NULL), .Names = c("localteam_position",
"visitorteam_position")), deleted = FALSE), .Names = c("id",
"league_id", "season_id", "stage_id", "round_id", "group_id",
"aggregate_id", "venue_id", "referee_id", "localteam_id", "visitorteam_id",
"weather_report", "commentaries", "attendance", "pitch", "winning_odds_calculated",
"formations", "scores", "time", "coaches", "standings", "deleted"
)))
soccer_data 包含 5 场 MLS 足球比赛的数据,下面是我目前将其转换为数据框的做法:
# Pull the "scores" sub-list out of every game and stack the games into one
# data frame (one row per game), together with each game's start date.
game_score_rows <- lapply(soccer_data, function(game) {
  # unlist() drops NULL entries (e.g. et_score) and coerces the remaining
  # values to character; t() turns the named vector into a single row.
  game_scores <- as.data.frame(
    t(unlist(game$scores)),
    stringsAsFactors = FALSE
  )
  game_scores$date <- as.Date(game$time$starting_at$date)
  game_scores
})
# Bind all rows in one call instead of growing the data frame inside a loop.
# rbind.fill() accepts a list of data frames and fills columns that are
# missing from some of them with NA.
season_scores <- plyr::rbind.fill(game_score_rows)
# The score columns are character at this point; let readr re-guess types.
season_scores <- season_scores %>% readr::type_convert()
# Build the per-game box-score table:
#   1. row-bind the games (one row per game, one column per top-level field),
#   2. drop the nested fields and the ids that are NULL in this sample,
#   3. append the flattened score columns from season_scores,
#   4. let readr re-guess the column types.
# NOTE(review): do.call(rbind, <list of lists>) yields a list-matrix, so the
# columns produced in step 1 are list columns; readr::type_convert() is
# documented to re-parse character columns only -- confirm against readr docs.
season_boxscores <- local({
  dropped_fields <- c("scores", "group_id", "aggregate_id", "time", "standings")
  games_matrix <- do.call(rbind, soccer_data)
  games_df <- as.data.frame(games_matrix, stringsAsFactors = FALSE)
  games_df <- dplyr::select(games_df, -one_of(dropped_fields))
  readr::type_convert(cbind(games_df, season_scores))
})
不幸的是,这种方法的问题在于最后一次 type_convert() 调用并没有达到我想要的效果:生成的 season_boxscores 数据框中,大多数列的类(class)仍然是 list。
# Check for yourself: report the class of every column of season_boxscores
sapply(season_boxscores, class)
那么我的问题是:
- 我该怎么做,才能让 season_boxscores 中各列的类(class)不再是 list?另外,
- 用 do.call、rbind 和 as.data.frame 来完成这种转换(把列表的列表转换为数据框)是最佳做法吗?
提前致谢!
编辑:如果所有嵌套列表(soccer_data 中有好几个:formations、scores、time、coaches、standings)也都能被展开(取消嵌套),就像我用 for 循环展开 scores 那样,那就太好了。
编辑 2:很抱歉,仅仅 5 场比赛就贴出了这么大的列表对象。对于这种列表的列表(或类似的大型嵌套对象),我其实不知道如何从每个嵌套列表中删除相同的元素(例如从每个 soccer_data[[i]] 中删除 league_id、round_id 等),否则我会先精简之后再贴出来。如果有人知道该怎么做,那就太好了!
编辑 3:因为 soccer_data 不只是一个简单的列表的列表,而是列表的列表的列表(每一层中还混有其他非列表对象),所以这个问题(Force list of lists into dataframe)中的解决方案都不适用于 soccer_data。
【问题讨论】:
标签: r data-manipulation