我认为仅用 SQL 不容易做到这一点。例如,PostgreSQL 中有一个单独的库专门用于执行此操作(请参阅此处)。
不过 dbplyr 在这里可能会有所帮助:既可以直接使用它,也可以间接地用它生成能在该包之外使用的 SQL。我没有 MySQL,所以以下内容是在 PostgreSQL 和 SQLite 上测试的。我认为它配合 RMySQL 也应该可以正常工作:
library(tibble)
library(tidyr)
library(DBI)
library(dplyr, warn.conflicts = FALSE)
# Example data: one row per (category, zipnum) pair. Every column is stored as
# character, so zip codes with a leading zero (e.g. "09428") are kept intact.
df <- tibble::tibble(
  zipcode  = c("12345", "40348", "16132", "09428", "14818", "93182"),
  category = c("A", "A", "B", "B", "B", "C"),
  zipnum   = c("1", "2", "1", "2", "3", "1")
)
# Open a PostgreSQL connection. No connection arguments are passed here, so
# they are presumably taken from the environment -- adjust for your own setup.
pg <- dbConnect(RPostgres::Postgres())

# Upload the local tibble and keep a lazy reference to the remote table, so
# that the verbs below are translated to SQL instead of being run in R.
df_pg <- copy_to(pg, df, overwrite = TRUE)

# Build the wide table once and reuse it for both printing and inspecting the
# generated SQL. One output column is created per distinct `zipnum` value,
# named "zipcode<zipnum>".
# The column order is whatever the backend produced (3, 2, 1 in the output
# below). NOTE(review): `names_sort = TRUE` should give a sorted order --
# confirm against your dbplyr version.
wide_pg <- df_pg %>%
  pivot_wider(
    names_from = "zipnum",
    values_from = "zipcode",
    names_prefix = "zipcode"
  )

wide_pg
#> # Source: lazy query [?? x 4]
#> # Database: postgres [iangow@/tmp:5432/crsp]
#> category zipcode3 zipcode2 zipcode1
#> <chr> <chr> <chr> <chr>
#> 1 A <NA> 40348 12345
#> 2 C <NA> <NA> 93182
#> 3 B 14818 09428 16132

# Show the SQL that dbplyr generates for the pivot; it can be reused outside R.
wide_pg %>%
  show_query()
#> <SQL>
#> SELECT "category", MAX(CASE WHEN ("zipnum" = '3') THEN ("zipcode") END) AS "zipcode3", MAX(CASE WHEN ("zipnum" = '2') THEN ("zipcode") END) AS "zipcode2", MAX(CASE WHEN ("zipnum" = '1') THEN ("zipcode") END) AS "zipcode1"
#> FROM "df"
#> GROUP BY "category"

# Close the connection now that the PostgreSQL example is finished, so the
# session does not leak an open database connection.
dbDisconnect(pg)
# Same data, this time copied into dbplyr's in-memory SQLite database.
df_sqlite <- dbplyr::memdb_frame(df)

# Pivot to one row per category, with one "zipcode<zipnum>" column per
# distinct `zipnum` value, and print the lazy result.
pivot_wider(
  df_sqlite,
  names_from = "zipnum",
  values_from = "zipcode",
  names_prefix = "zipcode"
)
#> # Source: lazy query [?? x 4]
#> # Database: sqlite 3.35.2 [:memory:]
#> category zipcode1 zipcode2 zipcode3
#> <chr> <chr> <chr> <chr>
#> 1 A 12345 40348 <NA>
#> 2 B 16132 09428 14818
#> 3 C 93182 <NA> <NA>

# Inspect the SQL that dbplyr generates for the same pivot on SQLite.
show_query(
  pivot_wider(
    df_sqlite,
    names_from = "zipnum",
    values_from = "zipcode",
    names_prefix = "zipcode"
  )
)
#> <SQL>
#> SELECT `category`, MAX(CASE WHEN (`zipnum` = '1') THEN (`zipcode`) END) AS `zipcode1`, MAX(CASE WHEN (`zipnum` = '2') THEN (`zipcode`) END) AS `zipcode2`, MAX(CASE WHEN (`zipnum` = '3') THEN (`zipcode`) END) AS `zipcode3`
#> FROM `dbplyr_001`
#> GROUP BY `category`
由 reprex 包 (v1.0.0) 创建于 2021-04-04