1) 以下分别是 base R、dplyr 和 sqldf 的解决方案。我们先连接 A 和 B,然后按 ItinID 排序(dplyr 解决方案的结果似乎已经按此顺序排列,因此省略了排序这一步)。
# Natural join: merge() matches on every column name that A and B share.
m <- merge(x = A, y = B)
# Sort the matches by itinerary id and restore A's original column layout.
m[order(m[["ItinID"]]), names(A)]
## ItinID Origin Destination Carrier Class Fare
## 1 1 AB BC Delta Econ 100
## 3 2 AB BC Delta Premium 500
## 2 5 AB BC Delta Econ 150
## 4 6 AB BC Delta Premium 700
library(dplyr)
# Filtering join: keep the rows of A that have a match in B on their shared
# columns, without adding any of B's columns to the result.
A %>% semi_join(B)
## Joining, by = c("Origin", "Destination", "Carrier", "Class")
## ItinID Origin Destination Carrier Class Fare
## 1 1 AB BC Delta Econ 100
## 2 2 AB BC Delta Premium 500
## 3 5 AB BC Delta Econ 150
## 4 6 AB BC Delta Premium 700
library(sqldf)
# Natural join of B and A on the columns they share; A.* returns only A's
# columns, and the result is sorted by ItinID.
sqldf("select A.* from B natural join A order by ItinID")
## ItinID Origin Destination Carrier Class Fare
## 1 1 AB BC Delta Econ 100
## 2 2 AB BC Delta Premium 500
## 3 5 AB BC Delta Econ 150
## 4 6 AB BC Delta Premium 700
2) 如果没有 B,我们可以使用 base 中的 by 和 subset、dplyr 中的 filter,或者 SQL 中的相关子查询。
# Split A by Origin/Destination/Carrier, keep a group only when it offers
# both fare classes and every fare is at least 100, then stack the survivors.
do.call(
  what = "rbind",
  args = by(
    data = A,
    INDICES = A[c("Origin", "Destination", "Carrier")],
    FUN = subset,
    # Passed through `...` to subset(), which evaluates it inside each group.
    all(c("Econ", "Premium") %in% Class) & all(Fare >= 100)
  )
)
## ItinID Origin Destination Carrier Class Fare
## 1 1 AB BC Delta Econ 100
## 2 2 AB BC Delta Premium 500
## 5 5 AB BC Delta Econ 150
## 6 6 AB BC Delta Premium 700
library(dplyr)
# Keep only the Origin/Destination/Carrier groups that offer both fare
# classes and in which every fare is at least 100.
A %>%
  group_by(Origin, Destination, Carrier) %>%
  filter(
    all(c("Econ", "Premium") %in% Class),
    all(Fare >= 100)
  ) %>%
  ungroup()
## # A tibble: 4 x 6
## ItinID Origin Destination Carrier Class Fare
## <chr> <chr> <chr> <chr> <chr> <int>
## 1 1 AB BC Delta Econ 100
## 2 2 AB BC Delta Premium 500
## 3 5 AB BC Delta Econ 150
## 4 6 AB BC Delta Premium 700
library(sqldf)
# Correlation condition reused by every subquery: y must belong to the same
# Origin/Destination/Carrier group as the outer row x.
cond <- "x.Origin = y.Origin and x.Destination = y.Destination and
x.Carrier = y.Carrier"
# The fn$ prefix interpolates $cond into the SQL text before sqldf runs it.
# A row of A is kept when its group contains an Econ fare, contains a Premium
# fare, and has a minimum fare of at least 100.
fn$sqldf("select * from A x where
'Econ' in (select Class from A y where $cond) and
'Premium' in (select Class from A y where $cond) and
100 <= (select min(Fare) from A y where $cond)")
## ItinID Origin Destination Carrier Class Fare
## 1 1 AB BC Delta Econ 100
## 2 2 AB BC Delta Premium 500
## 3 5 AB BC Delta Econ 150
## 4 6 AB BC Delta Premium 700