【问题标题】:finding the closest XY coordinates in a large dataframe to reference coordinates in another dataframe in R(在 R 中,从大型数据框里找出与另一个数据框中的参考坐标最接近的 XY 坐标)
【发布时间】:2020-12-18 10:50:30
【问题描述】:

我有一个包含 1000 行数据的数据框,其中有一列 ID。示例数据框如下

# Sample points: 20 (X, Y) coordinates, 10 rows for each of the two IDs,
# each row carrying a TI value.
df1 <- data.frame(
  X = c(
    7.48, 7.82, 8.15, 8.47, 8.80, 9.20, 9.51, 9.83, 10.13, 10.59,
    7.59, 8.06, 8.39, 8.87, 9.26, 9.64, 10.09, 10.48, 10.88, 11.45
  ),
  Y = c(
    49.16, 48.78, 48.40, 48.03, 47.65, 47.24, 46.87, 46.51, 46.15, 45.73,
    48.70, 48.18, 47.72, 47.20, 46.71, 46.23, 45.72, 45.24, 44.77, 44.23
  ),
  # First ten rows belong to "B_1", the last ten to "B_1_2".
  ID = rep(c("B_1", "B_1_2"), each = 10),
  TI = c(
    191.31, 191.35, 191.39, 191.44, 191.48,
    191.52, 191.56, 191.60, 191.64, 191.69,
    1349.93, 1349.97, 1350.01, 1350.05, 1350.09,
    1350.14, 1350.18, 1350.22, 1350.26, 1350.30
  )
)

在“df1”数据框中,我想根据另一个数据框中坐标之间的最近距离对每个 ID 中的行进行子集化。参考数据框示例如下

# Reference points: five (X, Y) coordinates that rows of df1 are matched to.
df2 <- data.frame(
  X = c(7.62, 8.25, 8.95, 9.71, 10.23),
  Y = c(49.06, 48.30, 47.55, 46.77, 46.25)
)

计算“df1”数据帧和“df2”数据帧中每个坐标之间的距离,以找到与“df2”数据帧最接近的坐标。测试数据计算在excel中计算出来,如下图所示

预期的期望输出如下

我尝试了以下代码....

library(data.table)
library(dplyr)

# Candidate points: 20 (X, Y) coordinates, 10 rows per ID, each with a TI value.
df1 <- data.frame(
  X = c(
    7.48, 7.82, 8.15, 8.47, 8.80, 9.20, 9.51, 9.83, 10.13, 10.59,
    7.59, 8.06, 8.39, 8.87, 9.26, 9.64, 10.09, 10.48, 10.88, 11.45
  ),
  Y = c(
    49.16, 48.78, 48.40, 48.03, 47.65, 47.24, 46.87, 46.51, 46.15, 45.73,
    48.70, 48.18, 47.72, 47.20, 46.71, 46.23, 45.72, 45.24, 44.77, 44.23
  ),
  ID = c(
    "B_1", "B_1", "B_1", "B_1", "B_1",
    "B_1", "B_1", "B_1", "B_1", "B_1",
    "B_1_2", "B_1_2", "B_1_2", "B_1_2", "B_1_2",
    "B_1_2", "B_1_2", "B_1_2", "B_1_2", "B_1_2"
  ),
  TI = c(
    191.31, 191.35, 191.39, 191.44, 191.48,
    191.52, 191.56, 191.60, 191.64, 191.69,
    1349.93, 1349.97, 1350.01, 1350.05, 1350.09,
    1350.14, 1350.18, 1350.22, 1350.26, 1350.30
  )
)

# Reference points that the rows of df1 are matched against.
df2 <- data.frame(
  X = c(7.62, 8.25, 8.95, 9.71, 10.23),
  Y = c(49.06, 48.30, 47.55, 46.77, 46.25)
)

df1 <- as.data.table(df1)

# For every reference point (row of df2), keep the closest df1 row within
# each ID. The result has one row per (reference point, ID) pair plus a
# column `d` holding the squared Euclidean distance to that reference point.
a <- rbindlist(lapply(seq_len(nrow(df2)), function(ref_row) {
  ref_x <- df2$X[ref_row]
  ref_y <- df2$Y[ref_row]
  # Work on a copy: `:=` modifies by reference and would otherwise leave a
  # stale `d` column behind in df1.
  candidates <- copy(df1)
  candidates[, d := (X - ref_x)^2 + (Y - ref_y)^2]
  # which.min() avoids comparing floating-point values with `==`; on an
  # exact tie it keeps the first row of the group.
  candidates[candidates[, .I[which.min(d)], by = ID]$V1]
}))

# The sort has to happen after `a` exists, i.e. outside the call that
# builds it.
sorted <- a %>% arrange(ID, TI)

我正在寻找代码以获得所需的输出

【问题讨论】:

    标签: r math data-manipulation


    【解决方案1】:

    第一步

    我们可以使用以下代码计算 df1 中每个点与 df2 中每个参考点之间的距离

    # Candidate points: 20 (X, Y) coordinates, 10 rows per ID, each with a TI
    # value.
    df1 <- data.frame(
      X = c(
        7.48, 7.82, 8.15, 8.47, 8.80, 9.20, 9.51, 9.83, 10.13, 10.59,
        7.59, 8.06, 8.39, 8.87, 9.26, 9.64, 10.09, 10.48, 10.88, 11.45
      ),
      Y = c(
        49.16, 48.78, 48.40, 48.03, 47.65, 47.24, 46.87, 46.51, 46.15, 45.73,
        48.70, 48.18, 47.72, 47.20, 46.71, 46.23, 45.72, 45.24, 44.77, 44.23
      ),
      ID = c(
        "B_1", "B_1", "B_1", "B_1", "B_1",
        "B_1", "B_1", "B_1", "B_1", "B_1",
        "B_1_2", "B_1_2", "B_1_2", "B_1_2", "B_1_2",
        "B_1_2", "B_1_2", "B_1_2", "B_1_2", "B_1_2"
      ),
      TI = c(
        191.31, 191.35, 191.39, 191.44, 191.48,
        191.52, 191.56, 191.60, 191.64, 191.69,
        1349.93, 1349.97, 1350.01, 1350.05, 1350.09,
        1350.14, 1350.18, 1350.22, 1350.26, 1350.30
      )
    )

    # Reference points that the rows of df1 are matched against.
    df2 <- data.frame(
      X = c(7.62, 8.25, 8.95, 9.71, 10.23),
      Y = c(49.06, 48.30, 47.55, 46.77, 46.25)
    )

    # Euclidean distance from every df1 point (rows) to every df2 reference
    # point (columns), computed in one vectorised step instead of filling a
    # data frame cell by cell. Despite its name, Azimuth holds distances.
    # Columns are named V1, V2, ... in the order of the rows of df2.
    Azimuth <- as.data.frame(
      sqrt(outer(df1$X, df2$X, "-")^2 + outer(df1$Y, df2$Y, "-")^2)
    )
    

    然后我们可以使用以下代码将 df1 数据框与距离表 Azimuth 合并(变量名虽为 Azimuth,但其中存放的是距离而非方位角)

    # Append the distance columns held in Azimuth to df1, overwriting df1 with
    # the combined data frame (original columns first, then one column per
    # column of Azimuth).
    df1 <- data.frame(df1,Azimuth)
    

    第二步

    为了得到想要的输出,我们采用了 “akrun” 在以下问题中给出的答案:https://stackoverflow.com/questions/65345208/how-to-subset-rows-in-specific-columns-based-on-minimum-values-in-individual-col

    代码如下

    library(dplyr)
    library(purrr)
    # Distance columns: everything after the four original columns
    # (X, Y, ID, TI). Taking only columns 5:6 would match just two of the
    # reference points.
    nm1 <- names(df1)[-seq_len(4)]
    # For each distance column, keep the row(s) with the smallest distance
    # within every ID, then stack the per-column results.
    map_dfr(nm1, ~ df1 %>%
      group_by(ID) %>%
      slice_min(.data[[.x]])) %>%
      ungroup() %>%
      # d = smallest value across the distance columns for each kept row.
      mutate(d = select(., all_of(nm1)) %>% reduce(pmin))
    

    【讨论】:

      猜你喜欢
      • 1970-01-01
      • 2022-11-27
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2016-11-25
      • 2019-09-09
      • 1970-01-01
      相关资源
      最近更新 更多