【问题标题】:Replace a row value with sum of other rows when conditions are met满足条件时将行值替换为其他行的总和
【发布时间】:2019-10-08 15:44:18
【问题描述】:

我正在尝试计算如何根据多个条件对行求和,但保留我的数据框中的所有行,因此只有满足条件的行具有求和值,所有其他行具有原始值(来自名为 iresult_posPPP 的列)。我有多个国家/地区、不同年份、部门等的数据。如果我的 ISO 列是“ALL”,我需要这个值是 iresult_posPPP 中所有行的总和,这些行对 DataYear、Division、FurtherDetails1 和 FurtherDetails2 具有相同的值。

我在下面附上了我的数据框的 dput:

   Data <- structure(list(ID.x = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), Indicator = c("Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre", "Gym and Leisure Centre", 
"Gym and Leisure Centre", "Gym and Leisure Centre"), IndicatorID = c(98L, 
98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 98L, 
98L, 98L, 98L, 98L, 98L, 98L, 98L), InputA = c(3762.134507, 4447.309316, 
4293.110921, 3862.76676, 4071.175351, 4323.016371, 4036.130293, 
4450.854575, 4224.807119, 4563, 4888.173223, 4473.037451, 4136.032594, 
4429.615323, 4972.768468, 4910.636192, 4947.585664, 4119.657378, 
4809.477176, 4135.606089, 4935.381334), InputAName = c("Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym", 
"Total number of people using the gym", "Total number of people using the gym"
), InputAUnit = c("#", "#", "#", "#", "#", "#", "#", "#", "#", 
"#", "#", "#", "#", "#", "#", "#", "#", "#", "#", "#", "#"), 
    OutputCode = c("GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP"), DataYear = c(2016L, 
    2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2016L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L), Country.x = c("Uzbekistan", "Vanuatu", "Venezuela, RB", 
    "Vietnam", "Virgin Islands (U.S.)", "West Bank and Gaza", 
    "Yemen, Rep.", "Zambia", "Zimbabwe", "ALL", "Afghanistan", 
    "Albania", "Algeria", "American Samoa", "Andorra", "Angola", 
    "Antigua and Barbuda", "Argentina", "Armenia", "Aruba", "Australia"
    ), ISO = c("UZB", "VUT", "VEN", "VNM", "VIR", "PSE", "YEM", 
    "ZMB", "ZWE", "ALL", "AFG", "ALB", "DZA", "ASM", "AND", "AGO", 
    "ATG", "ARG", "ARM", "ABW", "AUS"), Division = c("Two", "Two", 
    "Two", "Two", "Two", "Two", "Two", "Two", "Two", "One", "One", 
    "One", "One", "One", "One", "One", "One", "One", "One", "One", 
    "One"), FurtherDetails1 = c("fd1b", "fd1b", "fd1b", "fd1b", 
    "fd1b", "fd1b", "fd1b", "fd1b", "fd1b", "fd1a", "fd1a", "fd1a", 
    "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", "fd1a", 
    "fd1a"), FurtherDetails2 = c("fd2b", "fd2b", "fd2b", "fd2b", 
    "fd2b", "fd2b", "fd2b", "fd2b", "fd2b", "fd2a", "fd2a", "fd2a", 
    "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", "fd2a", 
    "fd2a"), ID.y = c(168L, 168L, 168L, 168L, 168L, 168L, 168L, 
    168L, 168L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 189L, 
    189L, 189L, 189L, 189L), Code.x = c("GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP"
    ), CoefficientYear = c(2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L), Percent = c(-2.69, 
    -2.69, -2.69, -2.69, -2.69, -2.69, -2.69, -2.69, -2.69, 0, 
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), CoeffqInflation = c(-0.0269, 
    -0.0269, -0.0269, -0.0269, -0.0269, -0.0269, -0.0269, -0.0269, 
    -0.0269, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), Method = c("", 
    "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", 
    "", "", "", "", ""), ID.x.x = c(248L, 248L, 248L, 248L, 248L, 
    248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L, 248L, 
    248L, 248L, 248L, 248L, 248L, 248L), InputCode = c("GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP"), CoeffqFactor = c(1, 1, 1, 1, 1, 1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Year = c(2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 2017L, 
    2017L), Type = c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), ID.y.y = c(212L, 
    213L, 214L, 215L, 216L, 217L, 218L, 219L, 220L, 221L, 1L, 
    2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 11L), Country.y = c("Uzbekistan", 
    "Vanuatu", "Venezuela, RB", "Vietnam", "Virgin Islands (U.S.)", 
    "West Bank and Gaza", "Yemen, Rep.", "Zambia", "Zimbabwe", 
    "ALL", "Afghanistan", "Albania", "Algeria", "American Samoa", 
    "Andorra", "Angola", "Antigua and Barbuda", "Argentina", 
    "Armenia", "Aruba", "Australia"), Currency = c("GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP"), CoeffqPPP = c(0.184436278, 0.184436278, 0.367527677, 
    0.184436278, 0.994525717, 0.184436278, 0.184436278, 0.184436278, 
    0.112411792, 0, 0.112411792, 0.347344672, 0.367527677, 0.367527677, 
    0.994525717, 0.264750217, 0.994525717, 0.389806873, 0.188019865, 
    0.994525717, 0.994525717), Step = c(3, 3, 3, 3, 3, 3, 3, 
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3), Coeffq = c(2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717, 
    2756.10980649717, 2756.10980649717, 2756.10980649717, 2756.10980649717
    ), CoeffqYear = c("2017", "2017", "2017", "2017", "2017", 
    "2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017", 
    "2017", "2017", "2017", "2017", "2017", "2017", "2017", "2017"
    ), CoeffqUnit = c("GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", "GBP", 
    "GBP", "GBP", "GBP", "GBP", "GBP", "GBP"), Step2iresult = c(1.32904727554592, 
    1.57109861939866, 1.51662503362256, 1.36459757855418, 1.43822197171544, 
    1.52718971619107, 1.42584162253612, 1.57235105117809, 1.49249538546973, 
    1.61196860639412, 1.72684237959508, 1.58018758409756, 1.46113405578585, 
    1.56484787181204, 1.75672729723484, 1.73477786082112, 1.74783098133106, 
    1.45534919185534, 1.69904146852532, 1.46098338458917, 1.74351956410071
    ), iresult_pos = c(3564.46552325877, 4213.64007550875, 4067.54352800449, 
    3659.81038551155, 3857.2688325682, 4095.8776944954, 3824.06510187511, 
    4216.9990605352, 4002.82852462449, 4442.76248384841, 4759.36721667692, 
    4355.17049663636, 4027.04589975837, 4312.8925651774, 4841.73333125022, 
    4781.23827430327, 4817.21410774609, 4011.10217955023, 4682.744853048, 
    4026.63063339563, 4805.33136843763), iresult_posPPP = c(675.590128629298, 
    798.631273618818, 1536.26022396863, 693.661294511865, 3942.19818248036, 
    776.311208628047, 724.793273270513, 799.267918050159, 462.403789458181, 
    0, 535.008997612704, 1512.74526765823, 1480.05082471057, 
    1585.10738563022, 4815.22831278442, 1265.8338706505, 4790.8433144487, 
    1563.55519789396, 880.449055099529, 4004.58771777195, 4779.02562461803
    ), iresult_pos_m = c(0.00356446552325877, 0.00421364007550875, 
    0.00406754352800449, 0.00365981038551155, 0.0038572688325682, 
    0.0040958776944954, 0.00382406510187511, 0.0042169990605352, 
    0.00400282852462449, 0.00444276248384841, 0.00475936721667692, 
    0.00435517049663636, 0.00402704589975837, 0.0043128925651774, 
    0.00484173333125022, 0.00478123827430327, 0.00481721410774609, 
    0.00401110217955023, 0.004682744853048, 0.00402663063339563, 
    0.00480533136843763), ID = c(NA, NA, NA, NA, NA, NA, NA, 
    NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA), 
    StepName = c("Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia", "Avoided cost of Dementia", "Avoided cost of Dementia", 
    "Avoided cost of Dementia"), ImpactID = c(10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L), Allocation = c(1, 1, 1, 1, 
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), Polarity = c("Positive", 
    "Positive", "Positive", "Positive", "Positive", "Positive", 
    "Positive", "Positive", "Positive", "Positive", "Positive", 
    "Positive", "Positive", "Positive", "Positive", "Positive", 
    "Positive", "Positive", "Positive", "Positive", "Positive"
    ), Waterfall = c("Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
    "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", "Yes", 
    "Yes", "Yes", "Yes", "Yes", "Yes", "Yes"), Int_Ext = c("External", 
    "External", "External", "External", "External", "External", 
    "External", "External", "External", "External", "External", 
    "External", "External", "External", "External", "External", 
    "External", "External", "External", "External", "External"
    ), ImpactCode = c("GDF", "GDF", "GDF", "GDF", "GDF", "GDF", 
    "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", "GDF", 
    "GDF", "GDF", "GDF", "GDF", "GDF", "GDF"), ImpactName = c("Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial", 
    "Government // Direct Financial", "Government // Direct Financial"
    ), Name = c("Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres", "Gym and Leisure Centres", "Gym and Leisure Centres", 
    "Gym and Leisure Centres"), Capital = c("Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured", "Manufactured", 
    "Manufactured", "Manufactured", "Manufactured"), Flow = c("TBC", 
    "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", 
    "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", "TBC", 
    "TBC", "TBC"), iresult = c(3564.46552325877, 4213.64007550875, 
    4067.54352800449, 3659.81038551155, 3857.2688325682, 4095.8776944954, 
    3824.06510187511, 4216.9990605352, 4002.82852462449, 4442.76248384841, 
    4759.36721667692, 4355.17049663636, 4027.04589975837, 4312.8925651774, 
    4841.73333125022, 4781.23827430327, 4817.21410774609, 4011.10217955023, 
    4682.744853048, 4026.63063339563, 4805.33136843763), iresult_m = c(0.00356446552325877, 
    0.00421364007550875, 0.00406754352800449, 0.00365981038551155, 
    0.0038572688325682, 0.0040958776944954, 0.00382406510187511, 
    0.0042169990605352, 0.00400282852462449, 0.00444276248384841, 
    0.00475936721667692, 0.00435517049663636, 0.00402704589975837, 
    0.0043128925651774, 0.00484173333125022, 0.00478123827430327, 
    0.00481721410774609, 0.00401110217955023, 0.004682744853048, 
    0.00402663063339563, 0.00480533136843763)), row.names = 1760:1780, class = "data.frame")

这是我目前正在尝试运行的代码要点,目前正在考虑使用 if 语句方法:

Data %>% 
 mutate(iresult_pos = ifelse(ISO= 'ALL', (sum of the rows in iresult_posPPP where DataYear, Division, FurtherDetails1 and FutherDetails2 are the same), iresult_posPPP)

有没有一种整洁的方法可以做到这一点,或者我可能错过了一个步骤,因为我一直在绞尽脑汁/绕过堆栈溢出线程并且想不出一个好的方法来做到这一点。任何帮助将不胜感激!

谢谢!

【问题讨论】:

    标签: r dplyr tidyverse rowsum


    【解决方案1】:

    也许我们需要一个按总和分组

    library(dplyr)
    Data %>% 
       group_by(DataYear, Division, FurtherDetails1, FurtherDetails2) %>% 
       mutate(iresult_pos =sum(iresult_posPPP))
    

    【讨论】:

    • 我想这就是我要找的东西,谢谢!然后我可以使用 ifelse 来使用 ISO=ALL 的 groupby 值和 iresult_posPPP 否则,干杯:)
    【解决方案2】:

    另一种选择,使用虚拟数据集来展示其工作原理:

    # create dummy data
    data <- data.frame(replicate(10,sample(0:1,1000,rep=TRUE)))
    
    # sum X2, X3, X4, X9 where X1 == 1, store as X11
    data <- data %>% mutate(
            X11 = rowSums(select_(.,'X2','X3','X4','X9')))
    

    【讨论】:

      猜你喜欢
      • 2018-10-23
      • 2022-10-07
      • 2017-05-11
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 1970-01-01
      • 2017-01-05
      • 1970-01-01
      相关资源
      最近更新 更多