有没有更有效的方法来从客户投资组合中计算每月投资组合价值？答案

【问题标题】：Is there a more efficient way to calculate monthly portfolio values from customer portfolios?有没有更有效的方法来从客户投资组合中计算每月投资组合价值？
【发布时间】：2019-07-08 03:50:16
【问题描述】：

以下两个数据框是我目前正在处理的数据的一个片段（snippet）。df1 包含投资者的历史记录（以 id 区分）以及他们持有的不同产品的份额数量。每次份额数量发生变化时，都会创建一个新条目。df2 包含各产品对应的价格。

我正在尝试计算所有客户在其投资期间的每月投资组合价值。
这是 df1 和 df2 的可重现示例：

library(dplyr)
library(lubridate)
library(timeDate)

# Create df1: customer portfolio history. Each row gives an investor id, a
# product, the number of shares held and the start/end dates of that entry.
df1 <- data.frame(
  id = c("1", "1", "1", "1", "2", "2", "2", "3", "3", "3", "3", "3")
)
df1$start <- as.Date(c(
  "2012-03-11", "2012-04-17", "2012-05-09", "2012-05-11",
  "2012-11-17", "2012-12-09", "2013-01-21",
  "2011-06-27", "2012-07-02", "2012-07-21", "2012-09-03", "2012-09-16"
))
df1$end <- as.Date(c(
  "2012-05-08", "2012-05-21", "2012-06-11", "2012-11-16",
  "2012-12-08", "2013-01-20", "2013-02-03",
  "2011-07-01", "2012-09-15", "2012-09-02", "2012-09-20", "2012-09-16"
))
df1$product <- c("a", "b", "a", "b", "b", "b", "b", "c", "c", "a", "a", "c")
df1$amount <- c(5, 12, 7, 11, 3, 8, 6, 4, 1, 16, 17, 9)

# Create df2: one price per product and month.

# Returns a data frame with one first-of-month date per month between `from`
# and `to` (both "YYYY-MM-DD" strings), labelled with `product`.
month_grid <- function(product, from, to) {
  data.frame(
    date = seq.Date(from = as.Date(from), to = as.Date(to), by = "month"),
    product = product,
    stringsAsFactors = FALSE
  )
}

df2 <- rbind(
  month_grid("a", "2011-05-01", "2013-02-01"),
  month_grid("b", "2012-04-01", "2013-02-01"),
  month_grid("c", "2011-06-01", "2012-09-01")
)

# Fix the RNG seed so the random prices are identical on every run; without
# it the example is not reproducible.
set.seed(1)
df2$price <- as.numeric(sample(100, size = nrow(df2), replace = TRUE))

# Move every price date to the last day of its month.
df2$date <- as.Date(timeLastDayInMonth(df2$date))

我最终的做法是把投资者数据展开（spread）为宽格式，并人为地为每个月底添加一行日期。然后我对价格数据做同样的处理，将两者连接起来，最后用 rowSums 计算投资组合价值。

这是我上面数据框的代码：

# Convert the holdings to wide format: one "xxx_<product>" column per product,
# holding the amount (NA where a row does not concern that product).
# names_prefix replaces the former positional rename of columns 4:6, so the
# step keeps working when the number of products changes. The tidyr:: prefix
# is spelled out because the script only attaches dplyr, lubridate and
# timeDate.
df1 <- df1 %>%
  tidyr::pivot_wider(
    names_from = product,
    values_from = amount,
    names_prefix = "xxx_",
    names_sort = TRUE
  )

# Add one row per investor and month end to df1.

# Per investor: the first day of the month before the earliest start date, and
# the latest end date. lubridate's %m-% replaces AddMonths(), which none of
# the packages attached by this script provides (it appears to come from
# DescTools).
investor_spans <- df1 %>%
  group_by(id) %>%
  summarize(
    first_month = floor_date(min(start), "month") %m-% months(1),
    last_day = max(end)
  )

# Expand each investor's span into monthly steps and move every step to the
# last day of its month. The result is stored as `month_ends` rather than
# `seq`, so that base::seq is not shadowed by a data frame.
month_ends <- bind_rows(lapply(seq_len(nrow(investor_spans)), function(i) {
  month_starts <- seq(
    investor_spans$first_month[i],
    investor_spans$last_day[i],
    by = "1 month"
  )
  data.frame(
    id = investor_spans$id[i],
    start = as.Date(timeLastDayInMonth(month_starts)),
    stringsAsFactors = FALSE
  )
}))

# Month-end rows that have no matching (id, start) entry in df1 come in with
# NA in every other column.
df1 <- full_join(df1, month_ends, by = c("id", "start"))
df1 <- df1[order(df1$id, df1$start), ]

# Carry holdings forward to the month-end rows and keep only those rows.
# grp is the number of days between `start` and the last day of its month
# (0 exactly on a month end). The column is kept because a later step drops
# it by name.
# The holdings columns are selected by their "xxx_" prefix instead of by
# position (4:6), so the step does not depend on there being exactly three
# products. tidyr:: is spelled out because tidyr is not attached.
# NOTE(review): fill() carries the last known amount forward without looking
# at `end`, so an entry that has ended is still reported at later month ends
# until another row for the same product follows. Confirm this is intended.
df1 <- df1 %>%
  group_by(id) %>%
  mutate(grp = as.numeric(start - as.Date(timeLastDayInMonth(start)))) %>%
  tidyr::fill(starts_with("xxx_"), .direction = "down") %>%
  filter(grp == 0)

# Replace every NA in `x` (vector or data frame) with 0 and return the result.
na_sub <- function(x) {
  missing_cells <- is.na(x)
  x[missing_cells] <- 0
  x
}
# Drop the helper columns `end` and `grp`, then set the remaining NAs to 0.
df1 <- na_sub(select(df1, -end, -grp))

# Join the wide holdings with wide prices and compute monthly portfolio values.

# Prices in wide format: one "yyy_<product>" column per product, keyed by the
# month-end date, which is renamed to `start` to match df1. names_prefix
# replaces the former positional rename of columns 2:4; tidyr:: is spelled out
# because tidyr is not attached.
df2 <- df2 %>%
  tidyr::pivot_wider(
    names_from = product,
    values_from = price,
    names_prefix = "yyy_",
    names_sort = TRUE
  ) %>%
  rename(start = date)

# df1 is still grouped by id at this point. On a grouped data frame select()
# re-adds the grouping column, so `id` would end up inside the holdings/price
# multiplication. Ungroup before joining.
df1 <- df1 %>%
  ungroup() %>%
  left_join(df2, by = "start")

# Pair holdings and prices by product name rather than by column position.
# A held product without a price column then fails loudly instead of being
# multiplied with the wrong price.
product_names <- sub("^xxx_", "", grep("^xxx_", names(df1), value = TRUE))
df1$portfoliovalue <- rowSums(
  as.matrix(df1[paste0("xxx_", product_names)]) *
    as.matrix(df1[paste0("yyy_", product_names)]),
  na.rm = TRUE
)

代码给出了预期结果，即每个投资者的每月投资组合价值。正如我所提到的，这只是全部数据的一个片段。不幸的是，我遇到了麻烦，尤其是宽数据框的大小（随着产品数量的增加，列数会变得非常多），这使得代码无法在更大的数据集上运行。是否可以将数据保留为长格式进行计算？是否有软件包提供此类计算功能？

【问题讨论】：

标签： r dataframe portfolio

【解决方案1】：

也许 PMwR 包中的一些功能可以提供帮助。（披露：我是该包的维护者。）使用 PMwR，您可以创建交易日志（journal，即资金流水、持仓变动），然后据此计算持仓（position）。例如：

library("PMwR")
library("datetimeutils")
# Build a journal from df1. df1 must still be the long-format table here,
# since its product, amount, start and end columns are read. Every row
# contributes two entries: +amount at its start date and -amount at its end
# date.
j <- journal(timestamp  = c(df1$start, df1$end),
             amount     = c(df1$amount, -df1$amount),
             instrument = rep(df1$product, times = 2),
             account    = rep(df1$id, times = 2))

# Last day of every month in 2012.
month.ends <- nth_day(period = "month", n = "last",
                      start  = as.Date("2012-01-01"),
                      end    = as.Date("2012-12-31"))

# Positions at each month end, with one column per account:instrument pair.
position(j, use.account = TRUE, when = month.ends)
##            1:a 1:b 2:b 3:a 3:c
## 2012-01-31   0   0   0   0   0
## 2012-02-29   0   0   0   0   0
## 2012-03-31   5   0   0   0   0
## 2012-04-30   5  12   0   0   0
## 2012-05-31   7  11   0   0   0
## 2012-06-30   0  11   0   0   0
## 2012-07-31   0  11   0  16   1
## 2012-08-31   0  11   0  16   1
## 2012-09-30   0  11   0   0   0
## 2012-10-31   0  11   0   0   0
## 2012-11-30   0   0   3   0   0
## 2012-12-31   0   0   8   0   0

更多详情请见manual。

更新：当您使用 when 参数调用 position 时，会得到 when 中每个时间点对应的持仓。添加 id 列的一个简单方法是：遍历各个 id，把每个 id 的持仓转换为数据框，然后合并这些数据框。（不能直接调用 rbind，因为不同 id 持有的产品可能不同。）

accounts <- unique(j$account)

# Fail early when the journal has no accounts; otherwise `result` would be
# undefined below (or left over from an earlier run).
stopifnot(length(accounts) > 0)

# One data frame of month-end positions per account: timestamp, id, and one
# column per instrument of that account.
per_account <- vector("list", length(accounts))
k <- 0L
for (a in accounts) {
  k <- k + 1L
  per_account[[k]] <- data.frame(timestamp = month.ends,
                                 id = a,
                                 position(j[j$account == a],
                                          when = month.ends))
}

# Accounts may hold different instruments, so their columns differ.
# merge(all = TRUE) keeps every row and fills the columns an account lacks
# with NA; folding left to right applies the merges in account order.
result <- Reduce(function(x, y) merge(x, y, all = TRUE), per_account)

# An instrument an account never held counts as a zero position.
result[is.na(result)] <- 0
result
##     timestamp id  a  b c
## 1  2012-01-31  1  0  0 0
## 2  2012-01-31  2  0  0 0
## 3  2012-01-31  3  0  0 0
## 4  2012-02-29  1  0  0 0
## 5  2012-02-29  2  0  0 0
## 6  2012-02-29  3  0  0 0
## 7  2012-03-31  1  5  0 0
## 8  2012-03-31  2  0  0 0
## 9  2012-03-31  3  0  0 0
## 10 2012-04-30  1  5 12 0
## 11 2012-04-30  2  0  0 0
## 12 2012-04-30  3  0  0 0
## 13 2012-05-31  1  7 11 0
## 14 2012-05-31  2  0  0 0
## 15 2012-05-31  3  0  0 0
## 16 2012-06-30  1  0 11 0
## 17 2012-06-30  2  0  0 0
## 18 2012-06-30  3  0  0 0
## 19 2012-07-31  1  0 11 0
## 20 2012-07-31  2  0  0 0
## 21 2012-07-31  3 16  0 1
## 22 2012-08-31  1  0 11 0
## 23 2012-08-31  2  0  0 0
## 24 2012-08-31  3 16  0 1
## 25 2012-09-30  1  0 11 0
## 26 2012-09-30  2  0  0 0
## 27 2012-09-30  3  0  0 0
## 28 2012-10-31  1  0 11 0
## 29 2012-10-31  2  0  0 0
## 30 2012-10-31  3  0  0 0
## 31 2012-11-30  1  0  0 0
## 32 2012-11-30  2  0  3 0
## 33 2012-11-30  3  0  0 0
## 34 2012-12-31  1  0  0 0
## 35 2012-12-31  2  0  8 0
## 36 2012-12-31  3  0  0 0

【讨论】：

这确实是一个很好的软件包。祝贺你的工作！我有一个关于输出的问题。在您的输出中，position(j, when = month.ends, use.account = TRUE) 把 id 加进了列名。是否可以在时间戳旁边用一个单独的列存放 id，而不是为每个 id 重复各产品的列？除此之外，这正是我想要的。
谢谢。我已经扩展了我的答案。