我重新创建了一个场景,试图模拟您所面临的问题,以尽我对您的情况的理解。希望它至少能让您走上您正在寻找的答案的道路上。
您可以将以下代码复制粘贴到您的 R 控制台中以完成所有步骤。
library(dplyr)
######## Create the client database, assuming 4 different asset classes and an asset value of 1 for each of them.
df <- cbind.data.frame(clientId = c(1,1,2,3,3,3,4,4,4,5,5,6,6,7,8,9,9,10,10,10),AssetCategory= rep(c('a','b','c','d'),5),AssetValue =rep(c(1),20))
#Calculating the clients' total assets
totalAssetByClient <- df %>% group_by(clientId) %>% summarize(totalAssetByClient = sum(AssetValue))
# Appending TotalAssetByClient variable to the dataframe (client database) <- Answer to your FIRST question
df2 <- left_join(df,totalAssetByClient,by = "clientId")
# Then Create an empty dataset to host the AssetShareByClient table
AssetShareByClient <- data.frame(clienId = integer(), AssetCategory = character(), AssetShareByClient = double())
# Creating filling the AssetShareByClient table with a nested for Loop (sorry no easy way)
for (client in unique(df2$clientId))
{
for (asset in unique(df2$AssetCategory))
{
df3 <- filter(df2, clientId == client, AssetCategory == asset)
AssetShareByClient <- rbind(AssetShareByClient, c(client,asset,sum(df3$AssetValue)/mean(df3$totalAssetByClient)))
}
}
# We now have a standalone table with a column showing the proportion of investment per asset for each cient <- Answer to your SECOND question
# When the client has 0% share of an asset category it shows NaN. The sum of asset share category for each client sums to 100%
names(AssetShareByClient) = c("clientId","AssetCategory","AssetShareByClient")
print(AssetShareByClient)