【问题标题】:multiplying 2 zoo series in R在 R 中将 2 个 zoo 序列相乘
【发布时间】:2016-09-23 00:52:30
【问题描述】:

以下是整个数据的一小部分,我有跨越多年的数千个股票代码……股票代码和日期范围因每次运行而异

我有 2 个 zoo 序列“returns”和“decFac”。

    > tail(returns)
                    AAPL         DISCA          IBM           JNJ            KO
2014-12-23 -0.0035479832  0.0137774854  0.004943048 -0.0233164191  0.0145336114
2014-12-24 -0.0047206092 -0.0054309123 -0.002592361  0.0029684238 -0.0006984054
2014-12-26  0.0175226064 -0.0005733945  0.003208447  0.0044836732  0.0004657399
2014-12-29 -0.0007020609            NA           NA  0.0025666222 -0.0023303779
2014-12-30 -0.0122776892            NA           NA  0.0002847851 -0.0023360686
2014-12-31 -0.0192020576 -0.0219631307  0.002433726 -0.0075263261 -0.0127090448
                     NKE           TXN
2014-12-23  0.0004169359 -0.0007298205
2014-12-24  0.0033288228  0.0014592993
2014-12-26  0.0055922518 -0.0020985205
2014-12-29            NA            NA
2014-12-30            NA            NA
2014-12-31 -0.0075636285 -0.0086595788

> tail(decFac)
2014-12-23 2014-12-24 2014-12-26 2014-12-29 2014-12-30 2014-12-31 
0.02576202 0.02655878 0.02738019 0.02822700 0.02910000 0.03000000 

根据 RStudio 的显示,这两者的值都是“2012-01-04 至 2014-12-31 的 zoo 序列”

各数据类型如下:

> sapply(returns, typeof)
    AAPL    DISCA      IBM      JNJ       KO      NKE      TXN 
"double" "double" "double" "double" "double" "double" "double" 
> sapply(decFac, typeof)
[1] "double"

我的目标是让每一天的每只股票收益乘以同一天的 decFac

AAPL 最后几天(即上面 tail 输出中的日期)的预期结果如下:

                 AAPL
12/23/2014  -0.000091403
12/24/2014  -0.000125374
12/26/2014   0.000479772
12/29/2014  -0.000019817
12/30/2014  -0.000357281
12/31/2014  -0.000576062

【问题讨论】:

    标签: r zoo


    【解决方案1】:

    zoo 和 xts 对象将在操作前按索引对齐:

    library(xts)
    
    # Trading dates from the question's tail(): the daily calendar between
    # 2014-12-23 and 2014-12-31 with 25, 27 and 28 December left out.
    time <- seq.Date(
      from = as.Date("2014-12-23"),
      to = as.Date("2014-12-31"),
      by = "day"
    )[c(1, 2, 4, 7:9)]
    
    # Daily returns per ticker, copied from the question (NA = no observation).
    AAPL <- c(
      -0.0035479832, -0.0047206092, 0.0175226064,
      -0.0007020609, -0.0122776892, -0.0192020576
    )
    DISCA <- c(
      0.0137774854, -0.0054309123, -0.0005733945,
      NA, NA, -0.0219631307
    )
    IBM <- c(
      0.004943048, -0.002592361, 0.003208447,
      NA, NA, 0.002433726
    )
    JNJ <- c(
      -0.0233164191, 0.0029684238, 0.0044836732,
      0.0025666222, 0.0002847851, -0.0075263261
    )
    KO <- c(
      0.0145336114, -0.0006984054, 0.0004657399,
      -0.0023303779, -0.0023360686, -0.0127090448
    )
    NKE <- c(
      0.0004169359, 0.0033288228, 0.0055922518,
      NA, NA, -0.0075636285
    )
    TXN <- c(
      -0.0007298205, 0.0014592993, -0.0020985205,
      NA, NA, -0.0086595788
    )
    
    # One decay factor per date, in the same order as `time`.
    decFac_v <- c(
      0.02576202, 0.02655878, 0.02738019,
      0.02822700, 0.02910000, 0.03000000
    )
    
    # The same return matrix wrapped once as zoo and once as xts.
    returns_zoo <- zoo(
      cbind(AAPL, DISCA, IBM, JNJ, KO, NKE, TXN),
      order.by = time
    )
    returns <- xts(
      cbind(AAPL, DISCA, IBM, JNJ, KO, NKE, TXN),
      order.by = time
    )
    
    # drop() removes any length-one dimension from the factor series.
    decFac_zoo <- drop(zoo(decFac_v, order.by = time))
    decFac <- drop(xts(decFac_v, order.by = time))
    

    将 zoo 或 xts 对象相乘应该可以:

    # xts version: multiply the return matrix by the factor series. The printed
    # result shows each row scaled by the decFac value carrying the same date.
    returns * decFac
    #                  AAPL         DISCA           IBM           JNJ            KO           NKE           TXN
    # 2014-12-23 -9.140321e-05  3.549359e-04  1.273429e-04 -6.006781e-04  3.744152e-04  1.074111e-05 -1.880165e-05
    # 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05  7.883771e-05 -1.854880e-05  8.840947e-05  3.875721e-05
    # 2014-12-26  4.797723e-04 -1.569965e-05  8.784789e-05  1.227638e-04  1.275205e-05  1.531169e-04 -5.745789e-05
    # 2014-12-29 -1.981707e-05            NA            NA  7.244804e-05 -6.577958e-05            NA            NA
    # 2014-12-30 -3.572808e-04            NA            NA  8.287246e-06 -6.797960e-05            NA            NA
    # 2014-12-31 -5.760617e-04 -6.588939e-04  7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04
    # zoo version of the same product; the printed result is identical.
    returns_zoo * decFac_zoo
    #                  AAPL         DISCA           IBM           JNJ            KO           NKE           TXN
    # 2014-12-23 -9.140321e-05  3.549359e-04  1.273429e-04 -6.006781e-04  3.744152e-04  1.074111e-05 -1.880165e-05
    # 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05  7.883771e-05 -1.854880e-05  8.840947e-05  3.875721e-05
    # 2014-12-26  4.797723e-04 -1.569965e-05  8.784789e-05  1.227638e-04  1.275205e-05  1.531169e-04 -5.745789e-05
    # 2014-12-29 -1.981707e-05            NA            NA  7.244804e-05 -6.577958e-05            NA            NA
    # 2014-12-30 -3.572808e-04            NA            NA  8.287246e-06 -6.797960e-05            NA            NA
    # 2014-12-31 -5.760617e-04 -6.588939e-04  7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04
    

    考虑一下如果您要对 returns 或 decFac 对象取子集会发生什么:

    # Subsetting either operand: the product keeps only what both sides share
    x <- zoo(cbind(AAPL, DISCA, IBM, JNJ, KO, NKE, TXN), order.by = time)
    y <- drop(zoo(decFac_v, order.by = time))
    
    # Baseline: both objects cover the same six dates
    x * y
    #                  AAPL         DISCA           IBM           JNJ            KO           NKE           TXN
    # 2014-12-23 -9.140321e-05  3.549359e-04  1.273429e-04 -6.006781e-04  3.744152e-04  1.074111e-05 -1.880165e-05
    # 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05  7.883771e-05 -1.854880e-05  8.840947e-05  3.875721e-05
    # 2014-12-26  4.797723e-04 -1.569965e-05  8.784789e-05  1.227638e-04  1.275205e-05  1.531169e-04 -5.745789e-05
    # 2014-12-29 -1.981707e-05            NA            NA  7.244804e-05 -6.577958e-05            NA            NA
    # 2014-12-30 -3.572808e-04            NA            NA  8.287246e-06 -6.797960e-05            NA            NA
    # 2014-12-31 -5.760617e-04 -6.588939e-04  7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04
    
    x * y[-3] # third date removed from the factor: no row for 2014-12-26
    #                  AAPL         DISCA           IBM           JNJ            KO           NKE           TXN
    # 2014-12-23 -9.140321e-05  0.0003549359  1.273429e-04 -6.006781e-04  3.744152e-04  1.074111e-05 -1.880165e-05
    # 2014-12-24 -1.253736e-04 -0.0001442384 -6.884995e-05  7.883771e-05 -1.854880e-05  8.840947e-05  3.875721e-05
    # 2014-12-29 -1.981707e-05            NA            NA  7.244804e-05 -6.577958e-05            NA            NA
    # 2014-12-30 -3.572808e-04            NA            NA  8.287246e-06 -6.797960e-05            NA            NA
    # 2014-12-31 -5.760617e-04 -0.0006588939  7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04
    
    x[-3] * y # third date removed from the returns: no row for 2014-12-26
    #                  AAPL         DISCA           IBM           JNJ            KO           NKE           TXN
    # 2014-12-23 -9.140321e-05  0.0003549359  1.273429e-04 -6.006781e-04  3.744152e-04  1.074111e-05 -1.880165e-05
    # 2014-12-24 -1.253736e-04 -0.0001442384 -6.884995e-05  7.883771e-05 -1.854880e-05  8.840947e-05  3.875721e-05
    # 2014-12-29 -1.981707e-05            NA            NA  7.244804e-05 -6.577958e-05            NA            NA
    # 2014-12-30 -3.572808e-04            NA            NA  8.287246e-06 -6.797960e-05            NA            NA
    # 2014-12-31 -5.760617e-04 -0.0006588939  7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04
    
    x[, -3] * y # third symbol column (IBM) removed: every date is still present
    #                  AAPL         DISCA           JNJ            KO           NKE           TXN
    # 2014-12-23 -9.140321e-05  3.549359e-04 -6.006781e-04  3.744152e-04  1.074111e-05 -1.880165e-05
    # 2014-12-24 -1.253736e-04 -1.442384e-04  7.883771e-05 -1.854880e-05  8.840947e-05  3.875721e-05
    # 2014-12-26  4.797723e-04 -1.569965e-05  1.227638e-04  1.275205e-05  1.531169e-04 -5.745789e-05
    # 2014-12-29 -1.981707e-05            NA  7.244804e-05 -6.577958e-05            NA            NA
    # 2014-12-30 -3.572808e-04            NA  8.287246e-06 -6.797960e-05            NA            NA
    # 2014-12-31 -5.760617e-04 -6.588939e-04 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04
    

    考虑如果你扩大日期范围会发生什么:

    # Expanding the time dimension: prepend placeholder history before 2014-12-23
    expanded_time <- seq.Date(
      from = as.Date("2012-01-04"),
      to = as.Date("2014-12-22"),
      by = "day"
    )
    
    # Constant return of 1 for every ticker on every earlier date
    value <- rep_len(1, length(expanded_time))
    old_returns <- xts(
      cbind(
        AAPL = value, DISCA = value, IBM = value, JNJ = value,
        KO = value, NKE = value, TXN = value
      ),
      order.by = expanded_time
    )
    
    # Stack the placeholder history on top of the real tail, then multiply
    returns_expanded_time <- xts(
      rbind(old_returns, returns),
      order.by = c(expanded_time, time)
    )
    returns_expanded_time * decFac
    # returns only values where the date index of each object matches:
    #                  AAPL         DISCA           IBM           JNJ            KO           NKE           TXN
    # 2014-12-23 -9.140321e-05  3.549359e-04  1.273429e-04 -6.006781e-04  3.744152e-04  1.074111e-05 -1.880165e-05
    # 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05  7.883771e-05 -1.854880e-05  8.840947e-05  3.875721e-05
    # 2014-12-26  4.797723e-04 -1.569965e-05  8.784789e-05  1.227638e-04  1.275205e-05  1.531169e-04 -5.745789e-05
    # 2014-12-29 -1.981707e-05            NA            NA  7.244804e-05 -6.577958e-05            NA            NA
    # 2014-12-30 -3.572808e-04            NA            NA  8.287246e-06 -6.797960e-05            NA            NA
    # 2014-12-31 -5.760617e-04 -6.588939e-04  7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04
    

    考虑一下如果你要附加额外的列会发生什么:

    # Two extra constant columns spanning the full (history + tail) date range
    new_column1 <- rep_len(1, length(c(expanded_time, time)))
    new_column2 <- new_column1
    
    returns_expanded_cols <- xts(
      cbind(
        rbind(old_returns, returns),
        nc1 = new_column1,
        nc2 = new_column2
      ),
      order.by = c(expanded_time, time)
    )
    
    returns_expanded_cols * decFac
    # returns only values where the date index of each object matches,
    # including the two new columns, `nc1` and `nc2`
    #                  AAPL         DISCA           IBM           JNJ            KO           NKE           TXN        nc1        nc2
    # 2014-12-23 -9.140321e-05  3.549359e-04  1.273429e-04 -6.006781e-04  3.744152e-04  1.074111e-05 -1.880165e-05 0.02576202 0.02576202
    # 2014-12-24 -1.253736e-04 -1.442384e-04 -6.884995e-05  7.883771e-05 -1.854880e-05  8.840947e-05  3.875721e-05 0.02655878 0.02655878
    # 2014-12-26  4.797723e-04 -1.569965e-05  8.784789e-05  1.227638e-04  1.275205e-05  1.531169e-04 -5.745789e-05 0.02738019 0.02738019
    # 2014-12-29 -1.981707e-05            NA            NA  7.244804e-05 -6.577958e-05            NA            NA 0.02822700 0.02822700
    # 2014-12-30 -3.572808e-04            NA            NA  8.287246e-06 -6.797960e-05            NA            NA 0.02910000 0.02910000
    # 2014-12-31 -5.760617e-04 -6.588939e-04  7.301178e-05 -2.257898e-04 -3.812713e-04 -2.269089e-04 -2.597874e-04 0.03000000 0.03000000
    

    【讨论】:

    • 我不确定如何实施您的建议,我有多年的数据和数千个股票代码(我正在更新我的问题以指出这一点……我确实指出时间是 2 年:“2012-01-04 至 2014-12-31 的 zoo 序列”)
    • 我的建议是将 returns 乘以 decFac。在每种情况下,结果都是两个对象的乘积,且只包含 returns 和 decFac 中日期索引相匹配的值。此概念同样适用于与数据集尾部不同的日期和列范围。我将补充我的答案,以说明在对任一对象取子集或扩展其范围时,乘积会是什么。
    猜你喜欢
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 1970-01-01
    • 2012-02-27
    • 2012-03-30
    • 1970-01-01
    • 1970-01-01
    • 2023-03-13
    相关资源
    最近更新 更多