【发布时间】:2020-09-21 18:23:44
【问题描述】:
我正在尝试读取一系列约有 190 列的文件,并希望为每一列指定其类型(即字符、日期、数字等),因为当第一行读到的是负数时,vroom 似乎并不总是把该列识别为数字。希望下面是一个仍然使用真实数据的最小可用示例。数据来自 CFTC(商品期货交易委员会)的交易员持仓报告(Commitments of Traders)。
我的例子如下:
# Minimal reproduction: download the CFTC disaggregated Commitments of Traders
# archive for 2018, unzip it, and read the extracted text file with vroom.
library(vroom)

# download.file() needs the bare URL (no markdown link brackets or footnote
# markers around it).
url2 <- "https://www.cftc.gov/files/dea/history/com_disagg_txt_2018.zip"
download.file(url2, "CFTC_COT.zip")
unzip("CFTC_COT.zip", exdir = "CFTC_COT")

# Works: vroom guesses the column types on its own.
data <- vroom("CFTC_COT/c_year.txt")

# The call this question is about: the column types are supplied positionally
# by combining collector objects with c() and rep(). It is kept exactly as
# attempted, because this is the call reported to fail.
data2 <- vroom(
  "CFTC_COT/c_year.txt",
  col_types = c(
    col_character(), col_double(), col_date(format = ""), col_double(),
    rep(col_character(), 3), rep(col_double(), 177), rep(col_character(), 6)
  )
)

# Print the column specification vroom guessed for the first (working) read.
spec(data)
spec(data) 的输出如下:
cols(
Market_and_Exchange_Names = col_character(),
As_of_Date_In_Form_YYMMDD = col_double(),
`Report_Date_as_YYYY-MM-DD` = col_date(format = ""),
CFTC_Contract_Market_Code = col_character(),
CFTC_Market_Code = col_character(),
CFTC_Region_Code = col_character(),
CFTC_Commodity_Code = col_character(),
Open_Interest_All = col_double(),
Prod_Merc_Positions_Long_All = col_double(),
Prod_Merc_Positions_Short_All = col_double(),
Swap_Positions_Long_All = col_double(),
Swap__Positions_Short_All = col_double(),
Swap__Positions_Spread_All = col_double(),
M_Money_Positions_Long_All = col_double(),
M_Money_Positions_Short_All = col_double(),
M_Money_Positions_Spread_All = col_double(),
Other_Rept_Positions_Long_All = col_double(),
Other_Rept_Positions_Short_All = col_double(),
Other_Rept_Positions_Spread_All = col_double(),
Tot_Rept_Positions_Long_All = col_double(),
Tot_Rept_Positions_Short_All = col_double(),
NonRept_Positions_Long_All = col_double(),
NonRept_Positions_Short_All = col_double(),
Open_Interest_Old = col_double(),
Prod_Merc_Positions_Long_Old = col_double(),
Prod_Merc_Positions_Short_Old = col_double(),
Swap_Positions_Long_Old = col_double(),
Swap__Positions_Short_Old = col_double(),
Swap__Positions_Spread_Old = col_double(),
M_Money_Positions_Long_Old = col_double(),
M_Money_Positions_Short_Old = col_double(),
M_Money_Positions_Spread_Old = col_double(),
Other_Rept_Positions_Long_Old = col_double(),
Other_Rept_Positions_Short_Old = col_double(),
Other_Rept_Positions_Spread_Old = col_double(),
Tot_Rept_Positions_Long_Old = col_double(),
Tot_Rept_Positions_Short_Old = col_double(),
NonRept_Positions_Long_Old = col_double(),
NonRept_Positions_Short_Old = col_double(),
Open_Interest_Other = col_double(),
Prod_Merc_Positions_Long_Other = col_double(),
Prod_Merc_Positions_Short_Other = col_double(),
Swap_Positions_Long_Other = col_double(),
Swap__Positions_Short_Other = col_double(),
Swap__Positions_Spread_Other = col_double(),
M_Money_Positions_Long_Other = col_double(),
M_Money_Positions_Short_Other = col_double(),
M_Money_Positions_Spread_Other = col_double(),
Other_Rept_Positions_Long_Other = col_double(),
Other_Rept_Positions_Short_Other = col_double(),
Other_Rept_Positions_Spread_Other = col_double(),
Tot_Rept_Positions_Long_Other = col_double(),
Tot_Rept_Positions_Short_Other = col_double(),
NonRept_Positions_Long_Other = col_double(),
NonRept_Positions_Short_Other = col_double(),
Change_in_Open_Interest_All = col_character(),
Change_in_Prod_Merc_Long_All = col_character(),
Change_in_Prod_Merc_Short_All = col_character(),
Change_in_Swap_Long_All = col_character(),
Change_in_Swap_Short_All = col_character(),
Change_in_Swap_Spread_All = col_character(),
Change_in_M_Money_Long_All = col_character(),
Change_in_M_Money_Short_All = col_character(),
Change_in_M_Money_Spread_All = col_character(),
Change_in_Other_Rept_Long_All = col_character(),
Change_in_Other_Rept_Short_All = col_character(),
Change_in_Other_Rept_Spread_All = col_character(),
Change_in_Tot_Rept_Long_All = col_character(),
Change_in_Tot_Rept_Short_All = col_character(),
Change_in_NonRept_Long_All = col_character(),
Change_in_NonRept_Short_All = col_character(),
Pct_of_Open_Interest_All = col_double(),
Pct_of_OI_Prod_Merc_Long_All = col_double(),
Pct_of_OI_Prod_Merc_Short_All = col_double(),
Pct_of_OI_Swap_Long_All = col_double(),
Pct_of_OI_Swap_Short_All = col_double(),
Pct_of_OI_Swap_Spread_All = col_double(),
Pct_of_OI_M_Money_Long_All = col_double(),
Pct_of_OI_M_Money_Short_All = col_double(),
Pct_of_OI_M_Money_Spread_All = col_double(),
Pct_of_OI_Other_Rept_Long_All = col_double(),
Pct_of_OI_Other_Rept_Short_All = col_double(),
Pct_of_OI_Other_Rept_Spread_All = col_double(),
Pct_of_OI_Tot_Rept_Long_All = col_double(),
Pct_of_OI_Tot_Rept_Short_All = col_double(),
Pct_of_OI_NonRept_Long_All = col_double(),
Pct_of_OI_NonRept_Short_All = col_double(),
Pct_of_Open_Interest_Old = col_double(),
Pct_of_OI_Prod_Merc_Long_Old = col_double(),
Pct_of_OI_Prod_Merc_Short_Old = col_double(),
Pct_of_OI_Swap_Long_Old = col_double(),
Pct_of_OI_Swap_Short_Old = col_double(),
Pct_of_OI_Swap_Spread_Old = col_double(),
Pct_of_OI_M_Money_Long_Old = col_double(),
Pct_of_OI_M_Money_Short_Old = col_double(),
Pct_of_OI_M_Money_Spread_Old = col_double(),
Pct_of_OI_Other_Rept_Long_Old = col_double(),
Pct_of_OI_Other_Rept_Short_Old = col_double(),
Pct_of_OI_Other_Rept_Spread_Old = col_double(),
Pct_of_OI_Tot_Rept_Long_Old = col_double(),
Pct_of_OI_Tot_Rept_Short_Old = col_double(),
Pct_of_OI_NonRept_Long_Old = col_double(),
Pct_of_OI_NonRept_Short_Old = col_double(),
Pct_of_Open_Interest_Other = col_double(),
Pct_of_OI_Prod_Merc_Long_Other = col_double(),
Pct_of_OI_Prod_Merc_Short_Other = col_double(),
Pct_of_OI_Swap_Long_Other = col_double(),
Pct_of_OI_Swap_Short_Other = col_double(),
Pct_of_OI_Swap_Spread_Other = col_double(),
Pct_of_OI_M_Money_Long_Other = col_double(),
Pct_of_OI_M_Money_Short_Other = col_double(),
Pct_of_OI_M_Money_Spread_Other = col_double(),
Pct_of_OI_Other_Rept_Long_Other = col_double(),
Pct_of_OI_Other_Rept_Short_Other = col_double(),
Pct_of_OI_Other_Rept_Spread_Other = col_double(),
Pct_of_OI_Tot_Rept_Long_Other = col_double(),
Pct_of_OI_Tot_Rept_Short_Other = col_double(),
Pct_of_OI_NonRept_Long_Other = col_double(),
Pct_of_OI_NonRept_Short_Other = col_double(),
Traders_Tot_All = col_double(),
Traders_Prod_Merc_Long_All = col_character(),
Traders_Prod_Merc_Short_All = col_character(),
Traders_Swap_Long_All = col_character(),
Traders_Swap_Short_All = col_character(),
Traders_Swap_Spread_All = col_character(),
Traders_M_Money_Long_All = col_character(),
Traders_M_Money_Short_All = col_character(),
Traders_M_Money_Spread_All = col_character(),
Traders_Other_Rept_Long_All = col_character(),
Traders_Other_Rept_Short_All = col_character(),
Traders_Other_Rept_Spread_All = col_character(),
Traders_Tot_Rept_Long_All = col_double(),
Traders_Tot_Rept_Short_All = col_double(),
Traders_Tot_Old = col_double(),
Traders_Prod_Merc_Long_Old = col_character(),
Traders_Prod_Merc_Short_Old = col_character(),
Traders_Swap_Long_Old = col_character(),
Traders_Swap_Short_Old = col_character(),
Traders_Swap_Spread_Old = col_character(),
Traders_M_Money_Long_Old = col_character(),
Traders_M_Money_Short_Old = col_character(),
Traders_M_Money_Spread_Old = col_character(),
Traders_Other_Rept_Long_Old = col_character(),
Traders_Other_Rept_Short_Old = col_character(),
Traders_Other_Rept_Spread_Old = col_character(),
Traders_Tot_Rept_Long_Old = col_double(),
Traders_Tot_Rept_Short_Old = col_double(),
Traders_Tot_Other = col_double(),
Traders_Prod_Merc_Long_Other = col_double(),
Traders_Prod_Merc_Short_Other = col_double(),
Traders_Swap_Long_Other = col_character(),
Traders_Swap_Short_Other = col_character(),
Traders_Swap_Spread_Other = col_character(),
Traders_M_Money_Long_Other = col_character(),
Traders_M_Money_Short_Other = col_character(),
Traders_M_Money_Spread_Other = col_character(),
Traders_Other_Rept_Long_Other = col_double(),
Traders_Other_Rept_Short_Other = col_character(),
Traders_Other_Rept_Spread_Other = col_double(),
Traders_Tot_Rept_Long_Other = col_double(),
Traders_Tot_Rept_Short_Other = col_double(),
Conc_Gross_LE_4_TDR_Long_All = col_double(),
Conc_Gross_LE_4_TDR_Short_All = col_double(),
Conc_Gross_LE_8_TDR_Long_All = col_double(),
Conc_Gross_LE_8_TDR_Short_All = col_double(),
Conc_Net_LE_4_TDR_Long_All = col_double(),
Conc_Net_LE_4_TDR_Short_All = col_double(),
Conc_Net_LE_8_TDR_Long_All = col_double(),
Conc_Net_LE_8_TDR_Short_All = col_double(),
Conc_Gross_LE_4_TDR_Long_Old = col_double(),
Conc_Gross_LE_4_TDR_Short_Old = col_double(),
Conc_Gross_LE_8_TDR_Long_Old = col_double(),
Conc_Gross_LE_8_TDR_Short_Old = col_double(),
Conc_Net_LE_4_TDR_Long_Old = col_double(),
Conc_Net_LE_4_TDR_Short_Old = col_double(),
Conc_Net_LE_8_TDR_Long_Old = col_double(),
Conc_Net_LE_8_TDR_Short_Old = col_double(),
Conc_Gross_LE_4_TDR_Long_Other = col_double(),
Conc_Gross_LE_4_TDR_Short_Other = col_double(),
Conc_Gross_LE_8_TDR_Long_Other = col_double(),
Conc_Gross_LE_8_TDR_Short_Other = col_double(),
Conc_Net_LE_4_TDR_Long_Other = col_double(),
Conc_Net_LE_4_TDR_Short_Other = col_double(),
Conc_Net_LE_8_TDR_Long_Other = col_double(),
Conc_Net_LE_8_TDR_Short_Other = col_double(),
Contract_Units = col_character(),
CFTC_Contract_Market_Code_Quotes = col_character(),
CFTC_Market_Code_Quotes = col_character(),
CFTC_Commodity_Code_Quotes = col_character(),
CFTC_SubGroup_Code = col_character(),
FutOnly_or_Combined = col_character(),
.delim = ","
)
我可以下载数据,并把它读入 data,但是当我尝试通过定义列类型来读入它(即 data2)时,它失败并显示以下消息:错误:未知快捷方式(Error: Unknown shortcut)
根据 vroom 的文档,我可以用 name = col_type() 的形式逐列定义类型,但我想按组来定义,因为有连续 177 列应该是 double 类型,而其中一些列一开始却被当作了 col_character()。
为了再次澄清:我想知道如何使用 rep() 或类似函数,为连续的多列一次性定义列类型,而不必逐个写出每一列的名称,这样我就可以一次读取许多不同的文件。非常感谢您的帮助。
【问题讨论】: