在对每个建议的候选方案进行测试和计时后,结果显示 strtok 明显是最快的。虽然我对此并不感到意外,但出于对测试的热爱,探索其他方案仍然是值得的。[注意:代码是临时拼凑的,欢迎编辑 :)]
给定:
// C-style record holding one parsed tick. The two text fields are raw
// pointers: the struct does not own the characters they refer to.
struct sTokenDataC
{
    char *time;   // first field of the record, as text
    char *symb;   // second field of the record, as text
    float bid;    // third field, converted to float
    float ask;    // fourth field, converted to float
    int bidSize;  // fifth field, converted to int
    int askSize;  // sixth field, converted to int
};
// Short alias for the record type.
typedef struct sTokenDataC tokenDataC;
// Splits one comma-separated tick record ("time,symb,bid,ask,bidSize,askSize")
// into a tokenDataC using strtok_s (three-argument MSVC form).
//
// line        - mutable, NUL-terminated record. strtok_s overwrites the
//               delimiters in place, and the returned time/symb pointers
//               point INTO this buffer, so it must outlive the result.
// parseBuffer - only serves as storage for the strtok_s context; it is passed
//               by value, so whatever the caller passes in is never read back.
//
// Returns the parsed record. Missing text fields are NULL and missing numeric
// fields are 0 (a NULL line yields an all-zero record) instead of handing a
// NULL pointer to atof/atoi.
tokenDataC parseTick( char *line, char *parseBuffer )
{
    tokenDataC tokenDataOut = { NULL, NULL, 0.0f, 0.0f, 0, 0 };
    const char *field;

    if ( line == NULL )
        return tokenDataOut;

    tokenDataOut.time = strtok_s( line, ",", &parseBuffer );
    tokenDataOut.symb = strtok_s( NULL, ",", &parseBuffer );

    // Each numeric field is converted only if the token actually exists.
    field = strtok_s( NULL, ",", &parseBuffer );
    if ( field != NULL )
        tokenDataOut.bid = static_cast<float>( atof( field ) );

    field = strtok_s( NULL, ",", &parseBuffer );
    if ( field != NULL )
        tokenDataOut.ask = static_cast<float>( atof( field ) );

    field = strtok_s( NULL, ",", &parseBuffer );
    if ( field != NULL )
        tokenDataOut.bidSize = atoi( field );

    field = strtok_s( NULL, ",", &parseBuffer );
    if ( field != NULL )
        tokenDataOut.askSize = atoi( field );

    return tokenDataOut;
}
// Sample tick line used as benchmark input. The text lives in a statically
// initialized, writable 64-byte array, so no statement has to run at namespace
// scope and nothing is heap-allocated; testStringC remains a plain char*.
static char testStringStorage[64] = "09:30:00,TEST,13.24,15.32,10,14";
char *testStringC = testStringStorage;
// Benchmark driver for the strtok_s-based parser: copies the sample line into
// a scratch buffer and parses it ITERATIONS times.
// NOTE(review): ITERATIONS, tokenData2 and parseBuffer are not declared in this
// snippet; they are assumed to be defined elsewhere in the full program.
int _tmain(int argc, _TCHAR* argv[])
{
    // Scratch copy of the line, kept on the stack so there is nothing to free.
    char lineBuffer[64];
    printf("Testing method2....\n");
    for(int i = 0; i < ITERATIONS; i++)
    {
        // Copy on every iteration: parsing modifies the buffer, and the
        // source line has to be preserved, so this cost is part of what a
        // realistic measurement must include.
        strcpy(lineBuffer,testStringC);
        tokenData2 = parseTick(lineBuffer,parseBuffer);
    }
    return 0;
}
对比:通过以下方式调用 John Dibling 的实现:
// std::string-based counterpart of the C-style tick record: the text fields
// own their characters instead of pointing into a caller's buffer.
struct sTokenDataCPP
{
    std::string time;  // first field of the record, as text
    std::string symb;  // second field of the record, as text
    float bid;
    float ask;
    int bidSize;
    int askSize;
};
// The benchmark loop and the Spirit grammar refer to this type as
// tokenDataCPP; declare that alias here (same naming pattern as
// sTokenDataC / tokenDataC). Repeating an identical typedef elsewhere is
// harmless in C++.
typedef sTokenDataCPP tokenDataCPP;
// Benchmark loop for the tokenize()-based parser: splits `raw` on commas
// ITERATIONS times and converts the six tokens into a tokenDataCPP.
// NOTE(review): myToken, tokenize, raw and ITERATIONS are defined outside this
// snippet; `raw` is assumed to use the same "time,symb,bid,ask,bidSize,askSize"
// layout as the strtok test line -- confirm.
std::vector<myToken> tokens1;
tokenDataCPP tokenData;
printf("Testing method1....\n");
for(int i = 0; i < ITERATIONS; i++)
{
    // Reuse the vector across iterations; clear() drops the previous tokens.
    tokens1.clear();
    tokenize(raw, ",", std::back_inserter(tokens1));
    tokenData.time.assign(tokens1.at(0).token_);
    tokenData.symb.assign(tokens1.at(1).token_);
    // Token order matches parseTick and the Spirit grammar: bid precedes ask
    // and bidSize precedes askSize.
    tokenData.bid = atof(tokens1.at(2).token_.c_str());
    tokenData.ask = atof(tokens1.at(3).token_.c_str());
    tokenData.bidSize = atoi(tokens1.at(4).token_.c_str());
    tokenData.askSize = atoi(tokens1.at(5).token_.c_str());
}
以及一个简单的 Boost.Spirit.Qi 实现,其语法定义如下:
// Boost.Spirit.Qi grammar for one tick record:
//     <text> ',' <text> ',' <float> ',' <float> ',' <int> ',' <int>
// parsed with an ASCII whitespace skipper into an sTokenDataCPP.
// NOTE(review): attribute propagation via %= presumably relies on
// sTokenDataCPP being adapted as a Fusion sequence elsewhere, and the
// unqualified qi names (grammar, rule, lexeme, float_, int_) on
// using-declarations outside this snippet -- confirm.
template <typename Iterator>
struct tick_parser : grammar<Iterator, sTokenDataCPP(), boost::spirit::ascii::space_type>
{
    tick_parser() : tick_parser::base_type(start)
    {
        // A text field is one or more characters up to, but not including,
        // the next comma. Without the "- ','" exclusion char_ would also
        // match the separators and swallow the rest of the line, so the
        // following ',' could never match.
        my_string %= lexeme[+(boost::spirit::ascii::char_ - ',')];

        start %=
               my_string >> ','
            >> my_string >> ','
            >> float_    >> ','
            >> float_    >> ','
            >> int_      >> ','
            >> int_
            ;
    }

    // One comma-free text field.
    rule<Iterator, std::string(), boost::spirit::ascii::space_type> my_string;
    // Whole record; its signature must match the grammar's base_type.
    rule<Iterator, sTokenDataCPP(), boost::spirit::ascii::space_type> start;
};
将迭代次数设置为 500k:
strtok 版本:2s
John Dibling 的版本:115s
Boost.Spirit 版本:172s
如果有人需要,我可以贴出完整的代码;我只是不想占用太多篇幅。