【发布时间】:2021-11-19 18:07:26
【问题描述】:
我正在开发一个 C++ 程序来分析俄语文本中出现的字母。它按计划工作,但它的性能真的很差(比较我的其他 python 程序在近 10 秒内完成这项任务,而这个在大约 7 分钟内完成)。
所以我的问题是如何提高性能?我应该阅读什么来加深对这个问题的理解?
主要
#include <iostream>
#include <io.h>
#include <fcntl.h>
#include "text.h"
int main()
{
_setmode(_fileno(stdout), _O_U16TEXT);
Text_Container mytext("./text.txt");
mytext.initialize();
//std::wcout<< mytext.Display_text()<<std::endl;
mytext.print_dict(0);
mytext.print_dict(1);
return 0;
}
类.cpp
void Text_Container::print_map(std::wstring_view comment, const std::map<wchar_t, wchar_t>& m)
{
std::wcout << comment;
for (const auto& [key, value] : m) {
std::wcout << key << L" = " << value << L"; ";
}
std::wcout << L"\n";
}
void Text_Container::print_dict(int mode)
{
if (mode == 0) {
std::wcout << "Dictionary with no whitespaces\n";
for (const auto& [key, value] : _dict) {
std::wcout << L"'" << key << L"' = " << value << L";\n";
}
std::wcout << L"\n";
}
if (mode == 1) {
std::wcout << "Dictionary with whitespaces\n";
for (const auto& [key, value] : _dict_w) {
std::wcout << L"'" << key << L"' = " << value << L";\n";
}
std::wcout << L"\n";
}
}
void Text_Container::read_file(const char* filename)
{
std::wifstream wif(filename);
wif.imbue(std::locale(std::locale::empty(), new std::codecvt_utf8<wchar_t>));
std::wstringstream wss;
wss << wif.rdbuf();
_text = wss.str();
}
void Text_Container::initialize()
{
/// <summary>
/// lowercase text
/// </summary>
/// <param name="mode"></param>
std::wstring new_s;
for (std::wstring::size_type i = 0; i < _text.size(); i++) {
wchar_t temp = iterate_over_map(_text[i]);
if (temp != '\1')
{ //whitespaces
wchar_t temp0 = iterate_over_dictionary(_text[i], 0);
if (temp0 != '\1')
{
_f_w_text = _f_w_text + temp0;
};
//no whitespaces
wchar_t temp1 = iterate_over_dictionary(_text[i], 1);
if (temp1 != '\1')
{
_f_text = _f_text + temp1;
};
new_s = new_s + temp;
};
}
_text = new_s;
}
//this function pring russian letters to lowercase
wchar_t Text_Container::iterate_over_map(wchar_t& temp) {
if (temp == L'Ё' || temp == L'Э' || temp == L'ё' || temp == L'э') {
temp = L'е';
}
if (temp == L'Ъ' || temp == L'ъ') {
temp = L'ь';
}
for (const auto& [key, value] : _m) {
if (temp == value) { return value; }
else { if (temp == key) { return value; } }
}
return L'\1';
}
//this fucntion verifies input letter and if it's in the selected dictionary increases value by 1
wchar_t Text_Container::iterate_over_dictionary(wchar_t& temp, int mode) {
std::map<wchar_t, int>::iterator itr;
if (mode == 0)
{
for (itr = _dict_w.begin(); itr != _dict_w.end(); ++itr) {
if (itr->first == temp) {
itr->second++;
return itr->first;
}
}
}
if (mode == 1)
{
for (itr = _dict.begin(); itr != _dict.end(); ++itr) {
if (itr->first == temp) {
itr->second++;
return itr->first;
}
}
}
return L'\1';
}
类.h
#define _SILENCE_ALL_CXX17_DEPRECATION_WARNINGS
#include <string_view>
#include <sstream>
#include <fstream>
#include <codecvt>
#include <iostream>
#include <string>
#include <map>
#include <string_view>
class Text_Container
{
private:
//dictionaries to filter text & also bring them to lowercase
const std::map<wchar_t, wchar_t> _m{ {L'А', L'а'}, {L'Б', L'б'}, {L'В', L'в'},
{L'Г', L'г'}, {L'Д', L'д'}, {L'Е', L'е'},
{L'Ж', L'ж'}, {L'З', L'з'}, {L'И', L'и'},
{L'Й', L'й'}, {L'К', L'к'}, {L'Л', L'л'},
{L'М', L'м'}, {L'Н', L'н'}, {L'О', L'о'},
{L'П', L'п'}, {L'Р', L'р'}, {L'С', L'с'},
{L'Т', L'т'}, {L'У', L'у'}, {L'Ф', L'ф'},
{L'Ч', L'ч'}, {L'Ц', L'ц'}, {L'Ш', L'ш'},
{L'Щ', L'щ'}, {L'Ы', L'ы'}, {L'Ь', L'ь'},
{L'Ю', L'ю'}, {L'Я', L'я'}, {L' ', L' '}, };
/*
std::map<wchar_t, int> _dict_w{ {L'а', 0}, {L'б', 0}, {L' ',0},}; //mode 0
std::map<wchar_t, int> _dict{ {L'а', 0}, {L'б', 0},}; //mode 1
*/
//mode 0
std::map<wchar_t, int> _dict_w{ {L'а', 0}, {L'б', 0}, {L'в', 0},
{L'г', 0}, {L'д', 0}, {L'е', 0},
{L'ж', 0}, {L'з', 0}, {L'и', 0},
{L'й', 0}, {L'к', 0}, {L'л', 0},
{L'м', 0}, {L'н', 0}, {L'о', 0},
{L'п', 0}, {L'р', 0}, {L'с', 0},
{L'т', 0}, {L'у', 0}, {L'ф', 0},
{L'ч', 0}, {L'ц', 0}, {L'ш', 0},
{L'щ', 0}, {L'ы', 0}, {L'ь', 0},
{L'ю', 0}, {L'я', 0}, {L' ', 0}, };
//mode 1
std::map<wchar_t, int> _dict{ {L'а', 0}, {L'б', 0}, {L'в', 0},
{L'г', 0}, {L'д', 0}, {L'е', 0},
{L'ж', 0}, {L'з', 0}, {L'и', 0},
{L'й', 0}, {L'к', 0}, {L'л', 0},
{L'м', 0}, {L'н', 0}, {L'о', 0},
{L'п', 0}, {L'р', 0}, {L'с', 0},
{L'т', 0}, {L'у', 0}, {L'ф', 0},
{L'ч', 0}, {L'ц', 0}, {L'ш', 0},
{L'щ', 0}, {L'ы', 0}, {L'ь', 0},
{L'ю', 0}, {L'я', 0}, };
//vars
std::wstring _text = L""; // inintial text
std::wstring _f_text = L""; //mode 1
std::wstring _f_w_text = L"";// mode 0
//methods
wchar_t iterate_over_map(wchar_t& temp);
wchar_t iterate_over_dictionary(wchar_t& temp, int mode);
public:
//constructor
Text_Container(const char* filename) { read_file(filename); };
//destructor
//methods
std::wstring Display_text() { return _text; }
std::map<wchar_t, wchar_t> give_m() { return _m; };
void print_map(std::wstring_view comment, const std::map<wchar_t, wchar_t>& m);
void read_file(const char* filename);
void print_dict(int mode);
void initialize();
};
【问题讨论】:
-
也许从 UTF 转换到 Windows-1251 是一个不错的决定?
-
你应该坚持使用 Unicode。读取额外的字节不需要时间。您的代码的哪一部分很慢,或者您认为它很慢?
-
您是否正在测量调试版本的性能?
-
@IgorTandetnik 我是,但我创建了一个发布版本,结果仍然很糟糕。
-
@BarmakShemirani 我的意思是摆脱 wstring wchar 的东西可能更容易,因为在我看来这些和 STL 的兼容性很差。
标签: c++ windows unicode stl c++17