// Body fragment of the routine that the error messages below identify as
// WordUnigrams::Create. It fills a word-unigram object with parallel arrays
// of words and integer costs taken from pairs of entries in str_vec.
// NOTE(review): the signature, the code that fills `str` / `str_vec`, the
// allocation of `word_unigrams_obj`, and any early returns are not visible in
// this view — confirm against the full file before relying on these notes.
{
string file_name;
string str;
// File name is <data_file_path><lang>.cube.word-freq.
file_name = data_file_path +
lang;
file_name += ".cube.word-freq";
}
// Entries are consumed below in pairs: str_vec[i] is a word and
// str_vec[i + 1] is its cost.
vector<string> str_vec;
// Fewer than two entries means not even one (word, cost) pair is available.
// NOTE(review): the body of this check is empty in this view.
if (str_vec.size() < 2) {
}
if (word_unigrams_obj ==
NULL) {
fprintf(stderr, "Cube ERROR (WordUnigrams::Create): could not create "
"word unigrams object.\n");
}
// full_len sizes the single character buffer that will hold every word.
int full_len = str.length();
// Two entries per word, hence half the vector size.
int word_cnt = str_vec.size() / 2;
word_unigrams_obj->words_ = new char*[word_cnt];
word_unigrams_obj->costs_ = new int[word_cnt];
// NOTE(review): a throwing operator new never yields NULL, so this check
// only matters with a non-throwing allocator — confirm the build settings.
if (word_unigrams_obj->words_ ==
NULL ||
word_unigrams_obj->costs_ ==
NULL) {
fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error allocating "
"word unigram fields.\n");
delete word_unigrams_obj;
// NOTE(review): no exit from the function is visible after this delete, yet
// the object is dereferenced on the following lines — confirm.
}
// One contiguous buffer backs all words; words_[0] points to its start.
word_unigrams_obj->words_[0] = new char[full_len];
if (word_unigrams_obj->words_[0] ==
NULL) {
fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error allocating "
"word unigram fields.\n");
delete word_unigrams_obj;
}
word_unigrams_obj->word_cnt_ = 0;
// char_buff is the write cursor into the shared word buffer.
char *char_buff = word_unigrams_obj->words_[0];
// word_cnt is reused as the running count of pairs stored so far.
word_cnt = 0;
// Starts at 0, so the recorded maximum is never negative.
int max_cost = 0;
// NOTE(review): if str_vec has an odd number of entries, the last iteration
// reads str_vec[wrd + 1] past the end and writes one slot beyond the
// size()/2 entries allocated above — confirm an even count is guaranteed.
for (int wrd = 0; wrd < str_vec.size(); wrd += 2) {
// Each word pointer refers to the word's start inside the shared buffer.
word_unigrams_obj->words_[word_cnt] = char_buff;
strcpy(char_buff, str_vec[wrd].c_str());
// Advance past the copied word and its terminating NUL.
char_buff += (str_vec[wrd].length() + 1);
// Parse the paired entry as a decimal int straight into costs_[word_cnt].
if (sscanf(str_vec[wrd + 1].c_str(), "%d",
word_unigrams_obj->costs_ + word_cnt) != 1) {
fprintf(stderr, "Cube ERROR (WordUnigrams::Create): error reading "
"word unigram data.\n");
delete word_unigrams_obj;
}
// Track the largest cost seen (MAX is defined elsewhere; presumably it
// yields the larger argument).
max_cost =
MAX(max_cost, word_unigrams_obj->costs_[word_cnt]);
word_cnt++;
}
word_unigrams_obj->word_cnt_ = word_cnt;
// The not-in-list cost is derived from max_cost.
// NOTE(review): the right-hand side of this expression is cut off in this
// view; the added term is not visible.
word_unigrams_obj->not_in_list_cost_ = max_cost +
return word_unigrams_obj;
}