// Body of the routine that the error messages below identify as
// CharBigrams::Create: it fills char_bigrams_obj->bigram_table_ from a list
// of text lines of the form "<count> <hex char1> <hex char2>", then derives
// a log-based cost for every table entry and returns char_bigrams_obj.
// NOTE(review): the signature is outside this excerpt and the brace nesting
// below does not balance, so some lines appear to be missing here — confirm
// against the complete file before relying on these comments.
{
string file_name;
// NOTE(review): str is declared but never used in the visible code.
string str;
// Bigram data file name: <data_file_path><lang>.cube.bigrams
file_name = data_file_path +
lang;
file_name += ".cube.bigrams";
}
// Report a missing bigrams object.
// NOTE(review): no return is visible in this branch, so as written execution
// continues and dereferences char_bigrams_obj just below — confirm.
if (char_bigrams_obj ==
NULL) {
fprintf(stderr, "Cube ERROR (CharBigrams::Create): could not create "
"character bigrams object.\n");
}
// Start from an empty table: no counts, no first-level entries (max_char of
// -1 means "no valid index yet").
CharBigramTable *table = &char_bigrams_obj->bigram_table_;
table->total_cnt = 0;
table->max_char = -1;
table->char_bigram =
NULL;
// One bigram record per element of str_vec.
// NOTE(review): nothing in the visible code fills str_vec, so this loop
// would run zero times as written; presumably the code that splits the file
// contents into lines is missing from this excerpt — confirm.
vector<string> str_vec;
for (int big = 0; big < str_vec.size(); big++) {
int cnt;
// Each line holds a decimal count followed by two hexadecimal values.
// NOTE(review): the declarations of ch1 and ch2 are not visible here, and no
// return/continue follows the error message, so a malformed line would still
// be processed below with whatever values ch1/ch2/cnt hold — confirm.
if (sscanf(str_vec[big].c_str(), "%d %x %x", &cnt, &ch1, &ch2) != 3) {
fprintf(stderr, "Cube ERROR (CharBigrams::Create): invalid format "
"reading line: %s\n", str_vec[big].c_str());
}
// Grow the first-level array so that index ch1 is valid.
if (ch1 > table->max_char) {
CharBigram *char_bigram = new CharBigram[ch1 + 1];
// NOTE(review): a plain new[] expression normally throws on failure rather
// than returning NULL, so this check may never fire; no return is visible
// in the branch either.
if (char_bigram ==
NULL) {
fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating "
"additional memory for character bigram table.\n");
}
// Carry the existing max_char + 1 entries over into the larger array, then
// release the old one.
if (table->char_bigram !=
NULL && table->max_char >= 0) {
memcpy(char_bigram, table->char_bigram,
(table->max_char + 1) * sizeof(*char_bigram));
delete []table->char_bigram;
}
table->char_bigram = char_bigram;
// Mark every newly added first-level entry as empty.
for (int new_big = table->max_char + 1; new_big <= ch1; new_big++) {
table->char_bigram[new_big].total_cnt = 0;
table->char_bigram[new_big].max_char = -1;
table->char_bigram[new_big].bigram =
NULL;
}
table->max_char = ch1;
}
// Grow the second-level array of entry ch1 so that index ch2 is valid.
// NOTE(review): as written this block prints the allocation error
// unconditionally and closes right after it, which leaves `bigram` out of
// scope for the statements that follow; a NULL check and the matching braces
// appear to be missing from this excerpt — confirm.
if (ch2 > table->char_bigram[ch1].max_char) {
Bigram *bigram = new Bigram[ch2 + 1];
fprintf(stderr, "Cube ERROR (CharBigrams::Create): error allocating "
"memory for bigram.\n");
}
// Carry the existing second-level entries over, then release the old array.
if (table->char_bigram[ch1].bigram !=
NULL &&
table->char_bigram[ch1].max_char >= 0) {
memcpy(bigram, table->char_bigram[ch1].bigram,
(table->char_bigram[ch1].max_char + 1) * sizeof(*bigram));
delete []table->char_bigram[ch1].bigram;
}
table->char_bigram[ch1].bigram = bigram;
// Newly added second-level entries start with a zero count.
for (int new_big = table->char_bigram[ch1].max_char + 1;
new_big <= ch2; new_big++) {
table->char_bigram[ch1].bigram[new_big].cnt = 0;
}
table->char_bigram[ch1].max_char = ch2;
}
// Record the count for the (ch1, ch2) pair and add it to the per-ch1 total
// and the table-wide total.
table->char_bigram[ch1].bigram[ch2].cnt = cnt;
table->char_bigram[ch1].total_cnt += cnt;
table->total_cnt += cnt;
}
// NOTE(review): the worst_cost expression is cut off — static_cast<int>( is
// followed directly by a for statement, and the closing parentheses after
// table->total_cnt below do not pair up within the cost assignment. Part of
// both expressions appears to be missing from this excerpt — confirm.
table->worst_cost = static_cast<int>(
// Visit every (ch1, ch2) slot currently allocated in the table.
for (
char_32 ch1 = 0; ch1 <= table->max_char; ch1++) {
for (
char_32 ch2 = 0; ch2 <= table->char_bigram[ch1].max_char; ch2++) {
int cnt = table->char_bigram[ch1].bigram[ch2].cnt;
// The visible cost is the log of the entry's share of total_cnt; counts
// below 0.5 (e.g. the zero-initialized slots) are raised to 0.5 by MAX.
table->char_bigram[ch1].bigram[ch2].cost =
log(
MAX(0.5, static_cast<double>(cnt)) /
table->total_cnt));
}
}
return char_bigrams_obj;
}