26 #define strtok_r strtok_s
47 bool use_ambigs_for_adaption,
51 for (i = 0; i < unicharset->
size(); ++i) {
55 if (use_ambigs_for_adaption) {
60 if (debug_level)
tprintf(
"Reading ambiguities\n");
62 int TestAmbigPartSize;
63 int ReplacementAmbigPartSize;
70 const int kBufferSize = 10 + 2 * kMaxAmbigStringSize;
72 char ReplacementString[kMaxAmbigStringSize];
82 version =
static_cast<int>(strtol(buffer+1,
NULL, 10));
87 while ((end_offset < 0 || ftell(AmbigFile) < end_offset) &&
88 fgets(buffer, kBufferSize, AmbigFile) !=
NULL) {
90 if (debug_level > 2)
tprintf(
"read line %s\n", buffer);
92 if (!ParseAmbiguityLine(line_num, version, debug_level, *unicharset,
93 buffer, &TestAmbigPartSize, TestUnicharIds,
94 &ReplacementAmbigPartSize,
95 ReplacementString, &type))
continue;
98 InsertIntoTable((type ==
REPLACE_AMBIG) ? replace_ambigs_ : dang_ambigs_,
99 TestAmbigPartSize, TestUnicharIds,
100 ReplacementAmbigPartSize, ReplacementString, type,
101 ambig_spec, unicharset);
104 if (TestAmbigPartSize == 1 &&
106 if (one_to_one_definite_ambigs_[TestUnicharIds[0]] ==
NULL) {
107 one_to_one_definite_ambigs_[TestUnicharIds[0]] =
new UnicharIdVector();
109 one_to_one_definite_ambigs_[TestUnicharIds[0]]->
push_back(
113 if (use_ambigs_for_adaption) {
114 for (i = 0; i < TestAmbigPartSize; ++i) {
115 if (ambigs_for_adaption_[TestUnicharIds[i]] ==
NULL) {
118 adaption_ambigs_entry = ambigs_for_adaption_[TestUnicharIds[i]];
119 const char *tmp_ptr = ReplacementString;
120 const char *tmp_ptr_end = ReplacementString + strlen(ReplacementString);
121 int step = unicharset->
step(tmp_ptr);
127 for (j = 0; j < adaption_ambigs_entry->
size() &&
128 (*adaption_ambigs_entry)[j] > id_to_insert; ++j);
129 if (j < adaption_ambigs_entry->size()) {
130 if ((*adaption_ambigs_entry)[j] != id_to_insert) {
131 adaption_ambigs_entry->
insert(id_to_insert, j);
134 adaption_ambigs_entry->
push_back(id_to_insert);
138 step = tmp_ptr < tmp_ptr_end ? unicharset->
step(tmp_ptr) : 0;
146 if (use_ambigs_for_adaption) {
147 for (i = 0; i < ambigs_for_adaption_.
size(); ++i) {
148 adaption_ambigs_entry = ambigs_for_adaption_[i];
149 if (adaption_ambigs_entry ==
NULL)
continue;
150 for (j = 0; j < adaption_ambigs_entry->
size(); ++j) {
151 UNICHAR_ID ambig_id = (*adaption_ambigs_entry)[j];
152 if (reverse_ambigs_for_adaption_[ambig_id] ==
NULL) {
155 reverse_ambigs_for_adaption_[ambig_id]->
push_back(i);
161 if (debug_level > 1) {
162 for (
int tbl = 0; tbl < 2; ++tbl) {
164 (tbl == 0) ? replace_ambigs_ : dang_ambigs_;
165 for (i = 0; i < print_table.
size(); ++i) {
166 AmbigSpec_LIST *lst = print_table[i];
167 if (lst ==
NULL)
continue;
169 tprintf(
"%s Ambiguities for %s:\n",
170 (tbl == 0) ?
"Replaceable" :
"Dangerous",
173 AmbigSpec_IT lst_it(lst);
174 for (lst_it.mark_cycle_pt(); !lst_it.cycled_list(); lst_it.forward()) {
183 if (use_ambigs_for_adaption) {
184 for (
int vec_id = 0; vec_id < 2; ++vec_id) {
186 ambigs_for_adaption_ : reverse_ambigs_for_adaption_;
187 for (i = 0; i < vec.
size(); ++i) {
188 adaption_ambigs_entry = vec[i];
189 if (adaption_ambigs_entry !=
NULL) {
190 tprintf(
"%sAmbigs for adaption for %s:\n",
191 (vec_id == 0) ?
"" :
"Reverse ",
193 for (j = 0; j < adaption_ambigs_entry->
size(); ++j) {
195 (*adaption_ambigs_entry)[j]).
string());
// Parses one line of an ambiguities file that has already been read into
// `buffer`. Tokens are split in place with strtok_r on kAmbigDelimiters, so
// `buffer` is modified.
//
// Token order, as consumed by the code below:
//   1. an integer count, stored in *TestAmbigPartSize (must be > 0);
//   2. that many unichar tokens (the "test" side), tracked via TestUnicharIds;
//   3. an integer count, stored in *ReplacementAmbigPartSize (must be > 0);
//   4. that many unichar tokens, concatenated into ReplacementString;
//   5. an integer stored in *type.
//
// Parameters:
//   line_num     - line number used only in diagnostic messages.
//   version      - not referenced by any line visible in this excerpt.
//   debug_level  - when non-zero, malformed input is reported through tprintf.
//   unicharset   - not referenced by any line visible in this excerpt.
//   ReplacementString - output buffer; the caller visible in this file passes
//                  an array of kMaxAmbigStringSize chars.
//
// Returns bool. The return statements are not among the visible lines; the
// caller visible in this file skips the input line when the result is false.
//
// NOTE(review): the embedded original line numbers show that this excerpt
// omits lines (declarations, returns, closing braces). Only the visible
// statements were changed; the gaps are left exactly as they were.
205 bool UnicharAmbigs::ParseAmbiguityLine(
206 int line_num,
int version,
int debug_level,
const UNICHARSET &unicharset,
207 char *buffer,
int *TestAmbigPartSize,
UNICHAR_ID *TestUnicharIds,
208 int *ReplacementAmbigPartSize,
char *ReplacementString,
int *type) {
// Field 1: size of the test n-gram. sscanf must convert exactly one item
// (an EOF return of -1 is a failure too), and the parsed VALUE - not the
// pointer - must be positive.
212 if (!(token = strtok_r(buffer, kAmbigDelimiters, &next_token)) ||
213 sscanf(token,
"%d", TestAmbigPartSize) != 1 || *TestAmbigPartSize <= 0) {
214 if (debug_level)
tprintf(kIllegalMsg, line_num);
// The "%d" conversion needs its argument; omitting it is undefined behaviour.
218 tprintf(
"Too many unichars in ambiguity on line %d\n", line_num);
// Field 2: one token per test unichar; stop early if the line runs out.
221 for (i = 0; i < *TestAmbigPartSize; ++i) {
222 if (!(token = strtok_r(
NULL, kAmbigDelimiters, &next_token)))
break;
224 if (debug_level)
tprintf(kIllegalUnicharMsg, token);
// presumably terminates the id list after the loop - TODO confirm against the
// full source, the surrounding lines are not visible here.
229 TestUnicharIds[i] = INVALID_UNICHAR_ID;
// Field 3: size of the replacement n-gram. Also fails if the loop above ended
// before consuming *TestAmbigPartSize tokens.
231 if (i != *TestAmbigPartSize ||
232 !(token = strtok_r(
NULL, kAmbigDelimiters, &next_token)) ||
233 sscanf(token,
"%d", ReplacementAmbigPartSize) != 1 ||
234 *ReplacementAmbigPartSize <= 0) {
235 if (debug_level)
tprintf(kIllegalMsg, line_num);
// The "%d" conversion needs its argument; omitting it is undefined behaviour.
239 tprintf(
"Too many unichars in ambiguity on line %d\n", line_num);
// Field 4: concatenate the replacement tokens into ReplacementString.
242 ReplacementString[0] =
'\0';
243 for (i = 0; i < *ReplacementAmbigPartSize; ++i) {
244 if (!(token = strtok_r(
NULL, kAmbigDelimiters, &next_token)))
break;
// Refuse to append a token that would not fit (including the terminating
// NUL) in a kMaxAmbigStringSize-byte buffer. Breaking here leaves
// i != *ReplacementAmbigPartSize, so the line is reported as illegal below.
if (strlen(ReplacementString) + strlen(token) >=
static_cast<size_t>(kMaxAmbigStringSize))
break;
245 strcat(ReplacementString, token);
247 if (debug_level)
tprintf(kIllegalUnicharMsg, token);
// Fewer replacement tokens were consumed than announced.
251 if (i != *ReplacementAmbigPartSize) {
252 if (debug_level)
tprintf(kIllegalMsg, line_num);
// Field 5: integer ambiguity type; exactly one conversion is required.
265 if (!(token = strtok_r(
NULL, kAmbigDelimiters, &next_token)) ||
266 sscanf(token,
"%d", type) != 1) {
267 if (debug_level)
tprintf(kIllegalMsg, line_num);
274 void UnicharAmbigs::InsertIntoTable(
276 UNICHAR_ID *TestUnicharIds,
int ReplacementAmbigPartSize,
277 const char *ReplacementString,
int type,
278 AmbigSpec *ambig_spec,
UNICHARSET *unicharset) {
279 ambig_spec->type =
static_cast<AmbigType>(type);
280 if (TestAmbigPartSize == 1 && ReplacementAmbigPartSize == 1 &&
281 unicharset->
to_lower(TestUnicharIds[0]) ==
286 ambig_spec->wrong_ngram_size =
301 ambig_spec->correct_ngram_id =
303 if (ReplacementAmbigPartSize > 1) {
304 unicharset->
set_isngram(ambig_spec->correct_ngram_id,
true);
308 for (i = 0; i < TestAmbigPartSize; ++i) {
310 if (TestAmbigPartSize == 1) {
311 unichar_id = ambig_spec->correct_ngram_id;
314 ReplacementString, i, TestAmbigPartSize,
false);
318 ambig_spec->correct_fragments[i] = unichar_id;
320 ambig_spec->correct_fragments[i] = INVALID_UNICHAR_ID;
324 if (table[TestUnicharIds[0]] ==
NULL) {
325 table[TestUnicharIds[0]] =
new AmbigSpec_LIST();
327 table[TestUnicharIds[0]]->add_sorted(