30 #pragma warning(disable:4244) // Conversion warnings
31 #pragma warning(disable:4800) // int/bool warnings
49 if (word.
length() == 0)
return false;
51 int end_index = word.
length() - 1;
52 for (
int i = 0; i <= end_index; i++) {
59 if (edge != NO_EDGE) {
61 if (node == 0) node = NO_EDGE;
71 bool enable_wildcard)
const {
72 if (filename ==
NULL)
return 0;
87 enable_wildcard ? wildcard : INVALID_UNICHAR_ID)) {
88 tprintf(
"Missing word: %s\n",
string);
92 tprintf(
"Failed to create a valid word from %s\n",
string);
112 for (
int i = 0; i < children.
size(); i++) {
132 if (wildcard != INVALID_UNICHAR_ID && word->
unichar_id(index) == wildcard) {
133 bool any_matched =
false;
136 for (
int i = 0; i < vec.
size(); ++i) {
144 word_end = index == word->
length() - 1;
146 if (edge != NO_EDGE) {
151 }
else if (node != 0) {
160 PermuterType perm,
int unicharset_size,
int debug_level) {
185 bool word_end)
const {
189 EDGE_REF end = num_forward_edges_in_node0 - 1;
191 while (start <= end) {
192 edge = (start + end) >> 1;
194 unichar_id, edges_[edge]);
197 }
else if (compare == 1) {
204 if (edge != NO_EDGE && edge_occupied(edge)) {
209 }
while (!last_edge(edge++));
219 if (forward_edge (edge)) {
222 }
while (!last_edge(edge++));
229 if (node == NO_EDGE)
return;
232 const char *forward_string =
"FORWARD";
233 const char *backward_string =
" ";
235 const char *last_string =
"LAST";
236 const char *not_last_string =
" ";
238 const char *eow_string =
"EOW";
239 const char *not_eow_string =
" ";
247 if (edge_occupied(edge)) {
250 forward_edge(edge) ? forward_string : backward_string;
251 is_last = last_edge(edge) ? last_string : not_last_string;
252 eow =
end_of_word(edge) ? eow_string : not_eow_string;
257 direction, is_last, eow);
259 if (edge - node > max_num_edges)
return;
260 }
while (!last_edge(edge++));
262 if (edge < num_edges_ &&
263 edge_occupied(edge) && backward_edge(edge)) {
266 forward_edge(edge) ? forward_string : backward_string;
267 is_last = last_edge(edge) ? last_string : not_last_string;
268 eow =
end_of_word(edge) ? eow_string : not_eow_string;
272 ", unichar_id = %d, %s %s %s\n",
274 direction, is_last, eow);
277 }
while (!last_edge(edge++));
286 void SquishedDawg::print_edge(
EDGE_REF edge)
const {
287 if (edge == NO_EDGE) {
291 ", unichar_id = '%d', %s %s %s\n", edge,
293 (forward_edge(edge) ?
"FORWARD" :
" "),
294 (last_edge(edge) ?
"LAST" :
" "),
299 void SquishedDawg::read_squished_dawg(FILE *file,
304 if (debug_level)
tprintf(
"Reading squished dawg\n");
309 fread(&magic,
sizeof(
inT16), 1, file);
313 fread(&unicharset_size,
sizeof(
inT32), 1, file);
314 fread(&num_edges_,
sizeof(
inT32), 1, file);
317 unicharset_size =
reverse32(unicharset_size);
321 Dawg::init(type, lang, perm, unicharset_size, debug_level);
324 fread(&edges_[0],
sizeof(
EDGE_RECORD), num_edges_, file);
327 for (edge = 0; edge < num_edges_; ++edge) {
331 if (debug_level > 2) {
332 tprintf(
"type: %d lang: %s perm: %d unicharset_size: %d num_edges: %d\n",
334 for (edge = 0; edge < num_edges_; ++edge)
339 NODE_MAP SquishedDawg::build_node_map(
inT32 *num_nodes)
const {
347 for (edge = 0; edge < num_edges_; edge++)
348 node_map [edge] = -1;
350 node_counter = num_forward_edges(0);
353 for (edge = 0; edge < num_edges_; edge++) {
355 if (forward_edge(edge)) {
357 node_map[edge] = (edge ? node_counter : 0);
358 num_edges = num_forward_edges(edge);
359 if (edge != 0) node_counter += num_edges;
361 if (edge >= num_edges_)
break;
362 if (backward_edge(edge))
while (!last_edge(edge++));
372 inT32 node_count = 0;
379 node_map = build_node_map(&node_count);
383 fwrite(&magic,
sizeof(
inT16), 1, file);
384 fwrite(&unicharset_size_,
sizeof(
inT32), 1, file);
388 for (edge=0; edge < num_edges_; edge++)
389 if (forward_edge(edge))
392 fwrite(&num_edges,
sizeof(
inT32), 1, file);
395 tprintf(
"%d nodes in DAWG\n", node_count);
396 tprintf(
"%d edges in DAWG\n", num_edges);
399 for (edge = 0; edge < num_edges_; edge++) {
400 if (forward_edge(edge)) {
403 set_next_node(edge, node_map[old_index]);
404 temp_record = edges_[edge];
405 fwrite(&(temp_record),
sizeof(
EDGE_RECORD), 1, file);
406 set_next_node(edge, old_index);
407 }
while (!last_edge(edge++));
409 if (edge >= num_edges_)
break;
410 if (backward_edge(edge))
411 while (!last_edge(edge++));