Tesseract  3.02
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
clusttool.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: clusttool.cpp
3  ** Purpose: Misc. tools for use with the clustering routines
4  ** Author: Dan Johnson
5  ** History: 6/6/89, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 //--------------------------Include Files----------------------------------
20 #include "clusttool.h"
21 #include "const.h"
22 #include "danerror.h"
23 #include "emalloc.h"
24 #include "scanutils.h"
25 #include <stdio.h>
26 #include <math.h>
27 
28 //---------------Global Data Definitions and Declarations--------------------
29 #define TOKENSIZE 80 //max size of tokens read from an input file
30 #define MAXSAMPLESIZE 65535 //max num of dimensions in feature space
31 //#define MAXBLOCKSIZE 65535 //max num of samples in a character (block size)
32 
33 /*---------------------------------------------------------------------------
34  Public Code
35 -----------------------------------------------------------------------------*/
46 uinT16 ReadSampleSize(FILE *File) {
47  int SampleSize;
48 
49  if ((fscanf (File, "%d", &SampleSize) != 1) ||
50  (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
51  DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
52  return (SampleSize);
53 } // ReadSampleSize
54 
55 
68 PARAM_DESC *ReadParamDesc(FILE *File, uinT16 N) {
69  int i;
70  PARAM_DESC *ParamDesc;
71  char Token[TOKENSIZE];
72 
73  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
74  for (i = 0; i < N; i++) {
75  if (fscanf (File, "%s", Token) != 1)
77  "Illegal circular/linear specification");
78  if (Token[0] == 'c')
79  ParamDesc[i].Circular = TRUE;
80  else
81  ParamDesc[i].Circular = FALSE;
82 
83  if (fscanf (File, "%s", Token) != 1)
85  "Illegal essential/non-essential spec");
86  if (Token[0] == 'e')
87  ParamDesc[i].NonEssential = FALSE;
88  else
89  ParamDesc[i].NonEssential = TRUE;
90  if (fscanf (File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) !=
91  2)
92  DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
93  ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
94  ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
95  ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
96  }
97  return (ParamDesc);
98 } // ReadParamDesc
99 
100 
115 PROTOTYPE *ReadPrototype(FILE *File, uinT16 N) {
116  char Token[TOKENSIZE];
117  int Status;
118  PROTOTYPE *Proto;
119  int SampleCount;
120  int i;
121 
122  if ((Status = fscanf (File, "%s", Token)) == 1) {
123  Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
124  Proto->Cluster = NULL;
125  if (Token[0] == 's')
126  Proto->Significant = TRUE;
127  else
128  Proto->Significant = FALSE;
129 
130  Proto->Style = ReadProtoStyle (File);
131 
132  if ((fscanf (File, "%d", &SampleCount) != 1) || (SampleCount < 0))
133  DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
134  Proto->NumSamples = SampleCount;
135 
136  Proto->Mean = ReadNFloats (File, N, NULL);
137  if (Proto->Mean == NULL)
138  DoError (ILLEGALMEANSPEC, "Illegal prototype mean");
139 
140  switch (Proto->Style) {
141  case spherical:
142  if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)
143  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
144  Proto->Magnitude.Spherical =
145  1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
146  Proto->TotalMagnitude =
147  pow (Proto->Magnitude.Spherical, (float) N);
148  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
149  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
150  Proto->Distrib = NULL;
151  break;
152  case elliptical:
153  Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
154  if (Proto->Variance.Elliptical == NULL)
155  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
156  Proto->Magnitude.Elliptical =
157  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
158  Proto->Weight.Elliptical =
159  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
160  Proto->TotalMagnitude = 1.0;
161  for (i = 0; i < N; i++) {
162  Proto->Magnitude.Elliptical[i] =
163  1.0 /
164  sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
165  Proto->Weight.Elliptical[i] =
166  1.0 / Proto->Variance.Elliptical[i];
167  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
168  }
169  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
170  Proto->Distrib = NULL;
171  break;
172  case mixed:
173  Proto->Distrib =
174  (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
175  for (i = 0; i < N; i++) {
176  if (fscanf (File, "%s", Token) != 1)
178  "Illegal prototype distribution");
179  switch (Token[0]) {
180  case 'n':
181  Proto->Distrib[i] = normal;
182  break;
183  case 'u':
184  Proto->Distrib[i] = uniform;
185  break;
186  case 'r':
187  Proto->Distrib[i] = D_random;
188  break;
189  default:
191  "Illegal prototype distribution");
192  }
193  }
194  Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
195  if (Proto->Variance.Elliptical == NULL)
196  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
197  Proto->Magnitude.Elliptical =
198  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
199  Proto->Weight.Elliptical =
200  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
201  Proto->TotalMagnitude = 1.0;
202  for (i = 0; i < N; i++) {
203  switch (Proto->Distrib[i]) {
204  case normal:
205  Proto->Magnitude.Elliptical[i] = 1.0 /
206  sqrt ((double)
207  (2.0 * PI * Proto->Variance.Elliptical[i]));
208  Proto->Weight.Elliptical[i] =
209  1.0 / Proto->Variance.Elliptical[i];
210  break;
211  case uniform:
212  case D_random:
213  Proto->Magnitude.Elliptical[i] = 1.0 /
214  (2.0 * Proto->Variance.Elliptical[i]);
215  break;
216  case DISTRIBUTION_COUNT:
217  ASSERT_HOST(!"Distribution count not allowed!");
218  }
219  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
220  }
221  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
222  break;
223  }
224  return (Proto);
225  }
226  else if (Status == EOF)
227  return (NULL);
228  else {
229  DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
230  return (NULL);
231  }
232 } // ReadPrototype
233 
234 
235 /* ReadProtoStyle *************************************************************
236 Parameters: File open text file to read prototype style from
237 Globals: None
238 Operation: This routine reads an single token from the specified
239  text file and interprets it as a prototype specification.
240 Return: Prototype style read from text file
241 Exceptions: ILLEGALSTYLESPEC illegal prototype style specification
242 History: 6/8/89, DSJ, Created.
243 *******************************************************************************/
245  char Token[TOKENSIZE];
246  PROTOSTYLE Style;
247 
248  if (fscanf (File, "%s", Token) != 1)
249  DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
250  switch (Token[0]) {
251  case 's':
252  Style = spherical;
253  break;
254  case 'e':
255  Style = elliptical;
256  break;
257  case 'm':
258  Style = mixed;
259  break;
260  case 'a':
261  Style = automatic;
262  break;
263  default:
264  Style = elliptical;
265  DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
266  }
267  return (Style);
268 } // ReadProtoStyle
269 
270 
285 FLOAT32 *
286 ReadNFloats (FILE * File, uinT16 N, FLOAT32 Buffer[]) {
287  int i;
288  int NumFloatsRead;
289 
290  if (Buffer == NULL)
291  Buffer = (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
292 
293  for (i = 0; i < N; i++) {
294  NumFloatsRead = fscanf (File, "%f", &(Buffer[i]));
295  if (NumFloatsRead != 1) {
296  if ((NumFloatsRead == EOF) && (i == 0))
297  return (NULL);
298  else
299  DoError (ILLEGALFLOAT, "Illegal float specification");
300  }
301  }
302  return (Buffer);
303 } // ReadNFloats
304 
305 
317 void
318 WriteParamDesc (FILE * File, uinT16 N, PARAM_DESC ParamDesc[]) {
319  int i;
320 
321  for (i = 0; i < N; i++) {
322  if (ParamDesc[i].Circular)
323  fprintf (File, "circular ");
324  else
325  fprintf (File, "linear ");
326 
327  if (ParamDesc[i].NonEssential)
328  fprintf (File, "non-essential ");
329  else
330  fprintf (File, "essential ");
331 
332  fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
333  }
334 } // WriteParamDesc
335 
336 
348 void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto) {
349  int i;
350 
351  if (Proto->Significant)
352  fprintf (File, "significant ");
353  else
354  fprintf (File, "insignificant ");
355  WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
356  fprintf (File, "%6d\n\t", Proto->NumSamples);
357  WriteNFloats (File, N, Proto->Mean);
358  fprintf (File, "\t");
359 
360  switch (Proto->Style) {
361  case spherical:
362  WriteNFloats (File, 1, &(Proto->Variance.Spherical));
363  break;
364  case elliptical:
365  WriteNFloats (File, N, Proto->Variance.Elliptical);
366  break;
367  case mixed:
368  for (i = 0; i < N; i++)
369  switch (Proto->Distrib[i]) {
370  case normal:
371  fprintf (File, " %9s", "normal");
372  break;
373  case uniform:
374  fprintf (File, " %9s", "uniform");
375  break;
376  case D_random:
377  fprintf (File, " %9s", "random");
378  break;
379  case DISTRIBUTION_COUNT:
380  ASSERT_HOST(!"Distribution count not allowed!");
381  }
382  fprintf (File, "\n\t");
383  WriteNFloats (File, N, Proto->Variance.Elliptical);
384  }
385 } // WritePrototype
386 
387 
399 void WriteNFloats(FILE * File, uinT16 N, FLOAT32 Array[]) {
400  for (int i = 0; i < N; i++)
401  fprintf(File, " %9.6f", Array[i]);
402  fprintf(File, "\n");
403 } // WriteNFloats
404 
405 
417 void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle) {
418  switch (ProtoStyle) {
419  case spherical:
420  fprintf (File, "spherical");
421  break;
422  case elliptical:
423  fprintf (File, "elliptical");
424  break;
425  case mixed:
426  fprintf (File, "mixed");
427  break;
428  case automatic:
429  fprintf (File, "automatic");
430  break;
431  }
432 } // WriteProtoStyle
433 
434 /*---------------------------------------------------------------------------*/
436  FILE *File,
437  uinT16 N,
438  PARAM_DESC ParamDesc[],
439  LIST ProtoList,
440  BOOL8 WriteSigProtos,
441  BOOL8 WriteInsigProtos)
442 
443 /*
444 ** Parameters:
445 ** File open text file to write prototypes to
446 ** N number of dimensions in feature space
447 ** ParamDesc descriptions for each dimension
448 ** ProtoList list of prototypes to be written
449 ** WriteSigProtos TRUE to write out significant prototypes
450 ** WriteInsigProtos TRUE to write out insignificants
451 ** Globals:
452 ** None
453 ** Operation:
454 ** This routine writes a textual description of each prototype
455 ** in the prototype list to the specified file. It also
456 ** writes a file header which includes the number of dimensions
457 ** in feature space and the descriptions for each dimension.
458 ** Return:
459 ** None
460 ** Exceptions:
461 ** None
462 ** History:
463 ** 6/12/89, DSJ, Created.
464 */
465 
466 {
467  PROTOTYPE *Proto;
468 
469  /* write file header */
470  fprintf(File,"%0d\n",N);
471  WriteParamDesc(File,N,ParamDesc);
472 
473  /* write prototypes */
474  iterate(ProtoList)
475  {
476  Proto = (PROTOTYPE *) first_node ( ProtoList );
477  if (( Proto->Significant && WriteSigProtos ) ||
478  ( ! Proto->Significant && WriteInsigProtos ) )
479  WritePrototype( File, N, Proto );
480  }
481 } /* WriteProtoList */
482