Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members  

scsu.c

00001 /*
00002 *******************************************************************************
00003 *
00004 *   Copyright (C) 1999, International Business Machines
00005 *   Corporation and others.  All Rights Reserved.
00006 *
00007 *******************************************************************************
00008 *
00009 * File scsu.c
00010 *
00011 * Modification History:
00012 *
00013 *   Date        Name        Description
00014 *   05/17/99    stephen     Creation (ported from java UnicodeCompressor.java)
00015 *   09/21/99    stephen     Updated to handle data splits on decompression.
00016 *******************************************************************************
00017 */
00018 
00019 #include <limits.h>
00020 
00021 #include "unicode/scsu.h"
00022 
00023 #include "cmemory.h"
00024 
00025 /* Generic window shift: scsu_compress adds this to (char - window offset)
         when it writes a character out of a dynamic window */
00026 #define COMPRESSIONOFFSET 0x80
00027 
00028 /* Indicates a window index is invalid; scsu_compress compares the results
         of scsu_findDynamicWindow() and scsu_findStaticWindow() against it */
00029 #define INVALIDWINDOW -1
00030 
00031 /* Indicates a character doesn't exist in input; used as the look-ahead
         value when the source buffer has no further character */
00032 #define INVALIDCHAR -1
00033 
00034 /* Compression modes (the values held in comp->fMode) */
00035 #define SINGLEBYTEMODE 0
00036 #define UNICODEMODE 1
00037 
00038 /* Reserved index value; comments in scsu_compress state that
         scsu_makeIndex() yields it for INVALIDCHAR */
00039 #define RESERVEDINDEX 0x00
00040 
00041 /* Indices for scripts which cross a half-block boundary.  These are the
         last seven slots of sOffsetTable, which hold the offsets 0xC0, 0x250,
         0x370, 0x530, 0x3040, 0x30A0 and 0xFF60 respectively */
00042 #define LATININDEX 0xF9
00043 #define IPAEXTENSIONINDEX 0xFA
00044 #define GREEKINDEX 0xFB
00045 #define ARMENIANINDEX 0xFC
00046 #define HIRAGANAINDEX 0xFD
00047 #define KATAKANAINDEX 0xFE
00048 #define HALFWIDTHKATAKANAINDEX 0xFF
00049 
00050 /* Single-byte mode tags.  scsu_compress forms the numbered tags as
         base + window number (SQUOTE0 + n, SCHANGE0 + n, SDEFINE0 + n), so
         each family of eight must stay contiguous */
00051 #define SDEFINEX 0x0B
00052 /* 0x0C is a reserved value*/
00053 #define SRESERVED 0x0C
00054 #define SQUOTEU 0x0E
00055 #define SCHANGEU 0x0F
00056 
00057 #define SQUOTE0 0x01
00058 #define SQUOTE1 0x02
00059 #define SQUOTE2 0x03
00060 #define SQUOTE3 0x04
00061 #define SQUOTE4 0x05
00062 #define SQUOTE5 0x06
00063 #define SQUOTE6 0x07
00064 #define SQUOTE7 0x08
00065 
00066 #define SCHANGE0 0x10
00067 #define SCHANGE1 0x11
00068 #define SCHANGE2 0x12
00069 #define SCHANGE3 0x13
00070 #define SCHANGE4 0x14
00071 #define SCHANGE5 0x15
00072 #define SCHANGE6 0x16
00073 #define SCHANGE7 0x17
00074 
00075 #define SDEFINE0 0x18
00076 #define SDEFINE1 0x19
00077 #define SDEFINE2 0x1A
00078 #define SDEFINE3 0x1B
00079 #define SDEFINE4 0x1C
00080 #define SDEFINE5 0x1D
00081 #define SDEFINE6 0x1E
00082 #define SDEFINE7 0x1F
00083 
00084 /* Unicode mode tags.  scsu_compress forms the numbered tags as
         base + window number (UCHANGE0 + n, UDEFINE0 + n), so each family of
         eight must stay contiguous.  The whole range 0xE0-0xF2 is what
         sUnicodeTagTable marks TRUE */
00085 #define UCHANGE0 0xE0
00086 #define UCHANGE1 0xE1
00087 #define UCHANGE2 0xE2
00088 #define UCHANGE3 0xE3
00089 #define UCHANGE4 0xE4
00090 #define UCHANGE5 0xE5
00091 #define UCHANGE6 0xE6
00092 #define UCHANGE7 0xE7
00093 
00094 #define UDEFINE0 0xE8
00095 #define UDEFINE1 0xE9
00096 #define UDEFINE2 0xEA
00097 #define UDEFINE3 0xEB
00098 #define UDEFINE4 0xEC
00099 #define UDEFINE5 0xED
00100 #define UDEFINE6 0xEE
00101 #define UDEFINE7 0xEF
00102 
00103 #define UQUOTEU 0xF0
00104 #define UDEFINEX 0xF1
00105 /* 0xF2 is a reserved value*/
00106 #define URESERVED 0xF2
00107 
00108 /* Local function prototypes.  Their definitions are not in this part of
         the listing; the notes below come only from how scsu_compress calls
         them and from its existing comments (verify against the definitions):
         - scsu_makeIndex: the result is used to index sOffsetTable and
           comp->fIndexCount; said to return RESERVEDINDEX for INVALIDCHAR.
         - scsu_inDynamicWindow: said to return FALSE for INVALIDCHAR.
         - scsu_findDynamicWindow, scsu_findStaticWindow: the result is
           compared against INVALIDWINDOW before being used as a window number.
         - scsu_getLRDefinedWindow: described at its call sites as returning
           the least recently defined window. */
00109 static int32_t scsu_makeIndex(int32_t c);
00110 static UBool scsu_inDynamicWindow(const UnicodeCompressor *comp,
00111                                    int32_t c, 
00112                                    int32_t whichWindow);
00113 static UBool scsu_inStaticWindow(int32_t c, 
00114                                   int32_t whichWindow);
00115 static UBool scsu_isCompressible(int32_t c);
00116 static int32_t scsu_findDynamicWindow(const UnicodeCompressor *comp,
00117                                       int32_t c);
00118 static int32_t scsu_findStaticWindow(int32_t c);
00119 static int32_t scsu_getLRDefinedWindow(const UnicodeCompressor *comp);
00120 
00121 /* Static tables generated by CompressionTableGenerator */
00122 
/* Window offset for every possible one-byte window index (256 entries).
   scsu_compress writes the index after an SDEFINEn/UDEFINEn tag and stores
   sOffsetTable[index] as the offset of the window being defined.
   Layout of the entries:
     0x00-0x67  index * 0x80                   (0x0000 .. 0x3380)
     0x68-0xA7  0xE000 + (index - 0x68) * 0x80 (0xE000 .. 0xFF80)
     0xA8-0xF8  0x0
     0xF9-0xFF  0xC0, 0x250, 0x370, 0x530, 0x3040, 0x30A0, 0xFF60
                (the slots named LATININDEX .. HALFWIDTHKATAKANAINDEX) */
00124 static int32_t sOffsetTable [] = { 
00125   0x0, 0x80, 0x100, 0x180, 0x200, 0x280, 0x300, 0x380, 0x400, 0x480,
00126   0x500, 0x580, 0x600, 0x680, 0x700, 0x780, 0x800, 0x880, 0x900,
00127   0x980, 0xa00, 0xa80, 0xb00, 0xb80, 0xc00, 0xc80, 0xd00, 0xd80,
00128   0xe00, 0xe80, 0xf00, 0xf80, 0x1000, 0x1080, 0x1100, 0x1180,
00129   0x1200, 0x1280, 0x1300, 0x1380, 0x1400, 0x1480, 0x1500, 0x1580,
00130   0x1600, 0x1680, 0x1700, 0x1780, 0x1800, 0x1880, 0x1900, 0x1980,
00131   0x1a00, 0x1a80, 0x1b00, 0x1b80, 0x1c00, 0x1c80, 0x1d00, 0x1d80,
00132   0x1e00, 0x1e80, 0x1f00, 0x1f80, 0x2000, 0x2080, 0x2100, 0x2180,
00133   0x2200, 0x2280, 0x2300, 0x2380, 0x2400, 0x2480, 0x2500, 0x2580,
00134   0x2600, 0x2680, 0x2700, 0x2780, 0x2800, 0x2880, 0x2900, 0x2980,
00135   0x2a00, 0x2a80, 0x2b00, 0x2b80, 0x2c00, 0x2c80, 0x2d00, 0x2d80,
00136   0x2e00, 0x2e80, 0x2f00, 0x2f80, 0x3000, 0x3080, 0x3100, 0x3180,
00137   0x3200, 0x3280, 0x3300, 0x3380, 0xe000, 0xe080, 0xe100, 0xe180,
00138   0xe200, 0xe280, 0xe300, 0xe380, 0xe400, 0xe480, 0xe500, 0xe580,
00139   0xe600, 0xe680, 0xe700, 0xe780, 0xe800, 0xe880, 0xe900, 0xe980,
00140   0xea00, 0xea80, 0xeb00, 0xeb80, 0xec00, 0xec80, 0xed00, 0xed80,
00141   0xee00, 0xee80, 0xef00, 0xef80, 0xf000, 0xf080, 0xf100, 0xf180,
00142   0xf200, 0xf280, 0xf300, 0xf380, 0xf400, 0xf480, 0xf500, 0xf580,
00143   0xf600, 0xf680, 0xf700, 0xf780, 0xf800, 0xf880, 0xf900, 0xf980,
00144   0xfa00, 0xfa80, 0xfb00, 0xfb80, 0xfc00, 0xfc80, 0xfd00, 0xfd80,
00145   0xfe00, 0xfe80, 0xff00, 0xff80, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
00146   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
00147   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
00148   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
00149   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
00150   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
00151   0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0xc0, 0x250, 0x370,
00152   0x530, 0x3040, 0x30a0, 0xff60  
00153 };
00154 
/* Lookup table, indexed by byte value (256 entries), telling whether that
   byte is a single-byte mode tag.  TRUE for 0x01-0x08, 0x0B-0x0C and
   0x0E-0x1F; FALSE everywhere else (including 0x00, 0x09, 0x0A and 0x0D).
   scsu_compress consults it for characters below 0x80 so that a tag value
   is never written to the stream unquoted. */
00156 static UBool sSingleTagTable [] = {
00157   FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE,
00158   FALSE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
00159   TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
00160   TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,FALSE, FALSE,
00161   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00162   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00163   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00164   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00165   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00166   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00167   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00168   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00169   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00170   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00171   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00172   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00173   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00174   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00175   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00176   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00177   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00178   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00179   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00180   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00181   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00182   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00183   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00184   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE 
00185 };
00186 
/* Lookup table, indexed by byte value (256 entries), telling whether that
   byte is a Unicode mode tag.  TRUE for 0xE0-0xF2 (UCHANGE0 .. URESERVED);
   FALSE everywhere else.  scsu_compress indexes it with the high byte of a
   character and writes UQUOTEU first when the entry is TRUE. */
00188 static UBool sUnicodeTagTable [] = {
00189   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00190   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00191   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00192   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00193   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00194   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00195   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00196   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00197   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00198   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00199   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00200   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00201   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00202   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00203   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00204   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00205   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00206   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00207   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00208   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00209   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00210   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00211   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00212   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00213   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, TRUE,
00214   TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,
00215   TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, FALSE, FALSE,
00216   FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
00217   FALSE
00218 };
00219 
/* Offsets of the eight static windows, indexed by static window number.
   scsu_compress quotes a character from static window n by writing
   SQUOTE0 + n followed by (char - sOffsets[n]), without adding
   COMPRESSIONOFFSET. */
00221 static int32_t sOffsets [] = {
00222   0x0000,   /* for quoting single-byte mode tags*/
00223   0x0080,   /* Latin-1 Supplement*/
00224   0x0100,   /* Latin Extended-A*/
00225   0x0300,   /* Combining Diacritical Marks*/
00226   0x2000,   /* General Punctuation*/
00227   0x2080,   /* Currency Symbols*/
00228   0x2100,   /* Letterlike Symbols and Number Forms*/
00229   0x3000    /* CJK Symbols and Punctuation*/
00230 };
00231 
00232 
/* Initialize a compressor object before first use.
   comp: the compressor to initialize; it is handed straight to scsu_reset(),
         so it must be whatever scsu_reset() accepts (its definition is not
         in this part of the listing).
   Performs no work of its own beyond that call. */
00233 void
00234 scsu_init(UnicodeCompressor *comp)
00235 {
00236   /* initialize to defaults by delegating to scsu_reset() */
00237   scsu_reset(comp);
00238 }
00239 
00240 void
00241 scsu_compress(UnicodeCompressor *comp,
00242               uint8_t           **target,
00243               const uint8_t     *targetLimit,
00244               const UChar       **source,
00245               const UChar       *sourceLimit,
00246               UErrorCode        *status)
00247 {
00248   /* the current position in the source unichar buffer*/
00249   const UChar *unicharBuffer = *source;
00250 
00251     /* the current position in the target byte buffer*/
00252   uint8_t *byteBuffer = *target;
00253     
00254   /* the current unicode character from the source buffer*/
00255   int32_t curUC = INVALIDCHAR;
00256   
00257   /* the index for the current character*/
00258   int32_t curIndex = -1;
00259         
00260   /* look ahead*/
00261   int32_t nextUC = INVALIDCHAR;
00262   int32_t forwardUC = INVALIDCHAR;
00263 
00264     /* temporary for window searching*/
00265   int32_t whichWindow = 0;
00266       
00267   /* high and low bytes of the current unicode character*/
00268   int32_t hiByte = 0;
00269   int32_t loByte = 0;
00270 
00271 
00272     /* verify we weren't passed a failing error code */
00273   if(U_FAILURE(*status)) {
00274     return;
00275   }
00276   /* verify the target buffer can hold at least 4 bytes */
00277   else if(targetLimit - byteBuffer < 4) {
00278     *status = U_ILLEGAL_ARGUMENT_ERROR;
00279     return;
00280   }
00281 
00282  mainLoop:
00283   while( unicharBuffer < sourceLimit && byteBuffer < targetLimit) {
00284     switch( comp->fMode ) {
00285             
00286       /* main single byte mode compression loop*/
00287     case SINGLEBYTEMODE:
00288       while( unicharBuffer < sourceLimit && byteBuffer < targetLimit ) {
00289 
00290         /* get current char*/
00291         curUC = *unicharBuffer++;
00292                 
00293         /* get next char*/
00294         if( unicharBuffer < sourceLimit ) 
00295           nextUC = *unicharBuffer;
00296         else
00297           nextUC = INVALIDCHAR;
00298                 
00299         /* chars less than 0x0080 (excluding tags) go straight in
00300            stream */
00301         if( curUC < 0x0080 ) {
00302           loByte = curUC;
00303                     
00304           /* we need to check and make sure we don't
00305              accidentally write a single byte mode tag to
00306              the stream unless it's quoted */
00307           if(sSingleTagTable[loByte]) {
00308             /* make sure there is enough room to write
00309                both bytes and if not, rewind the source
00310                stream and break out*/
00311             if( (byteBuffer + 1) >= targetLimit) { 
00312               --unicharBuffer; 
00313               goto finish;
00314             }
00315                         
00316             /* since we know the byte is less than 0x80, SQUOTE0
00317                will use static window 0, or Latin-1*/
00318             *byteBuffer++ = (uint8_t) SQUOTE0;
00319           }
00320                     
00321           *byteBuffer++ = (uint8_t) loByte;
00322         }
00323                 
00324         /* if the char belongs to current window, convert it
00325            to a byte by adding the generic compression offset
00326            and subtracting the window's offset*/
00327         else if(scsu_inDynamicWindow(comp, 
00328                                      curUC, comp->fCurrentWindow) ) {
00329           *byteBuffer++ = (uint8_t) 
00330             (curUC - comp->fOffsets[ comp->fCurrentWindow ] 
00331              + COMPRESSIONOFFSET);
00332         }
00333             
00334         /* if char is not in compressible range, either switch
00335            to or quote from unicode*/
00336         else if( ! scsu_isCompressible(curUC) ) {
00337           /* only check next character if it is valid*/
00338           if(nextUC != INVALIDCHAR && scsu_isCompressible(nextUC)) {
00339             /* make sure there is enough room to write all
00340                three bytes if not, rewind the source
00341                stream and break out*/
00342             if( (byteBuffer + 2) >= targetLimit) {
00343               --unicharBuffer; 
00344               goto finish;
00345             }
00346                     
00347             *byteBuffer++ = (uint8_t) SQUOTEU;
00348             *byteBuffer++ = (uint8_t) (curUC >> 8);
00349             *byteBuffer++ = (uint8_t) curUC;
00350           }
00351           else {
00352             /* make sure there is enough room to write all
00353                four bytes and if not, rewind the source
00354                stream and break out*/
00355             if( (byteBuffer + 3) >= targetLimit) { 
00356               --unicharBuffer; 
00357               goto finish;
00358             }
00359                     
00360             *byteBuffer++  = (uint8_t) SCHANGEU;
00361                         
00362             hiByte = curUC >> 8;
00363             loByte = curUC;
00364                         
00365             /* add quote Unicode tag */
00366             if( sUnicodeTagTable[hiByte] )
00367               *byteBuffer++ = (uint8_t) UQUOTEU;  
00368                         
00369             *byteBuffer++ = (uint8_t) hiByte;
00370             *byteBuffer++ = (uint8_t) loByte;
00371                         
00372             comp->fMode = UNICODEMODE;
00373 
00374             /* use a goto here for speed, to avoid having
00375                to check fMode in the while loop at the top
00376                of the case */
00377             goto mainLoop;
00378           }
00379         }
00380             
00381         /* if the char is in a currently defined dynamic
00382            window, figure out which one, and either switch to
00383            it or quote from it*/
00384         else if( (whichWindow = scsu_findDynamicWindow(comp, curUC)) 
00385                  != INVALIDWINDOW ) {
00386           /* look ahead*/
00387           if( (unicharBuffer + 1) < sourceLimit )
00388             forwardUC = *(unicharBuffer + 1);
00389           else
00390             forwardUC = INVALIDCHAR;
00391                     
00392           /* all three chars in same window, switch to that
00393              window- inDynamicWindow will return FALSE for
00394              INVALIDCHAR*/
00395           if( scsu_inDynamicWindow(comp, nextUC, whichWindow) 
00396               && scsu_inDynamicWindow(comp, forwardUC, whichWindow)){
00397             /* make sure there is enough room to write
00398                both bytes and if not, rewind the source
00399                stream and break out*/
00400             if( (byteBuffer + 1) >= targetLimit) { 
00401               --unicharBuffer; 
00402               goto finish;
00403             }
00404                         
00405             *byteBuffer++ = (uint8_t) (SCHANGE0 + whichWindow);
00406             *byteBuffer++ = (uint8_t) 
00407               (curUC - comp->fOffsets[whichWindow] 
00408                + COMPRESSIONOFFSET);
00409             comp->fTimeStamps [ whichWindow ] = ++(comp->fTimeStamp);
00410             comp->fCurrentWindow  = whichWindow;
00411           }
00412                     
00413           /* either only next char or neither in same
00414              window, so quote*/
00415           else {
00416             /* make sure there is enough room to write
00417                both bytes and if not, rewind the source stream
00418                and break out*/
00419             if( (byteBuffer + 1) >= targetLimit) { 
00420               --unicharBuffer; 
00421               goto finish;
00422             }
00423                     
00424             *byteBuffer++ = (uint8_t) (SQUOTE0 + whichWindow);
00425             *byteBuffer++ = (uint8_t) 
00426               (curUC - comp->fOffsets[whichWindow] 
00427                + COMPRESSIONOFFSET);
00428           }
00429         }
00430                 
00431         /* if a static window is defined, and the following
00432            character is not in that static window, quote from
00433            the static window Note: to quote from a static
00434            window, don't add 0x80*/
00435         else if( (whichWindow = scsu_findStaticWindow(curUC)) 
00436                  != INVALIDWINDOW 
00437                  && ! scsu_inStaticWindow(nextUC, whichWindow) ) {
00438           /* make sure there is enough room to write both
00439              bytes if not, rewind the source stream and
00440              break out*/
00441           if( (byteBuffer + 1) >= targetLimit) { 
00442             --unicharBuffer; 
00443             goto finish;
00444           }
00445                 
00446           *byteBuffer++ = (uint8_t) (SQUOTE0 + whichWindow);
00447           *byteBuffer++ = (uint8_t) (curUC - sOffsets[whichWindow]);
00448         }
00449             
00450         /* if a window is not defined, decide if we want to
00451            define a new one or switch to unicode mode*/
00452         else {
00453           /* determine index for current char (char is
00454              compressible)*/
00455           curIndex = scsu_makeIndex(curUC);
00456           comp->fIndexCount[curIndex]++;
00457                     
00458           /* look ahead*/
00459           if( (unicharBuffer + 1) < sourceLimit )
00460             forwardUC = *(unicharBuffer + 1);
00461           else
00462             forwardUC = INVALIDCHAR;
00463                     
00464           /* if we have encountered this index at least once
00465              before, define a new window*/
00466           if( comp->fIndexCount[curIndex] > 1 ) {
00467             /* make sure there is enough room to write all
00468                three bytes and if not, rewind the source
00469                stream and break out*/
00470             if( (byteBuffer + 2) >= targetLimit) { 
00471               --unicharBuffer; 
00472               goto finish;
00473             }
00474 
00475             /* get least recently defined window*/
00476             whichWindow = scsu_getLRDefinedWindow(comp);
00477                         
00478             *byteBuffer++ = (uint8_t) (SDEFINE0 + whichWindow);
00479             *byteBuffer++ = (uint8_t) curIndex;
00480             *byteBuffer++ = (uint8_t) 
00481               (curUC - sOffsetTable[curIndex] 
00482                + COMPRESSIONOFFSET);
00483                         
00484             comp->fOffsets[whichWindow] = sOffsetTable[curIndex];
00485             comp->fCurrentWindow = whichWindow;
00486             comp->fTimeStamps [whichWindow] = ++(comp->fTimeStamp);
00487           }
00488                 
00489           /* three chars in a row with same index, define a
00490              new window- makeIndex will return RESERVEDINDEX
00491              for INVALIDCHAR*/
00492           else if( curIndex == scsu_makeIndex(nextUC) 
00493                    && curIndex == scsu_makeIndex(forwardUC) ) {
00494             /* make sure there is enough room to write all
00495                three bytes if not, rewind the source
00496                stream and break out*/
00497             if( (byteBuffer + 2) >= targetLimit) { 
00498               --unicharBuffer; 
00499               goto finish;
00500             }
00501 
00502             whichWindow = scsu_getLRDefinedWindow(comp);
00503                     
00504             *byteBuffer++ = (uint8_t) (SDEFINE0 + whichWindow);
00505             *byteBuffer++ = (uint8_t) curIndex;
00506             *byteBuffer++ = (uint8_t) 
00507               (curUC - sOffsetTable[curIndex] 
00508                + COMPRESSIONOFFSET);
00509                         
00510             comp->fOffsets[whichWindow] = sOffsetTable[curIndex];
00511             comp->fCurrentWindow = whichWindow;
00512             comp->fTimeStamps [whichWindow] = ++(comp->fTimeStamp);
00513           }
00514                 
00515           /* only two chars in a row with same index, so
00516              switch to unicode mode makeIndex will return
00517              RESERVEDINDEX for INVALIDCHAR*/
00518           else if( curIndex == scsu_makeIndex(nextUC) 
00519                    && curIndex != scsu_makeIndex(forwardUC) ) {
00520             /* make sure there is enough room to write all
00521                four bytes if not, rewind the source stream
00522                and break out*/
00523             if( (byteBuffer + 3) >= targetLimit) { 
00524               --unicharBuffer; 
00525               goto finish;
00526             }
00527                         
00528             *byteBuffer++ = (uint8_t) SCHANGEU;
00529                         
00530             hiByte = curUC >> 8;
00531             loByte = curUC;
00532                         
00533             /* add quote Unicode tag */
00534             if( sUnicodeTagTable[hiByte] )
00535               *byteBuffer++ = (uint8_t) UQUOTEU;
00536                         
00537             *byteBuffer++ = (uint8_t) hiByte;
00538             *byteBuffer++ = (uint8_t) loByte;
00539                         
00540             comp->fMode = UNICODEMODE;
00541 
00542             /* use a goto here for speed, to avoid having
00543                to check fMode in the while loop at the top
00544                of the case */
00545             goto mainLoop;
00546           }
00547                     
00548           /* three chars have different indices, so switch
00549              to unicode mode*/
00550           else {
00551             /* make sure there is enough room to write all
00552                four bytes and if not, rewind the source
00553                stream and break out*/
00554             if( (byteBuffer + 3) >= targetLimit) { 
00555               --unicharBuffer; 
00556               goto finish;
00557             }
00558                     
00559             *byteBuffer++ = (uint8_t) SCHANGEU;
00560                         
00561             hiByte = curUC >> 8;
00562             loByte = curUC;
00563                         
00564             /* add quote Unicode tag*/
00565             if( sUnicodeTagTable[ hiByte ] )
00566               *byteBuffer++ = (uint8_t) UQUOTEU;
00567                         
00568             *byteBuffer++ = (uint8_t) hiByte;
00569             *byteBuffer++ = (uint8_t) loByte;
00570                         
00571             comp->fMode = UNICODEMODE;
00572 
00573             /* use a goto here for speed, to avoid having
00574                to check fMode in the while loop at the top
00575                of the case */
00576             goto mainLoop;
00577           }
00578         }
00579       }
00580       break;
00581             
00582       /* main unicode mode compression loop*/
00583     case UNICODEMODE:
00584       while(unicharBuffer < sourceLimit && byteBuffer < targetLimit) {
00585 
00586         /* get current char*/
00587         curUC = *unicharBuffer++;  
00588                 
00589         /* get next char*/
00590         if( unicharBuffer < sourceLimit )
00591           nextUC = *unicharBuffer;
00592         else
00593           nextUC = INVALIDCHAR;
00594                 
00595         /* if we have two uncompressible unichars in a row,
00596            put the current char's bytes in the stream*/
00597         if( ! scsu_isCompressible(curUC) 
00598             || (nextUC != INVALIDCHAR 
00599                 && ! scsu_isCompressible(nextUC)) ) {
00600           /* make sure there is enough room to write all
00601              three bytes and if not, rewind the source
00602              stream and break out*/
00603           if( (byteBuffer + 2) >= targetLimit) { 
00604             --unicharBuffer; 
00605             goto finish;
00606           }
00607                     
00608           hiByte = curUC >> 8;
00609           loByte = curUC;
00610                     
00611           /* add quote Unicode tag*/
00612           if( sUnicodeTagTable[ hiByte ] )
00613             *byteBuffer++   = (uint8_t) UQUOTEU;  
00614                     
00615           *byteBuffer++ = (uint8_t) hiByte;
00616           *byteBuffer++ = (uint8_t) loByte;
00617         }
00618                 
00619         /* bytes less than 0x80 can go straight in the stream,
00620            but in single-byte mode*/
00621         else if( curUC < 0x0080 ) {
00622           loByte = curUC;
00623                     
00624           /* if two chars in a row below 0x80 and the
00625              current char is not a single-byte mode tag,
00626              switch to single-byte mode*/
00627           if(nextUC != INVALIDCHAR 
00628              && nextUC < 0x0080 && ! sSingleTagTable[ loByte ] ) {
00629             /* make sure there is enough room to write
00630                both bytes and if not, rewind the source stream
00631                and break out*/
00632             if( (byteBuffer + 1) >= targetLimit) { 
00633               --unicharBuffer; 
00634               goto finish;
00635             }
00636                         
00637             /* use window 0, but any would work*/
00638             *byteBuffer++ = (uint8_t) UCHANGE0;
00639             *byteBuffer++ = (uint8_t) loByte;
00640                         
00641             comp->fCurrentWindow = 0;
00642             comp->fTimeStamps [0] = ++(comp->fTimeStamp);
00643             comp->fMode = SINGLEBYTEMODE;
00644 
00645             /* use a goto here for speed, to avoid having
00646                to check fMode in the while loop at the top
00647                of the case */
00648             goto mainLoop;
00649           }
00650                     
00651           /* otherwise, just write the bytes to the stream
00652              (this will cover the case of only 1 char less
00653              than 0x80 and single-byte mode tags)*/
00654           else {
00655             /* make sure there is enough room to write
00656                both bytes and if not, rewind the source
00657                stream and break out*/
00658             if( (byteBuffer + 1) >= targetLimit) {
00659               --unicharBuffer; 
00660               goto finish;
00661             }
00662                         
00663             /* since the character is less than 0x80, the
00664                high byte is always 0x00 - no need for
00665                (curUC >> 8)*/
00666             *byteBuffer++ = (uint8_t) 0x00;
00667             *byteBuffer++ = (uint8_t) loByte;
00668           }
00669         }
00670                 
00671         /* figure out if the current unichar is in a defined
00672            window*/
00673         else if( (whichWindow = scsu_findDynamicWindow(comp, curUC)) 
00674                  != INVALIDWINDOW ) {
00675           /* if two chars in a row in the same window,
00676              switch to that window and go to single-byte
00677              mode inDynamicWindow will return FALSE for
00678              INVALIDCHAR*/
00679           if( scsu_inDynamicWindow(comp, nextUC, whichWindow) ) {
00680             /* make sure there is enough room to write
00681                both bytes if not, rewind the source stream
00682                and break out*/
00683             if( (byteBuffer + 1) >= targetLimit) { 
00684               --unicharBuffer; 
00685               goto finish;
00686             }
00687                         
00688             *byteBuffer++ = (uint8_t) (UCHANGE0 + whichWindow);
00689             *byteBuffer++ = (uint8_t) 
00690               (curUC - comp->fOffsets[whichWindow] 
00691                + COMPRESSIONOFFSET);
00692                         
00693             comp->fTimeStamps[whichWindow] = ++(comp->fTimeStamp);
00694             comp->fCurrentWindow = whichWindow;
00695             comp->fMode = SINGLEBYTEMODE;
00696 
00697             /* use a goto here for speed, to avoid having
00698                to check fMode in the while loop at the top
00699                of the case */
00700             goto mainLoop;
00701           }
00702 
00703           /* otherwise, just quote the unicode for the
00704              char*/
00705           else {
00706             /* make sure there is enough room to write all
00707                three bytes and if not, rewind the source
00708                stream and break out*/
00709             if( (byteBuffer + 2) >= targetLimit) { 
00710               --unicharBuffer; 
00711               goto finish;
00712             }
00713                         
00714             hiByte = curUC >> 8;
00715             loByte = curUC;
00716                         
00717             /* add quote Unicode tag*/
00718             if( sUnicodeTagTable[ hiByte ] )
00719               *byteBuffer++ = (uint8_t) UQUOTEU;
00720                         
00721             *byteBuffer++ = (uint8_t) hiByte;
00722             *byteBuffer++ = (uint8_t) loByte;
00723           }
00724         }
00725                 
00726         /* char is not in a defined window*/
00727         else {
00728           /* determine index for current char (char is
00729              compressible)*/
00730           curIndex = scsu_makeIndex(curUC);
00731           comp->fIndexCount[curIndex]++;
00732                     
00733           /* look ahead*/
00734           if( (unicharBuffer + 1) < sourceLimit )
00735             forwardUC = *unicharBuffer;
00736           else
00737             forwardUC = INVALIDCHAR;
00738                     
00739           /* if we have encountered this index at least once
00740              before, define a new window for it that hasn't
00741              previously been redefined*/
00742           if( comp->fIndexCount[curIndex] > 1 ) {
00743             /* make sure there is enough room to write all
00744                three bytes if not, rewind the source
00745                stream and break out*/
00746             if( (byteBuffer + 2) >= targetLimit) { 
00747               --unicharBuffer; 
00748               goto finish;
00749             }
00750                         
00751             /* get least recently defined window*/
00752             whichWindow = scsu_getLRDefinedWindow(comp);
00753                         
00754             *byteBuffer++ = (uint8_t) (UDEFINE0 + whichWindow);
00755             *byteBuffer++ = (uint8_t) curIndex;
00756             *byteBuffer++ = (uint8_t) 
00757               (curUC - sOffsetTable[curIndex] 
00758                + COMPRESSIONOFFSET);
00759                         
00760             comp->fOffsets[whichWindow] = sOffsetTable[curIndex];
00761             comp->fCurrentWindow = whichWindow;
00762             comp->fTimeStamps[whichWindow] = ++(comp->fTimeStamp);
00763             comp->fMode = SINGLEBYTEMODE;
00764 
00765             /* use a goto here for speed, to avoid having
00766                to check fMode in the while loop at the top
00767                of the case */
00768             goto mainLoop;
00769           }
00770                 
00771           /* if three chars in a row with the same index,
00772              define a new window makeIndex will return
00773              RESERVEDINDEX for INVALIDCHAR*/
00774           else if( curIndex == scsu_makeIndex(nextUC) 
00775                    && curIndex == scsu_makeIndex(forwardUC) ) {
00776             /* make sure there is enough room to write all
00777                three bytes if not, rewind the source
00778                stream and break out*/
00779             if( (byteBuffer + 2) >= targetLimit) { 
00780               --unicharBuffer; 
00781               goto finish;
00782             }
00783                         
00784             whichWindow = scsu_getLRDefinedWindow(comp);
00785                         
00786             *byteBuffer++ = (uint8_t) (UDEFINE0 + whichWindow);
00787             *byteBuffer++ = (uint8_t) curIndex;
00788             *byteBuffer++ = (uint8_t) 
00789               (curUC - sOffsetTable[curIndex] 
00790                + COMPRESSIONOFFSET);
00791                         
00792             comp->fOffsets[whichWindow] = sOffsetTable[curIndex];
00793             comp->fCurrentWindow = whichWindow;
00794             comp->fTimeStamps[whichWindow] = ++(comp->fTimeStamp);
00795             comp->fMode = SINGLEBYTEMODE;
00796 
00797             /* use a goto here for speed, to avoid having
00798                to check fMode in the while loop at the top
00799                of the case */
00800             goto mainLoop;
00801           }
00802                     
00803           /* otherwise just quote the unicode, and save our
00804              windows for longer runs*/
00805           else {
00806             /* make sure there is enough room to write all
00807                three bytes and if not, rewind the source
00808                stream and break out*/
00809             if( (byteBuffer + 2) >= targetLimit) { 
00810               --unicharBuffer; 
00811               goto finish;
00812             }
00813                         
00814             hiByte = curUC >> 8;
00815             loByte = curUC;
00816                         
00817             /* add quote Unicode tag*/
00818             if( sUnicodeTagTable[ hiByte ] )
00819               *byteBuffer++ = (uint8_t) UQUOTEU;
00820                         
00821             *byteBuffer++ = (uint8_t) hiByte;
00822             *byteBuffer++ = (uint8_t) loByte;
00823           }
00824         }
00825       }
00826     }  /* end switch*/
00827   }
00828     
00829  finish:
00830     
00831     /* fill in output parameters*/
00832   *target = byteBuffer;
00833   *source = unicharBuffer;
00834 
00835   if(unicharBuffer < sourceLimit)
00836     *status = U_BUFFER_OVERFLOW_ERROR;
00837 }
00838 
00839 void 
00840 scsu_decompress(UnicodeCompressor *comp,
00841                 UChar             **target,
00842                 const UChar       *targetLimit,
00843                 const uint8_t     **source,
00844                 const uint8_t     *sourceLimit,
00845                 UErrorCode        *status)
00846 {
00847   /* the current position in the source byte buffer*/
00848   const uint8_t *byteBuffer = *source;
00849 
00850   /* the current position in the target unichar buffer*/
00851   UChar *unicharBuffer = *target;
00852 
00853   /* the current byte from the source buffer*/
00854   int32_t aByte  = 0x00;
00855 
00856   /* temporary for calculating surrogate pairs */
00857   int32_t normalizedBase;
00858    
00859   /* temporary used for look-ahead */
00860   int32_t dByte;
00861 
00862 
00863   /* verify we weren't passed a failing error code */
00864   if(U_FAILURE(*status)) {
00865     return; 
00866   }
00867   /* verify the target buffer can hold at least 1 UChar */
00868   else if(targetLimit - unicharBuffer < sizeof(UChar)) {
00869     *status = U_ILLEGAL_ARGUMENT_ERROR;
00870     return;
00871   }
00872 
00873   /* if our internal buffer isn't empty, flush its contents
00874        to the output buffer before doing any more decompression */
00875   if(comp->fBufferLength > 0) {
00876 
00877     int32_t newBytes = 0;
00878     const uint8_t *newSource = comp->fBuffer;
00879     const uint8_t *newSourceLimit = comp->fBuffer + USCSU_BUFSIZE;
00880 
00881     /* fill the buffer completely, to guarantee one full character */
00882     if(comp->fBufferLength != USCSU_BUFSIZE) {
00883       newBytes = USCSU_BUFSIZE - comp->fBufferLength;
00884 
00885       /* verify there are newBytes bytes in byteBuffer */
00886       if(sourceLimit - byteBuffer < newBytes)
00887         newBytes = sourceLimit - byteBuffer;
00888 
00889       uprv_memcpy(comp->fBuffer + comp->fBufferLength, byteBuffer, newBytes);
00890     }
00891 
00892     /* reset buffer length to 0 before recursive call */
00893     comp->fBufferLength = 0;
00894 
00895     /* call self recursively to decompress the buffer */
00896     scsu_decompress(comp, &unicharBuffer, targetLimit,
00897                     &newSource, newSourceLimit, status);
00898 
00899     /* update the positions into the arrays */
00900     /* unicharBuffer was updated by the call to decompress above */
00901     byteBuffer += newBytes;
00902   }
00903 
00904   /* the main decompression loop*/
00905  mainLoop:
00906   while(byteBuffer < sourceLimit && unicharBuffer < targetLimit) {
00907 
00908     switch(comp->fMode) {  
00909 
00910       /* single-byte mode decompression loop*/
00911     case SINGLEBYTEMODE:
00912       while(byteBuffer < sourceLimit && unicharBuffer < targetLimit) {
00913                 
00914         /* get the next byte */
00915         aByte = *byteBuffer++;
00916                 
00917         switch(aByte) {
00918           /* All bytes from 0x80 through 0xFF are remapped to
00919              chars or surrogate pairs according to the currently
00920              active window */
00921         case 0x80: case 0x81: case 0x82: case 0x83: case 0x84:
00922         case 0x85: case 0x86: case 0x87: case 0x88: case 0x89:
00923         case 0x8A: case 0x8B: case 0x8C: case 0x8D: case 0x8E:
00924         case 0x8F: case 0x90: case 0x91: case 0x92: case 0x93:
00925         case 0x94: case 0x95: case 0x96: case 0x97: case 0x98:
00926         case 0x99: case 0x9A: case 0x9B: case 0x9C: case 0x9D:
00927         case 0x9E: case 0x9F: case 0xA0: case 0xA1: case 0xA2:
00928         case 0xA3: case 0xA4: case 0xA5: case 0xA6: case 0xA7:
00929         case 0xA8: case 0xA9: case 0xAA: case 0xAB: case 0xAC:
00930         case 0xAD: case 0xAE: case 0xAF: case 0xB0: case 0xB1:
00931         case 0xB2: case 0xB3: case 0xB4: case 0xB5: case 0xB6:
00932         case 0xB7: case 0xB8: case 0xB9: case 0xBA: case 0xBB:
00933         case 0xBC: case 0xBD: case 0xBE: case 0xBF: case 0xC0:
00934         case 0xC1: case 0xC2: case 0xC3: case 0xC4: case 0xC5:
00935         case 0xC6: case 0xC7: case 0xC8: case 0xC9: case 0xCA:
00936         case 0xCB: case 0xCC: case 0xCD: case 0xCE: case 0xCF:
00937         case 0xD0: case 0xD1: case 0xD2: case 0xD3: case 0xD4:
00938         case 0xD5: case 0xD6: case 0xD7: case 0xD8: case 0xD9:
00939         case 0xDA: case 0xDB: case 0xDC: case 0xDD: case 0xDE:
00940         case 0xDF: case 0xE0: case 0xE1: case 0xE2: case 0xE3:
00941         case 0xE4: case 0xE5: case 0xE6: case 0xE7:  case 0xE8:
00942         case 0xE9: case 0xEA: case 0xEB: case 0xEC: case 0xED:
00943         case 0xEE: case 0xEF: case 0xF0: case 0xF1: case 0xF2:
00944         case 0xF3: case 0xF4: case 0xF5: case 0xF6: case 0xF7:
00945         case 0xF8: case 0xF9: case 0xFA: case 0xFB: case 0xFC:
00946         case 0xFD: case 0xFE: case 0xFF: 
00947                     
00948           /* For offsets <= 0xFFFF, convert to a single char by
00949              adding the window's offset and subtracting the
00950              generic compression offset*/
00951           if(comp->fOffsets[ comp->fCurrentWindow ] <= 0xFFFF) {
00952             *unicharBuffer++ = (UChar) 
00953               (aByte + comp->fOffsets[comp->fCurrentWindow] 
00954                - COMPRESSIONOFFSET);
00955           }
00956           /* For offsets > 0x10000, convert to a surrogate pair by 
00957              normBase = window's offset - 0x10000
00958              high surrogate = 0xD800 + (normBase >> 10)
00959              low  surrogate = 0xDC00 + (normBase & 0x3FF) 
00960              + (byte & 0x7F) */
00961           else {
00962             /* make sure there is enough room to write
00963                both characters 
00964                if not, save state and break out */
00965             if((unicharBuffer + 1) >= targetLimit) {
00966               --byteBuffer;
00967               uprv_memcpy(comp->fBuffer, byteBuffer, 
00968                          sourceLimit - byteBuffer);
00969               comp->fBufferLength = sourceLimit - byteBuffer;
00970               byteBuffer += comp->fBufferLength;
00971               goto finish;
00972             }
00973                     
00974             normalizedBase = comp->fOffsets[comp->fCurrentWindow] 
00975               - 0x10000;
00976             *unicharBuffer++ = 
00977               (UChar) (0xD800 + (normalizedBase >> 10));
00978             *unicharBuffer++ = (UChar) 
00979               (0xDC00 + (normalizedBase & 0x3FF) 
00980                + (aByte & 0x7F));
00981           }
00982           break;
00983                     
00984           /* bytes from 0x20 through 0x7F are treated as ASCII
00985              and are remapped to chars by padding the high byte
00986              (this is the same as quoting from static window 0)
00987              NUL (0x00), HT (0x09), CR (0x0A), LF (0x0D) are
00988              treated as ASCII as well*/
00989         case 0x00: case 0x09: case 0x0A: case 0x0D:
00990         case 0x20: case 0x21: case 0x22: case 0x23: case 0x24:
00991         case 0x25: case 0x26: case 0x27: case 0x28: case 0x29:
00992         case 0x2A: case 0x2B: case 0x2C: case 0x2D: case 0x2E:
00993         case 0x2F: case 0x30: case 0x31: case 0x32: case 0x33:
00994         case 0x34: case 0x35: case 0x36: case 0x37: case 0x38:
00995         case 0x39: case 0x3A: case 0x3B: case 0x3C: case 0x3D:
00996         case 0x3E: case 0x3F: case 0x40: case 0x41: case 0x42:
00997         case 0x43: case 0x44: case 0x45: case 0x46: case 0x47:
00998         case 0x48: case 0x49: case 0x4A: case 0x4B: case 0x4C:
00999         case 0x4D: case 0x4E: case 0x4F: case 0x50: case 0x51:
01000         case 0x52: case 0x53: case 0x54: case 0x55: case 0x56:
01001         case 0x57: case 0x58: case 0x59: case 0x5A: case 0x5B:
01002         case 0x5C: case 0x5D: case 0x5E: case 0x5F: case 0x60:
01003         case 0x61: case 0x62: case 0x63: case 0x64: case 0x65:
01004         case 0x66: case 0x67: case 0x68: case 0x69: case 0x6A:
01005         case 0x6B: case 0x6C: case 0x6D: case 0x6E: case 0x6F:
01006         case 0x70: case 0x71: case 0x72: case 0x73: case 0x74:
01007         case 0x75: case 0x76: case 0x77: case 0x78: case 0x79:
01008         case 0x7A: case 0x7B: case 0x7C: case 0x7D: case 0x7E:
01009         case 0x7F: 
01010           *unicharBuffer++ = (UChar) aByte;
01011           break;
01012                     
01013           /* quote unicode*/
01014         case SQUOTEU:
01015           /* verify we have two bytes following tag and if not,
01016              rewind the source stream and break out */
01017           if( (byteBuffer + 1) >= sourceLimit ) {
01018             --byteBuffer;
01019             uprv_memcpy(comp->fBuffer, byteBuffer, 
01020                        sourceLimit - byteBuffer);
01021             comp->fBufferLength = sourceLimit - byteBuffer;
01022             byteBuffer += comp->fBufferLength;
01023             goto finish;
01024           }
01025                     
01026           aByte = *byteBuffer++;
01027           *unicharBuffer++ = 
01028             (UChar) (aByte << 8 | *byteBuffer++);
01029           break;
01030                     
01031           /* switch to Unicode mode*/
01032         case SCHANGEU:
01033           comp->fMode = UNICODEMODE;
01034           /* use a goto here for speed, to avoid having to check
01035              fMode in the while loop at the top of the case */
01036           goto mainLoop;
01037           break;
01038                     
01039           /* handle all quote tags*/
01040         case SQUOTE0:   case SQUOTE1:   case SQUOTE2:  case SQUOTE3:
01041         case SQUOTE4:   case SQUOTE5:   case SQUOTE6:  case SQUOTE7:
01042           /* verify there is a byte following the tag and if
01043              not, rewind the source stream and break out*/
01044           if( byteBuffer >= sourceLimit ) { 
01045             --byteBuffer;
01046             uprv_memcpy(comp->fBuffer, byteBuffer, 
01047                        sourceLimit - byteBuffer);
01048             comp->fBufferLength = sourceLimit - byteBuffer;
01049             byteBuffer += comp->fBufferLength;
01050             goto finish;
01051           }
01052                     
01053           /* if the byte is in the range 0x00 - 0x7F, use static
01054              window n- otherwise, use dynamic window n */
01055           dByte = *byteBuffer++;
01056           *unicharBuffer++ = (UChar) 
01057             (dByte + (dByte >= 0x00 && dByte < 0x80 
01058                       ? sOffsets[aByte - SQUOTE0] 
01059                       : (comp->fOffsets[aByte - SQUOTE0] 
01060                          - COMPRESSIONOFFSET))); 
01061           break;
01062                     
01063           /* handle all change tags*/
01064         case SCHANGE0: case SCHANGE1: case SCHANGE2: case SCHANGE3: 
01065         case SCHANGE4: case SCHANGE5: case SCHANGE6: case SCHANGE7:
01066           comp->fCurrentWindow = (aByte - SCHANGE0);
01067           break;
01068                     
01069           /* handle all define tags*/
01070         case SDEFINE0: case SDEFINE1: case SDEFINE2: case SDEFINE3:
01071         case SDEFINE4: case SDEFINE5: case SDEFINE6: case SDEFINE7:
01072           /* verify there is a byte following the tag and if
01073              not, rewind the source stream and break out*/
01074           if( byteBuffer >= sourceLimit ) {
01075             --byteBuffer;
01076             uprv_memcpy(comp->fBuffer, byteBuffer, 
01077                        sourceLimit - byteBuffer);
01078             comp->fBufferLength = sourceLimit - byteBuffer;
01079             byteBuffer += comp->fBufferLength;
01080             goto finish;
01081           }
01082                     
01083           comp->fCurrentWindow = (aByte - SDEFINE0);
01084           comp->fOffsets[comp->fCurrentWindow] = 
01085             sOffsetTable[*byteBuffer++];
01086           break;
01087                     
01088           /* handle define extended tag*/
01089         case SDEFINEX:
01090           /* verify we have two bytes following tag and if not,
01091              rewind the source stream and break out*/
01092           if( (byteBuffer + 1) >= sourceLimit ) {
01093             --byteBuffer;
01094             uprv_memcpy(comp->fBuffer, byteBuffer, 
01095                        sourceLimit - byteBuffer);
01096             comp->fBufferLength = sourceLimit - byteBuffer;
01097             byteBuffer += comp->fBufferLength;
01098             goto finish;
01099           }
01100                     
01101           aByte = *byteBuffer++;
01102           comp->fCurrentWindow  = (aByte & 0xE0) >> 5;
01103           comp->fOffsets[comp->fCurrentWindow] = 0x10000 
01104             + (0x80 
01105                * (((aByte & 0x1F) << 8) | *byteBuffer++));
01106           break;
01107                     
01108           /* reserved, shouldn't happen*/
01109         case SRESERVED:
01110           break;
01111                     
01112         } /* end switch*/
01113       } /* end while*/
01114       break;
01115             
01116       /* unicode mode decompression loop*/
01117     case UNICODEMODE:
01118       while( byteBuffer < sourceLimit && unicharBuffer < targetLimit ) {
01119 
01120         /* get the next byte */
01121         aByte = *byteBuffer++;
01122             
01123         switch( aByte ) {
01124           /* handle all define tags*/
01125         case UDEFINE0: case UDEFINE1: case UDEFINE2: case UDEFINE3:
01126         case UDEFINE4: case UDEFINE5: case UDEFINE6: case UDEFINE7:
01127           /* verify there is a byte following tag and if not,
01128              rewind the source stream and break out*/
01129           if( byteBuffer >= sourceLimit ) { 
01130             --byteBuffer;
01131             uprv_memcpy(comp->fBuffer, byteBuffer, 
01132                        sourceLimit - byteBuffer);
01133             comp->fBufferLength = sourceLimit - byteBuffer;
01134             byteBuffer += comp->fBufferLength;
01135             goto finish;
01136           }
01137   
01138           comp->fCurrentWindow = (aByte - UDEFINE0);
01139           comp->fOffsets[comp->fCurrentWindow] = 
01140             sOffsetTable[*byteBuffer++];
01141           comp->fMode = SINGLEBYTEMODE;
01142           /* use a goto here for speed, to avoid having to check
01143              fMode in the while loop at the top of the case */
01144           goto mainLoop;
01145           break;
01146                     
01147           /* handle define extended tag*/
01148         case UDEFINEX:
01149           /* verify we have two bytes following tag if not,
01150              rewind the source stream and break out*/
01151           if( (byteBuffer + 1) >= sourceLimit ) {
01152             --byteBuffer;
01153             uprv_memcpy(comp->fBuffer, byteBuffer, 
01154                        sourceLimit - byteBuffer);
01155             comp->fBufferLength = sourceLimit - byteBuffer;
01156             byteBuffer += comp->fBufferLength;
01157             goto finish;
01158           }
01159   
01160           aByte  = *byteBuffer++;
01161           comp->fCurrentWindow = (aByte & 0xE0) >> 5;
01162           comp->fOffsets[comp->fCurrentWindow] = 0x10000 
01163             + (0x80 
01164                * (((aByte & 0x1F) << 8) | *byteBuffer++));
01165           comp->fMode = SINGLEBYTEMODE;
01166           /* use a goto here for speed, to avoid having to check
01167              fMode in the while loop at the top of the case */
01168           goto mainLoop;
01169           break;
01170                 
01171           /* handle all change tags*/
01172         case UCHANGE0: case UCHANGE1: case UCHANGE2: case UCHANGE3:
01173         case UCHANGE4: case UCHANGE5: case UCHANGE6: case UCHANGE7:
01174           comp->fCurrentWindow = (aByte - UCHANGE0);
01175           comp->fMode  = SINGLEBYTEMODE;
01176           /* use a goto here for speed, to avoid having to check
01177              fMode in the while loop at the top of the case */
01178           goto mainLoop;
01179           break;
01180                     
01181           /* quote unicode*/
01182         case UQUOTEU:
01183           /* verify we have two bytes following tag if not,
01184              rewind the source stream and break out*/
01185           if( byteBuffer >= sourceLimit  - 1) { 
01186             --byteBuffer;
01187             uprv_memcpy(comp->fBuffer, byteBuffer, 
01188                        sourceLimit - byteBuffer);
01189             comp->fBufferLength = sourceLimit - byteBuffer;
01190             byteBuffer += comp->fBufferLength;
01191             goto finish;
01192           }
01193                     
01194           aByte = *byteBuffer++;
01195           *unicharBuffer++ = (UChar) 
01196             (aByte << 8 | *byteBuffer++);
01197           break;
01198 
01199         default:
01200           /* verify there is a byte following tag if not, rewind
01201              the source stream and break out*/
01202           if( byteBuffer >= sourceLimit ) { 
01203             --byteBuffer;
01204             uprv_memcpy(comp->fBuffer, byteBuffer, 
01205                        sourceLimit - byteBuffer);
01206             comp->fBufferLength = sourceLimit - byteBuffer;
01207             byteBuffer += comp->fBufferLength;
01208             goto finish;
01209           }
01210 
01211           *unicharBuffer++ = (UChar) (aByte << 8 | *byteBuffer++);
01212           break;
01213                 
01214         } /* end switch*/
01215       } /* end while*/
01216       break;
01217 
01218     } /* end switch( comp->fMode )*/
01219   } /* end while*/
01220 
01221     
01222  finish:
01223 
01224   /* fill in return values*/
01225   *target = unicharBuffer;
01226   *source = byteBuffer;
01227 
01228   if(byteBuffer < sourceLimit)
01229     *status = U_BUFFER_OVERFLOW_ERROR;
01230 }
01231 
01233 void 
01234 scsu_reset(UnicodeCompressor *comp)
01235 {
01236   int32_t i;
01237 
01238   /* reset dynamic windows*/
01239   comp->fOffsets[0] = 0x0080;  /* Latin-1*/
01240   comp->fOffsets[1] = 0x00C0;  /* Latin-1 Supplement + Latin Extended-A*/
01241   comp->fOffsets[2] = 0x0400;  /* Cyrillic*/
01242   comp->fOffsets[3] = 0x0600;  /* Arabic*/
01243   comp->fOffsets[4] = 0x0900;  /* Devanagari*/
01244   comp->fOffsets[5] = 0x3040;  /* Hiragana*/
01245   comp->fOffsets[6] = 0x30A0;  /* Katakana*/
01246   comp->fOffsets[7] = 0xFF00;  /* Fullwidth ASCII*/
01247     
01248   /* reset time stamps*/
01249   for(i = 0; i < USCSU_NUM_WINDOWS; i++) {
01250     comp->fTimeStamps[i]          = 0;
01251   }
01252     
01253   /* reset count of seen indices*/
01254   for( i = 0; i <= USCSU_MAX_INDEX; i++ ) {
01255     comp->fIndexCount[i] = 0;
01256   }
01257     
01258   comp->fTimeStamp      = 0;              /* Reset current time stamp*/
01259   comp->fCurrentWindow  = 0;              /* Make current window Latin-1*/
01260   comp->fMode           = SINGLEBYTEMODE; /* Start in single-byte mode*/
01261   comp->fBufferLength   = 0;              /* Empty buffer */
01262 }
01263 
01271 static int32_t 
01272 scsu_makeIndex(int32_t c)
01273 {
01274   /* check the predefined indices*/
01275   if( c >= 0x00C0 && c < 0x0140)
01276     return LATININDEX;
01277   else if( c >= 0x0250 && c < 0x02D0 )
01278     return IPAEXTENSIONINDEX;
01279   else if( c >= 0x0370 && c < 0x03F0 )
01280     return GREEKINDEX;
01281   else if( c >= 0x0530 && c < 0x0590 )
01282     return ARMENIANINDEX;
01283   else if( c >= 0x3040 && c < 0x30A0 )
01284     return HIRAGANAINDEX;
01285   else if( c >= 0x30A0 && c < 0x3120)
01286     return KATAKANAINDEX;
01287   else if( c >= 0xFF60 && c < 0xFF9F )
01288     return HALFWIDTHKATAKANAINDEX;
01289     
01290     /* calculate index*/
01291   else if( c >= 0x0080 && c < 0x3400 )
01292     return (c / 0x80) & 0xFF;
01293   else if( c >= 0xE000 && c <= 0xFFFF )
01294     return ((c - 0xAC00) / 0x80) & 0xFF;
01295     
01296     /* should never happen*/
01297   else {
01298     return RESERVEDINDEX;
01299   }
01300 }
01301 
01309 static UBool 
01310 scsu_inDynamicWindow(const UnicodeCompressor *comp,
01311                      int32_t c, 
01312                      int32_t whichWindow)
01313 {
01314   return (UBool)(c >= comp->fOffsets[whichWindow] 
01315           && c < (comp->fOffsets[whichWindow] + 0x80));
01316 }
01317 
01325 static UBool 
01326 scsu_inStaticWindow(int32_t c, 
01327                     int32_t whichWindow)
01328 {
01329   return (UBool)(c >= sOffsets[whichWindow] && c < (sOffsets[whichWindow] + 0x80));
01330 }
01331 
01337 static UBool 
01338 scsu_isCompressible(int32_t c)
01339 {
01340   return (UBool)(c < 0x3400 || c >= 0xE000);
01341 }
01342 
01349 static int32_t 
01350 scsu_findDynamicWindow(const UnicodeCompressor *comp,
01351                        int32_t c)
01352 {
01353   int32_t i;
01354     
01355   for(i = 0; i < USCSU_NUM_WINDOWS; i++) {
01356     if(scsu_inDynamicWindow(comp, c, i)) {
01357       return i;
01358     }
01359   }
01360     
01361   return INVALIDWINDOW;
01362 }
01363 
01370 static int32_t 
01371 scsu_findStaticWindow(int32_t c)
01372 {
01373   int32_t i;
01374     
01375   for(i = 0; i < USCSU_NUM_STATIC_WINDOWS; i++) {
01376     if(scsu_inStaticWindow(c, i)) {
01377       return i;
01378     }
01379   }
01380     
01381   return INVALIDWINDOW;
01382 }
01383 
01385 static int32_t 
01386 scsu_getLRDefinedWindow(const UnicodeCompressor *comp)
01387 {
01388   int32_t leastRU         = INT32_MAX;
01389   int32_t whichWindow     = INVALIDWINDOW;
01390   int32_t i;
01391   
01392   /* find least recently used window*/
01393   for(i = 0; i < USCSU_NUM_WINDOWS; i++ ) {
01394     if(comp->fTimeStamps[i] < leastRU) {
01395       leastRU = comp->fTimeStamps[i];
01396       whichWindow = i;
01397     }
01398   }
01399     
01400   return whichWindow;
01401 }

Generated at Tue Dec 5 10:47:52 2000 for ICU by doxygen 1.2.3 written by Dimitri van Heesch, © 1997-2000