Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

ucol.h File Reference

C API: Collator. More...

#include "unicode/utypes.h"
#include "unicode/unorm.h"
#include "unicode/ucoleitr.h"

Go to the source code of this file.

Defines

#define U_COL_SAFECLONE_BUFFERSIZE   256

Typedefs

typedef struct collIterate collIterate
typedef struct incrementalContext incrementalContext
typedef struct UCollator UCollator
typedef UColAttributeValue UCollationStrength
 Possible collation strengths - all under UColAttributeValue. More...


Enumerations

enum  UCollationResult { UCOL_EQUAL = 0, UCOL_GREATER = 1, UCOL_LESS = -1 }
 Possible values for a comparison result. More...

enum  UColAttributeValue {
  UCOL_DEFAULT = -1, UCOL_PRIMARY = 0, UCOL_SECONDARY = 1, UCOL_TERTIARY = 2,
  UCOL_DEFAULT_STRENGTH = UCOL_TERTIARY, UCOL_CE_STRENGTH_LIMIT, UCOL_QUATERNARY = 3, UCOL_IDENTICAL = 15,
  UCOL_STRENGTH_LIMIT, UCOL_OFF = 16, UCOL_ON = 17, UCOL_SHIFTED = 20,
  UCOL_NON_IGNORABLE = 21, UCOL_LOWER_FIRST = 24, UCOL_UPPER_FIRST = 25, UCOL_ON_WITHOUT_HANGUL = 28,
  UCOL_ATTRIBUTE_VALUE_COUNT
}
enum  UColAttribute {
  UCOL_FRENCH_COLLATION, UCOL_ALTERNATE_HANDLING, UCOL_CASE_FIRST, UCOL_CASE_LEVEL,
  UCOL_NORMALIZATION_MODE, UCOL_STRENGTH, UCOL_ATTRIBUTE_COUNT
}
enum  UColRuleOption { UCOL_TAILORING_ONLY, UCOL_FULL_RULES }

Functions

U_CAPI UCollator* ucol_open (const char *loc, UErrorCode *status)
 Open a UCollator for comparing strings. More...

U_CAPI UCollator* U_EXPORT2 ucol_openVersion (const char *loc, UVersionInfo version, UErrorCode *status)
 Open a UCollator with a specific version. More...

U_CAPI UCollator* ucol_openRules (const UChar *rules, int32_t rulesLength, UNormalizationMode mode, UCollationStrength strength, UErrorCode *status)
 Open a UCollator for comparing strings. More...

U_CAPI void ucol_close (UCollator *coll)
 Close a UCollator. More...

U_CAPI UCollationResult ucol_strcoll ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)
 Compare two strings. More...

U_CAPI UBool ucol_greater ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)
 Determine if one string is greater than another. More...

U_CAPI UBool ucol_greaterOrEqual ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)
 Determine if one string is greater than or equal to another. More...

U_CAPI UBool ucol_equal ( const UCollator *coll, const UChar *source, int32_t sourceLength, const UChar *target, int32_t targetLength)
 Compare two strings for equality. More...

U_CAPI UCollationStrength ucol_getStrength (const UCollator *coll)
 Get the collation strength used in a UCollator. More...

U_CAPI void ucol_setStrength ( UCollator *coll, UCollationStrength strength)
 Set the collation strength used in a UCollator. More...

U_CAPI UNormalizationMode ucol_getNormalization (const UCollator* coll)
 Get the normalization mode used in a UCollator. More...

U_CAPI void ucol_setNormalization ( UCollator *coll, UNormalizationMode mode)
 Set the normalization mode used in a UCollator. More...

U_CAPI int32_t ucol_getDisplayName ( const char *objLoc, const char *dispLoc, UChar *result, int32_t resultLength, UErrorCode *status)
 Get the display name for a UCollator. More...

U_CAPI const char* ucol_getAvailable (int32_t index)
 Get a locale for which collation rules are available. More...

U_CAPI int32_t ucol_countAvailable (void)
 Determine how many locales have collation rules available. More...

U_CAPI const UChar* ucol_getRules (const UCollator *coll, int32_t *length)
 Get the collation rules from a UCollator. More...

U_CAPI int32_t ucol_getSortKey (const UCollator *coll, const UChar *source, int32_t sourceLength, uint8_t *result, int32_t resultLength)
 Get a sort key for a string from a UCollator. More...

U_CAPI void U_EXPORT2 ucol_getVersion (const UCollator* coll, UVersionInfo info)
 Gets the version information for a Collator. More...

U_CAPI void ucol_setAttribute (UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status)
 Universal attribute setter. More...

U_CAPI UColAttributeValue ucol_getAttribute (const UCollator *coll, UColAttribute attr, UErrorCode *status)
 Universal attribute getter. More...

U_CAPI UCollator* ucol_safeClone (const UCollator *coll, void *stackBuffer, int32_t *pBufferSize, UErrorCode *status)
 Thread safe cloning operation. More...

U_CDECL_BEGIN typedef UChar UCharForwardIterator (void *context)
U_CDECL_END U_CAPI UCollationResult ucol_strcollinc (const UCollator *coll, UCharForwardIterator *source, void *sourceContext, UCharForwardIterator *target, void *targetContext)
 String compare that uses user supplied character iteration. More...

U_CAPI int32_t ucol_getRulesEx (const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen)
 Returns current rules. More...


Detailed Description

C API: Collator.

Collator C API

The C API for Collator performs locale-sensitive string comparison. You use this class to build searching and sorting routines for natural language text. Important: The ICU collation implementation is being reworked. This means that collation results and especially sort keys will change from ICU 1.6 to 1.7 and again to 1.8. For details, see the collation design document.

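Like other locale-sensitive classes, you can use the function ucol_open() to obtain the appropriate pointer to a UCollator object for a given locale. If you need to understand the details of a particular collation strategy, or if you need to modify that strategy, work with the collation rules directly: ucol_getRules() returns the rules of an existing collator, and ucol_openRules() opens a collator from custom rules.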

The following example shows how to compare two strings using the UCollator for the default locale.

 
 // Compare two strings in the default locale
 UErrorCode success = U_ZERO_ERROR;
 UCollator* myCollator = ucol_open(NULL, &success);
 UChar source[4], target[4];
 u_uastrcpy(source, "abc");
 u_uastrcpy(target, "ABC");
 if( ucol_strcoll(myCollator, source, u_strlen(source), target, u_strlen(target)) == UCOL_LESS) {
     printf("abc is less than ABC\n");
 }else{
     printf("abc is greater than or equal to ABC\n");
 }

You can set a Collator's strength property to determine the level of difference considered significant in comparisons. Four strengths are provided: UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, and UCOL_IDENTICAL. The exact assignment of strengths to language features is locale dependent. For example, in Czech, "e" and "f" are considered primary differences, while "e" and "\u00EA" are secondary differences, "e" and "E" are tertiary differences and "e" and "e" are identical. The following shows how both case and accents could be ignored for US English.

 
 //Get the Collator for US English and set its strength to UCOL_PRIMARY
 UErrorCode success = U_ZERO_ERROR;
 UCollator* usCollator = ucol_open("en_US", &success);
 ucol_setStrength(usCollator, UCOL_PRIMARY);
 UChar source[4], target[4];
 u_uastrcpy(source, "abc");
 u_uastrcpy(target, "ABC");
 if( ucol_strcoll(usCollator, source, u_strlen(source), target, u_strlen(target)) == UCOL_EQUAL) {
     printf("'abc' and 'ABC' strings are equivalent with strength UCOL_PRIMARY\n");
 }

For comparing strings exactly once, the ucol_strcoll function provides the best performance. When sorting a list of strings however, it is generally necessary to compare each string multiple times. In this case, sort keys provide better performance. The ucol_getSortKey function converts a string to a series of bytes that can be compared bitwise against other sort keys using strcmp(). Sort keys are written as zero-terminated byte strings. They consist of several substrings, one for each collation strength level, that are delimited by 0x01 bytes. If the string code points are appended for UCOL_IDENTICAL, then they are processed for correct code point order comparison and may contain 0x01 bytes but not zero bytes.

Note: UCollators with different Locale, Collation Strength and Decomposition Mode settings will return different sort orders for the same set of strings. Locales have specific collation rules, and the way in which secondary and tertiary differences are taken into account, for example, will result in a different sorting order for the same strings.

See also:
UCollationResult , UNormalizationMode , UCollationStrength , UCollationElements

Definition in file ucol.h.


Define Documentation

#define U_COL_SAFECLONE_BUFFERSIZE   256
 

Definition at line 580 of file ucol.h.


Typedef Documentation

typedef UColAttributeValue UCollationStrength
 

Possible collation strengths - all under UColAttributeValue.

Definition at line 208 of file ucol.h.

typedef struct UCollator UCollator
 

Definition at line 120 of file ucol.h.

typedef struct collIterate collIterate
 

Definition at line 111 of file ucol.h.

typedef struct incrementalContext incrementalContext
 

Definition at line 114 of file ucol.h.


Enumeration Type Documentation

enum UColAttribute
 

Enumeration values:
UCOL_FRENCH_COLLATION  
UCOL_ALTERNATE_HANDLING  
UCOL_CASE_FIRST  
UCOL_CASE_LEVEL  
UCOL_NORMALIZATION_MODE  
UCOL_STRENGTH  
UCOL_ATTRIBUTE_COUNT  

Definition at line 210 of file ucol.h.

enum UColAttributeValue
 

Enumeration values:
UCOL_DEFAULT  
UCOL_PRIMARY   Primary collation strength.
UCOL_SECONDARY   Secondary collation strength.
UCOL_TERTIARY   Tertiary collation strength.
UCOL_DEFAULT_STRENGTH   Default collation strength.
UCOL_CE_STRENGTH_LIMIT  
UCOL_QUATERNARY   Quaternary collation strength.
UCOL_IDENTICAL   Identical collation strength.
UCOL_STRENGTH_LIMIT  
UCOL_OFF  
UCOL_ON  
UCOL_SHIFTED  
UCOL_NON_IGNORABLE  
UCOL_LOWER_FIRST  
UCOL_UPPER_FIRST  
UCOL_ON_WITHOUT_HANGUL  
UCOL_ATTRIBUTE_VALUE_COUNT   No more attribute values after this.

Definition at line 143 of file ucol.h.

enum UColRuleOption
 

Enumeration values:
UCOL_TAILORING_ONLY  
UCOL_FULL_RULES  

Definition at line 220 of file ucol.h.

enum UCollationResult
 

Possible values for a comparison result.

Enumeration values:
UCOL_EQUAL   string a == string b.
UCOL_GREATER   string a > string b.
UCOL_LESS   string a < string b.

Definition at line 133 of file ucol.h.


Function Documentation

U_CDECL_BEGIN typedef UChar UCharForwardIterator ( void * context )
 

U_CAPI void ucol_close ( UCollator * coll )
 

Close a UCollator.

Once closed, a UCollator should not be used.

Parameters:
coll   The UCollator to close.
Stable:

Referenced by RuleBasedCollator::setUCollator().

U_CAPI int32_t ucol_countAvailable ( void )
 

Determine how many locales have collation rules available.

This function is most useful for determining the loop ending condition for calls to ucol_getAvailable().

Returns:
The number of locales for which collation rules are available.
See also:
ucol_getAvailable
Stable:

U_CAPI UBool ucol_equal ( const UCollator * coll,
const UChar * source,
int32_t sourceLength,
const UChar * target,
int32_t targetLength )
 

Compare two strings for equality.

This function is equivalent to ucol_strcoll() == UCOL_EQUAL.

Parameters:
coll   The UCollator containing the comparison rules.
source   The source string.
sourceLength   The length of source, or -1 if null-terminated.
target   The target string.
targetLength   The length of target, or -1 if null-terminated.
Returns:
TRUE if source is equal to target, FALSE otherwise
See also:
ucol_strcoll , ucol_greater , ucol_greaterOrEqual
Stable:

U_CAPI UColAttributeValue ucol_getAttribute ( const UCollator * coll,
UColAttribute attr,
UErrorCode * status )
 

Universal attribute getter.

Parameters:
coll   collator whose attributes are to be queried
attr   attribute type
Returns:
attribute value
Parameters:
status   to indicate whether the operation went on smoothly or there were errors
Draft:
API 1.7 freeze

U_CAPI const char * ucol_getAvailable ( int32_t index )
 

Get a locale for which collation rules are available.

A UCollator in a locale returned by this function will perform the correct collation for the locale.

Parameters:
index   The index of the desired locale.
Returns:
A locale for which collation rules are available, or 0 if none.
See also:
ucol_countAvailable
Stable:

U_CAPI int32_t ucol_getDisplayName ( const char * objLoc,
const char * dispLoc,
UChar * result,
int32_t resultLength,
UErrorCode * status )
 

Get the display name for a UCollator.

The display name is suitable for presentation to a user.

Parameters:
objLoc   The locale of the collator in question.
dispLoc   The locale for display.
result   A pointer to a buffer to receive the attribute.
resultLength   The maximum size of result.
status   A pointer to an UErrorCode to receive any errors
Returns:
The total buffer size needed; if greater than resultLength, the output was truncated.
Stable:

U_CAPI UNormalizationMode ucol_getNormalization ( const UCollator * coll )
 

Get the normalization mode used in a UCollator.

The normalization mode influences how strings are compared.

Parameters:
coll   The UCollator to query.
Returns:
The normalization mode; one of UCOL_NO_NORMALIZATION, UCOL_CAN_DECOMP, UCOL_COMPAT_DECOMP, UCOL_CAN_DECOMP_COMPAT_COMP, UCOL_COMPAT_DECOMP_CAN_COMP, UCOL_DEFAULT_NORMALIZATION
See also:
ucol_setNormalization
Stable:

U_CAPI const UChar * ucol_getRules ( const UCollator * coll,
int32_t * length )
 

Get the collation rules from a UCollator.

The rules will follow the rule syntax.

Parameters:
coll   The UCollator to query.
length  
Returns:
The collation rules.
Stable:

U_CAPI int32_t ucol_getRulesEx ( const UCollator * coll,
UColRuleOption delta,
UChar * buffer,
int32_t bufferLen )
 

Returns current rules.

Delta defines whether full rules are returned or just the tailoring. Returns number of UChars needed to store rules. If buffer is NULL or bufferLen is not enough to store rules, will store up to available space.

Parameters:
coll   collator to get the rules from
delta   one of UCOL_TAILORING_ONLY, UCOL_FULL_RULES.
buffer   buffer to store the result in. If NULL, you'll get no rules.
bufferLen   length of buffer to store rules in. If less than needed you'll get only the part that fits in.

U_CAPI int32_t ucol_getSortKey ( const UCollator * coll,
const UChar * source,
int32_t sourceLength,
uint8_t * result,
int32_t resultLength )
 

Get a sort key for a string from a UCollator.

Sort keys may be compared using strcmp.

Parameters:
coll   The UCollator containing the collation rules.
source   The string to transform.
sourceLength   The length of source, or -1 if null-terminated.
result   A pointer to a buffer to receive the attribute.
resultLength   The maximum size of result.
Returns:
The size needed to fully store the sort key.
See also:
ucol_keyHashCode
Stable:

U_CAPI UCollationStrength ucol_getStrength ( const UCollator * coll )
 

Get the collation strength used in a UCollator.

The strength influences how strings are compared.

Parameters:
coll   The UCollator to query.
Returns:
The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH
See also:
ucol_setStrength
Stable:

U_CAPI void U_EXPORT2 ucol_getVersion ( const UCollator * coll,
UVersionInfo info )
 

Gets the version information for a Collator.

Parameters:
info   the version # information, the result will be filled in
Stable:

U_CAPI UBool ucol_greater ( const UCollator * coll,
const UChar * source,
int32_t sourceLength,
const UChar * target,
int32_t targetLength )
 

Determine if one string is greater than another.

This function is equivalent to ucol_strcoll() == UCOL_GREATER.

Parameters:
coll   The UCollator containing the comparison rules.
source   The source string.
sourceLength   The length of source, or -1 if null-terminated.
target   The target string.
targetLength   The length of target, or -1 if null-terminated.
Returns:
TRUE if source is greater than target, FALSE otherwise.
See also:
ucol_strcoll , ucol_greaterOrEqual , ucol_equal
Stable:

U_CAPI UBool ucol_greaterOrEqual ( const UCollator * coll,
const UChar * source,
int32_t sourceLength,
const UChar * target,
int32_t targetLength )
 

Determine if one string is greater than or equal to another.

This function is equivalent to ucol_strcoll() != UCOL_LESS.

Parameters:
coll   The UCollator containing the comparison rules.
source   The source string.
sourceLength   The length of source, or -1 if null-terminated.
target   The target string.
targetLength   The length of target, or -1 if null-terminated.
Returns:
TRUE if source is greater than or equal to target, FALSE otherwise.
See also:
ucol_strcoll , ucol_greater , ucol_equal
Stable:

U_CAPI UCollator * ucol_open ( const char * loc,
UErrorCode * status )
 

Open a UCollator for comparing strings.

The UCollator may be used in calls to ucol_strcoll().

Parameters:
loc   The locale containing the comparison conventions.
status   A pointer to an UErrorCode to receive any errors
Returns:
A pointer to a UCollator, or 0 if an error occurred.
See also:
ucol_openRules
Stable:

Referenced by RuleBasedCollator::setUCollator().

U_CAPI UCollator * ucol_openRules ( const UChar * rules,
int32_t rulesLength,
UNormalizationMode mode,
UCollationStrength strength,
UErrorCode * status )
 

Open a UCollator for comparing strings.

The UCollator may be used in calls to ucol_strcoll().

Parameters:
rules   A string describing the collation rules.
rulesLength   The length of rules, or -1 if null-terminated.
mode   The normalization mode; one of UCOL_NO_NORMALIZATION, UCOL_CAN_DECOMP, UCOL_COMPAT_DECOMP, UCOL_CAN_DECOMP_COMPAT_COMP, UCOL_COMPAT_DECOMP_CAN_COMP, UCOL_DEFAULT_NORMALIZATION
strength   The collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH
status   A pointer to an UErrorCode to receive any errors
Returns:
A pointer to a UCollator, or 0 if an error occurred.
See also:
ucol_open
Stable:

U_CAPI UCollator *U_EXPORT2 ucol_openVersion ( const char * loc,
UVersionInfo version,
UErrorCode * status )
 

Open a UCollator with a specific version.

This is the same as ucol_open() except that ucol_getVersion() of the returned object is guaranteed to be the same as the version parameter. This is designed to be used to open the same collator for a given locale even when ICU is updated. The same locale and version guarantees the same sort keys and comparison results.

Parameters:
loc   The locale ID for which to open a collator.
version   The requested collator version.
status   A pointer to a UErrorCode, must not indicate a failure before calling this function.
Returns:
A pointer to a UCollator, or NULL if an error occurred or a collator with the requested version is not available.

See also:
ucol_open , ucol_getVersion
Draft:
ICU 1.8

U_CAPI UCollator * ucol_safeClone ( const UCollator * coll,
void * stackBuffer,
int32_t * pBufferSize,
UErrorCode * status )
 

Thread safe cloning operation.

Parameters:
coll   collator to be cloned
stackBuffer   user allocated space for the new clone. If NULL new memory will be allocated. If buffer is not large enough, new memory will be allocated. Clients can use the U_COL_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
pBufferSize   pointer to size of allocated space. If *pBufferSize == 0, a sufficient size for use in cloning will be returned ('pre-flighting'). If *pBufferSize is not enough for a stack-based safe clone, new memory will be allocated.
status   to indicate whether the operation went on smoothly or there were errors. An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were necessary.
Returns:
pointer to the new clone
Draft:
API 1.8 freeze

U_CAPI void ucol_setAttribute ( UCollator * coll,
UColAttribute attr,
UColAttributeValue value,
UErrorCode * status )
 

Universal attribute setter.

Parameters:
coll   collator which attributes are to be changed
attr   attribute type
value   attribute value
status   to indicate whether the operation went on smoothly or there were errors
Draft:
API 1.7 freeze

U_CAPI void ucol_setNormalization ( UCollator * coll,
UNormalizationMode mode )
 

Set the normalization mode used in a UCollator.

The normalization mode influences how strings are compared.

Parameters:
coll   The UCollator to set.
mode   The desired normalization mode; one of UCOL_NO_NORMALIZATION, UCOL_CAN_DECOMP, UCOL_COMPAT_DECOMP, UCOL_CAN_DECOMP_COMPAT_COMP, UCOL_COMPAT_DECOMP_CAN_COMP, UCOL_DEFAULT_NORMALIZATION
See also:
ucol_getNormalization
Stable:

U_CAPI void ucol_setStrength ( UCollator * coll,
UCollationStrength strength )
 

Set the collation strength used in a UCollator.

The strength influences how strings are compared.

Example of use:

 .       UCollationResult result;
 .       UChar source[4], target[4];
 .       UErrorCode status = U_ZERO_ERROR;
 .       UCollator *myCollation = ucol_open("en_US", &status);
 .       if (U_FAILURE(status)) return;
 .       ucol_setStrength(myCollation, UCOL_PRIMARY);
 .       u_uastrcpy(source, "abc");
 .       u_uastrcpy(target, "ABC");
 .       // result will be "abc" == "ABC"
 .       // tertiary differences will be ignored
 .       result = ucol_strcoll(myCollation, source, u_strlen(source), target, u_strlen(target));
 
Parameters:
coll   The UCollator to set.
strength   The desired collation strength; one of UCOL_PRIMARY, UCOL_SECONDARY, UCOL_TERTIARY, UCOL_IDENTICAL, UCOL_DEFAULT_STRENGTH
See also:
ucol_getStrength
Stable:

U_CAPI UCollationResult ucol_strcoll ( const UCollator * coll,
const UChar * source,
int32_t sourceLength,
const UChar * target,
int32_t targetLength )
 

Compare two strings.

The strings will be compared using the normalization mode and options specified in ucol_open() or ucol_openRules().

Parameters:
coll   The UCollator containing the comparison rules.
source   The source string.
sourceLength   The length of source, or -1 if null-terminated.
target   The target string.
targetLength   The length of target, or -1 if null-terminated.
Returns:
The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS
See also:
ucol_greater , ucol_greaterOrEqual , ucol_equal
Stable:

U_CDECL_END U_CAPI UCollationResult ucol_strcollinc ( const UCollator * coll,
UCharForwardIterator * source,
void * sourceContext,
UCharForwardIterator * target,
void * targetContext )
 

String compare that uses user supplied character iteration.

The idea is to prevent users from having to convert the whole string into UChar's before comparing since sometimes strings differ on first couple of characters.

Parameters:
coll   collator to be used for comparing
source   pointer to function for iterating over the first string
sourceContext   data to be passed to the first iterating function.
target   pointer to function for iterating over the second string
targetContext   data to be passed to the second iterating function.
Returns:
The result of comparing the strings; one of UCOL_EQUAL, UCOL_GREATER, UCOL_LESS


Generated at Thu Mar 22 16:12:50 2001 for ICU 1.8 by doxygen1.2.3 written by Dimitri van Heesch, © 1997-2000