2 # $Id: api.txt,v 1.3 2001/01/02 18:46:20 mleisher Exp $
11 -----------------------------------------------------------------------------
13 Macros that combine to select data tables for ucdata_load(), ucdata_unload(),
16 #define UCDATA_CASE 0x01
17 #define UCDATA_CTYPE 0x02
18 #define UCDATA_DECOMP 0x04
19 #define UCDATA_CMBCL 0x08
20 #define UCDATA_NUM 0x10
21 #define UCDATA_COMP 0x20
22 #define UCDATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\
23 UCDATA_CMBCL|UCDATA_NUM|UCDATA_COMP)
24 -----------------------------------------------------------------------------
26 void ucdata_load(char *paths, int masks)
28 This function initializes the UCData library by locating the data files in
29 one of the colon-separated directories in the `paths' parameter. The data
30 files to be loaded are specified in the `masks' parameter as a bitwise
31 combination of the macros listed above.
33 This should be called before using any of the other functions.
35 NOTE: the ucdata_setup(char *paths) function is now a macro that expands
36 into this function at compile time.
38 -----------------------------------------------------------------------------
40 void ucdata_unload(int masks)
42 This function unloads the data tables specified in the `masks' parameter.
44 This function should be called when the application is done using the UCData
47 NOTE: the ucdata_cleanup() function is now a macro that expands into this
48 function at compile time.
50 -----------------------------------------------------------------------------
52 void ucdata_reload(char *paths, int masks)
54 This function reloads the data files from one of the colon-separated
55 directories in the `paths' parameter. The data files to be reloaded are
56 specified in the `masks' parameter as a bitwise combination of the macros
59 If the data files have already been loaded, they are unloaded before the
60 data files are loaded again.
62 -----------------------------------------------------------------------------
64 int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
66 This function determines if a character has a decomposition and returns the
67 decomposition information if it exists.
69 If a zero is returned, there is no decomposition. If a non-zero is
70 returned, then the `num' and `decomp' variables are filled in with the
75 unsigned long i, num, *decomp;
77 if (ucdecomp(0x1d5, &num, &decomp) != 0) {
78 for (i = 0; i < num; i++)
79 printf("0x%08lX,", decomp[i]);
83 int uccanondecomp(const unsigned long *in, int inlen, unsigned long **out,
86 This function decomposes an input string and does canonical reordering of
87 the characters at the same time.
89 If a -1 is returned, memory allocation was not successful. If a zero is
90 returned, no decomposition occurred. Any other value means the output string
91 contains the fully decomposed string in canonical order.
93 If the "outlen" parameter comes back with a value > 0, then the string
94 returned in the "out" parameter needs to be deallocated by the caller.
96 -----------------------------------------------------------------------------
98 int ucdecomp_hangul(unsigned long code, unsigned long *num,
99 unsigned long decomp[])
101 This function determines if a Hangul syllable has a decomposition and
102 returns the decomposition information.
104 An array of at least size 3 should be passed to the function for the
105 decomposition of the syllable.
107 If a zero is returned, the character is not a Hangul syllable. If a
108 non-zero is returned, the `num' field will be 2 or 3 and the syllable will
109 be decomposed into the `decomp' array arithmetically.
113 unsigned long i, num, decomp[3];
115 if (ucdecomp_hangul(0xb1ba, &num, decomp) != 0) {
116 for (i = 0; i < num; i++)
117 printf("0x%08lX,", decomp[i]);
121 -----------------------------------------------------------------------------
123 int uccomp(unsigned long ch1, unsigned long ch2, unsigned long *comp)
125 This function takes a pair of characters and determines if they combine to
126 form another character.
128 If a zero is returned, no composition is formed by the character pair. Any
129 other value indicates the "comp" parameter has a value.
131 int uccomp_hangul(unsigned long *str, int len)
133 This function composes the Hangul Jamo in the string. The composition is
136 The return value provides the new length of the string. This will be
137 smaller than "len" if compositions occurred.
139 int uccanoncomp(unsigned long *str, int len)
141 This function does a canonical composition of characters in the string.
143 The return value is the new length of the string.
145 -----------------------------------------------------------------------------
152 int ucnumber_lookup(unsigned long code, struct ucnumber *num)
154 This function determines if the code is a number and fills in the `num'
155 field with the numerator and denominator. If the code happens to be a
156 single digit, the numerator and denominator fields will be the same.
158 If the function returns 0, the code is not a number. Any other return
159 value means the code is a number.
161 int ucdigit_lookup(unsigned long code, int *digit)
163 This function determines if the code is a digit and fills in the `digit'
164 field with the digit value.
166 If the function returns 0, the code is not a digit. Any other return
167 value means the code is a digit.
169 struct ucnumber ucgetnumber(unsigned long code)
171 This is a compatibility function with John Cowan's "uctype" package. It
172 uses ucnumber_lookup().
174 int ucgetdigit(unsigned long code)
176 This is a compatibility function with John Cowan's "uctype" package. It
177 uses ucdigit_lookup().
179 -----------------------------------------------------------------------------
181 unsigned long uctoupper(unsigned long code)
183 This function returns the code unchanged if it is already upper case or has
184 no upper case equivalent. Otherwise the upper case equivalent is returned.
186 -----------------------------------------------------------------------------
188 unsigned long uctolower(unsigned long code)
190 This function returns the code unchanged if it is already lower case or has
191 no lower case equivalent. Otherwise the lower case equivalent is returned.
193 -----------------------------------------------------------------------------
195 unsigned long uctotitle(unsigned long code)
197 This function returns the code unchanged if it is already title case or has
198 no title case equivalent. Otherwise the title case equivalent is returned.
200 -----------------------------------------------------------------------------
202 int ucisalpha(unsigned long code)
203 int ucisalnum(unsigned long code)
204 int ucisdigit(unsigned long code)
205 int uciscntrl(unsigned long code)
206 int ucisspace(unsigned long code)
207 int ucisblank(unsigned long code)
208 int ucispunct(unsigned long code)
209 int ucisgraph(unsigned long code)
210 int ucisprint(unsigned long code)
211 int ucisxdigit(unsigned long code)
213 int ucisupper(unsigned long code)
214 int ucislower(unsigned long code)
215 int ucistitle(unsigned long code)
217 These functions (actually macros) determine if a character has these
218 properties. These behave in a fashion very similar to the venerable ctype
221 -----------------------------------------------------------------------------
223 int ucisisocntrl(unsigned long code)
225 Is the character a C0 control character (< 32)?
227 int ucisfmtcntrl(unsigned long code)
229 Is the character a format control character?
231 int ucissymbol(unsigned long code)
233 Is the character a symbol?
235 int ucisnumber(unsigned long code)
237 Is the character a number or digit?
239 int ucisnonspacing(unsigned long code)
241 Is the character non-spacing?
243 int ucisopenpunct(unsigned long code)
245 Is the character an open/left punctuation (e.g. '[')?
247 int ucisclosepunct(unsigned long code)
249 Is the character a close/right punctuation (e.g. ']')?
251 int ucisinitialpunct(unsigned long code)
253 Is the character an initial punctuation (i.e. U+2018 LEFT SINGLE QUOTATION
256 int ucisfinalpunct(unsigned long code)
258 Is the character a final punctuation (i.e. U+2019 RIGHT SINGLE QUOTATION
261 int uciscomposite(unsigned long code)
263 Can the character be decomposed into a set of other characters?
265 int ucisquote(unsigned long code)
267 Is the character one of the many quotation marks?
269 int ucissymmetric(unsigned long code)
271 Is the character one that has an opposite form (e.g. <>)?
273 int ucismirroring(unsigned long code)
275 Is the character mirroring (superset of symmetric)?
277 int ucisnonbreaking(unsigned long code)
279 Is the character non-breaking (e.g. non-breaking space)?
281 int ucisrtl(unsigned long code)
283 Does the character have strong right-to-left directionality (i.e. Arabic
286 int ucisltr(unsigned long code)
288 Does the character have strong left-to-right directionality (i.e. Latin
291 int ucisstrong(unsigned long code)
293 Does the character have strong directionality?
295 int ucisweak(unsigned long code)
297 Does the character have weak directionality (e.g. numbers)?
299 int ucisneutral(unsigned long code)
301 Does the character have neutral directionality (e.g. whitespace)?
303 int ucisseparator(unsigned long code)
305 Is the character a block or segment separator?
307 int ucislsep(unsigned long code)
309 Is the character a line separator?
311 int ucispsep(unsigned long code)
313 Is the character a paragraph separator?
315 int ucismark(unsigned long code)
317 Is the character a mark of some kind?
319 int ucisnsmark(unsigned long code)
321 Is the character a non-spacing mark?
323 int ucisspmark(unsigned long code)
325 Is the character a spacing mark?
327 int ucismodif(unsigned long code)
329 Is the character a modifier letter?
331 int ucismodifsymbol(unsigned long code)
333 Is the character a modifier symbol?
335 int ucisletnum(unsigned long code)
337 Is the character a number represented by a letter?
339 int ucisconnect(unsigned long code)
341 Is the character connecting punctuation?
343 int ucisdash(unsigned long code)
345 Is the character dash punctuation?
347 int ucismath(unsigned long code)
349 Is the character a math character?
351 int uciscurrency(unsigned long code)
353 Is the character a currency character?
355 int ucisenclosing(unsigned long code)
357 Is the character enclosing (e.g. enclosing box)?
359 int ucisprivate(unsigned long code)
361 Is the character from the Private Use Area?
363 int ucissurrogate(unsigned long code)
365 Is the character one of the surrogate codes?
367 int ucisdefined(unsigned long code)
369 Is the character defined (appeared in one of the data files)?
371 int ucisundefined(unsigned long code)
373 Is the character not defined (non-Unicode)?
375 int ucishan(unsigned long code)
377 Is the character a Han ideograph?
379 int ucishangul(unsigned long code)
381 Is the character a pre-composed Hangul syllable?