2 # $Id: api.txt,v 1.2 1999/11/19 15:24:29 mleisher Exp $
11 -----------------------------------------------------------------------------
13 Macros that combine to select data tables for ucdata_load(), ucdata_unload(),
16 #define UCDATA_CASE 0x01
17 #define UCDATA_CTYPE 0x02
18 #define UCDATA_DECOMP 0x04
19 #define UCDATA_CMBCL 0x08
20 #define UCDATA_NUM 0x10
21 #define UCDATA_ALL (UCDATA_CASE|UCDATA_CTYPE|UCDATA_DECOMP|\
22 UCDATA_CMBCL|UCDATA_NUM)
23 -----------------------------------------------------------------------------
25 void ucdata_load(char *paths, int masks)
27 This function initializes the UCData library by locating the data files in
28 one of the colon-separated directories in the `paths' parameter. The data
29 files to be loaded are specified in the `masks' parameter as a bitwise
30 combination of the macros listed above.
32 This should be called before using any of the other functions.
34 NOTE: the ucdata_setup(char *paths) function is now a macro that expands
35 into this function at compile time.
37 -----------------------------------------------------------------------------
39 void ucdata_unload(int masks)
41 This function unloads the data tables specified in the `masks' parameter.
43 This function should be called when the application is done using the UCData
46 NOTE: the ucdata_cleanup() function is now a macro that expands into this
47 function at compile time.
49 -----------------------------------------------------------------------------
51 void ucdata_reload(char *paths, int masks)
53 This function reloads the data files from one of the colon-separated
54 directories in the `paths' parameter. The data files to be reloaded are
55 specified in the `masks' parameter as a bitwise combination of the macros
58 If the data files have already been loaded, they are unloaded before the
59 data files are loaded again.
61 -----------------------------------------------------------------------------
63 int ucdecomp(unsigned long code, unsigned long *num, unsigned long **decomp)
65 This function determines if a character has a decomposition and returns the
66 decomposition information if it exists.
68 If a zero is returned, there is no decomposition. If a non-zero is
69 returned, then the `num' and `decomp' variables are filled in with the
74 unsigned long i, num, *decomp;
76 if (ucdecomp(0x1d5, &num, &decomp) != 0) {
77 for (i = 0; i < num; i++)
78 printf("0x%08lX,", decomp[i]);
82 -----------------------------------------------------------------------------
84 int ucdecomp_hangul(unsigned long code, unsigned long *num,
85 unsigned long decomp[])
87 This function determines if a Hangul syllable has a decomposition and
88 returns the decomposition information.
90 An array of at least size 3 should be passed to the function for the
91 decomposition of the syllable.
93 If a zero is returned, the character is not a Hangul syllable. If a
94 non-zero is returned, the `num' field will be 2 or 3 and the syllable will
95 be decomposed into the `decomp' array arithmetically.
99 unsigned long i, num, decomp[3];
101 if (ucdecomp_hangul(0xb1ba, &num, decomp) != 0) {
102 for (i = 0; i < num; i++)
103 printf("0x%08lX,", decomp[i]);
107 -----------------------------------------------------------------------------
114 int ucnumber_lookup(unsigned long code, struct ucnumber *num)
116 This function determines if the code is a number and fills in the `num'
117 field with the numerator and denominator. If the code happens to be a
118 single digit, the numerator and denominator fields will be the same.
120 If the function returns 0, the code is not a number. Any other return
121 value means the code is a number.
123 int ucdigit_lookup(unsigned long code, int *digit)
125 This function determines if the code is a digit and fills in the `digit'
126 field with the digit value.
128 If the function returns 0, the code is not a digit. Any other return
129 value means the code is a digit.
131 struct ucnumber ucgetnumber(unsigned long code)
133 This is a compatibility function with John Cowan's "uctype" package. It
134 uses ucnumber_lookup().
136 int ucgetdigit(unsigned long code)
138 This is a compatibility function with John Cowan's "uctype" package. It
139 uses ucdigit_lookup().
141 -----------------------------------------------------------------------------
143 unsigned long uctoupper(unsigned long code)
145 This function returns the code unchanged if it is already upper case or has
146 no upper case equivalent. Otherwise the upper case equivalent is returned.
148 -----------------------------------------------------------------------------
150 unsigned long uctolower(unsigned long code)
152 This function returns the code unchanged if it is already lower case or has
153 no lower case equivalent. Otherwise the lower case equivalent is returned.
155 -----------------------------------------------------------------------------
157 unsigned long uctotitle(unsigned long code)
159 This function returns the code unchanged if it is already title case or has
160 no title case equivalent. Otherwise the title case equivalent is returned.
162 -----------------------------------------------------------------------------
164 int ucisalpha(unsigned long code)
165 int ucisalnum(unsigned long code)
166 int ucisdigit(unsigned long code)
167 int uciscntrl(unsigned long code)
168 int ucisspace(unsigned long code)
169 int ucisblank(unsigned long code)
170 int ucispunct(unsigned long code)
171 int ucisgraph(unsigned long code)
172 int ucisprint(unsigned long code)
173 int ucisxdigit(unsigned long code)
175 int ucisupper(unsigned long code)
176 int ucislower(unsigned long code)
177 int ucistitle(unsigned long code)
179 These functions (actually macros) determine if a character has these
180 properties. These behave in a fashion very similar to the venerable ctype
183 -----------------------------------------------------------------------------
185 int ucisisocntrl(unsigned long code)
187 Is the character a C0 control character (< 32)?
189 int ucisfmtcntrl(unsigned long code)
191 Is the character a format control character?
193 int ucissymbol(unsigned long code)
195 Is the character a symbol?
197 int ucisnumber(unsigned long code)
199 Is the character a number or digit?
201 int ucisnonspacing(unsigned long code)
203 Is the character non-spacing?
205 int ucisopenpunct(unsigned long code)
207 Is the character an open/left punctuation (e.g. '[')?
209 int ucisclosepunct(unsigned long code)
211 Is the character a close/right punctuation (e.g. ']')?
213 int ucisinitialpunct(unsigned long code)
215 Is the character an initial punctuation (i.e. U+2018 LEFT SINGLE QUOTATION
218 int ucisfinalpunct(unsigned long code)
220 Is the character a final punctuation (i.e. U+2019 RIGHT SINGLE QUOTATION
223 int uciscomposite(unsigned long code)
225 Can the character be decomposed into a set of other characters?
227 int ucisquote(unsigned long code)
229 Is the character one of the many quotation marks?
231 int ucissymmetric(unsigned long code)
233 Is the character one that has an opposite form (e.g. < and >)?
235 int ucismirroring(unsigned long code)
237 Is the character mirroring (superset of symmetric)?
239 int ucisnonbreaking(unsigned long code)
241 Is the character non-breaking (e.g. non-breaking space)?
243 int ucisrtl(unsigned long code)
245 Does the character have strong right-to-left directionality (i.e. Arabic
248 int ucisltr(unsigned long code)
250 Does the character have strong left-to-right directionality (i.e. Latin
253 int ucisstrong(unsigned long code)
255 Does the character have strong directionality?
257 int ucisweak(unsigned long code)
259 Does the character have weak directionality (e.g. numbers)?
261 int ucisneutral(unsigned long code)
263 Does the character have neutral directionality (e.g. whitespace)?
265 int ucisseparator(unsigned long code)
267 Is the character a block or segment separator?
269 int ucislsep(unsigned long code)
271 Is the character a line separator?
273 int ucispsep(unsigned long code)
275 Is the character a paragraph separator?
277 int ucismark(unsigned long code)
279 Is the character a mark of some kind?
281 int ucisnsmark(unsigned long code)
283 Is the character a non-spacing mark?
285 int ucisspmark(unsigned long code)
287 Is the character a spacing mark?
289 int ucismodif(unsigned long code)
291 Is the character a modifier letter?
293 int ucismodifsymbol(unsigned long code)
295 Is the character a modifier symbol?
297 int ucisletnum(unsigned long code)
299 Is the character a number represented by a letter?
301 int ucisconnect(unsigned long code)
303 Is the character connecting punctuation?
305 int ucisdash(unsigned long code)
307 Is the character dash punctuation?
309 int ucismath(unsigned long code)
311 Is the character a math character?
313 int uciscurrency(unsigned long code)
315 Is the character a currency character?
317 int ucisenclosing(unsigned long code)
319 Is the character enclosing (e.g. enclosing box)?
321 int ucisprivate(unsigned long code)
323 Is the character from the Private Use Area?
325 int ucissurrogate(unsigned long code)
327 Is the character one of the surrogate codes?
329 int ucisdefined(unsigned long code)
331 Is the character defined (appeared in one of the data files)?
333 int ucisundefined(unsigned long code)
335 Is the character not defined (non-Unicode)?
337 int ucishan(unsigned long code)
339 Is the character a Han ideograph?
341 int ucishangul(unsigned long code)
343 Is the character a pre-composed Hangul syllable?