2 * Copyright 2001 Computing Research Labs, New Mexico State University
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COMPUTING RESEARCH LAB OR NEW MEXICO STATE UNIVERSITY BE LIABLE FOR ANY
18 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT
19 * OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
20 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
/*
 * RCS identification string.  Two definitions of `rcsid' follow: the first
 * carries the GCC `unused' attribute, the second is the plain form.
 * NOTE(review): both define the same identifier, so they are presumably
 * selected by preprocessor conditionals that are not visible here -- confirm.
 */
24 static char rcsid[] __attribute__ ((unused)) = "$Id: ucpgba.c,v 1.5 2001/01/02 18:46:20 mleisher Exp $";
26 static char rcsid[] = "$Id: ucpgba.c,v 1.5 2001/01/02 18:46:20 mleisher Exp $";
39 * These macros are used while reordering RTL runs of text for the
40 * special case of non-spacing characters being in runs of weakly
41 * directional text. They check for weak and non-spacing, and digits and
/*
 * Both macros forward the character to ucisprop() together with two
 * property masks: ISWEAKSPECIAL passes UC_EN|UC_ES|UC_MN and
 * UC_ET|UC_AN|UC_CS, ISDIGITSPECIAL passes UC_ND|UC_MN and 0.
 * NOTE(review): ucisprop() and the UC_* constants are declared elsewhere;
 * which constant belongs in which mask argument should be checked against
 * that declaration.
 */
44 #define ISWEAKSPECIAL(cc) ucisprop(cc, UC_EN|UC_ES|UC_MN, UC_ET|UC_AN|UC_CS)
45 #define ISDIGITSPECIAL(cc) ucisprop(cc, UC_ND|UC_MN, 0)
48 * These macros are used while breaking a string into runs of text in
49 * different directions. Descriptions:
51 * ISLTR_LTR - Test for members of an LTR run in an LTR context. This looks
52 * for characters with ltr, non-spacing, weak, and neutral
55 * ISRTL_RTL - Test for members of an RTL run in an RTL context. This looks
56 * for characters with rtl, non-spacing, weak, and neutral
59 * ISRTL_NEUTRAL - Test for RTL or neutral characters.
61 * ISWEAK_NEUTRAL - Test for weak or neutral characters.
/*
 * Run-classification macros.  Each expands to a single ucisprop() call with
 * two property masks (first mask | second mask):
 *   ISLTR_LTR      - UC_L, UC_MN, UC_EN, UC_ES | UC_ET, UC_CS, UC_B, UC_S,
 *                    UC_WS, UC_ON
 *   ISRTL_RTL      - UC_R, UC_MN, UC_EN, UC_ES | the ISLTR_LTR second mask
 *                    plus UC_AN
 *   ISRTL_NEUTRAL  - UC_R                      | UC_B, UC_S, UC_WS, UC_ON
 *   ISWEAK_NEUTRAL - UC_EN, UC_ES              | UC_B, UC_S, UC_WS, UC_ON,
 *                    UC_ET, UC_AN, UC_CS
 * Note that UC_AN is accepted by ISRTL_RTL but not by ISLTR_LTR.
 */
63 #define ISLTR_LTR(cc) ucisprop(cc, UC_L|UC_MN|UC_EN|UC_ES,\
64 UC_ET|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
66 #define ISRTL_RTL(cc) ucisprop(cc, UC_R|UC_MN|UC_EN|UC_ES,\
67 UC_ET|UC_AN|UC_CS|UC_B|UC_S|UC_WS|UC_ON)
69 #define ISRTL_NEUTRAL(cc) ucisprop(cc, UC_R, UC_B|UC_S|UC_WS|UC_ON)
70 #define ISWEAK_NEUTRAL(cc) ucisprop(cc, UC_EN|UC_ES, \
71 UC_B|UC_S|UC_WS|UC_ON|UC_ET|UC_AN|UC_CS)
74 * This table is temporarily hard-coded here until it can be constructed
75 * automatically somehow.
/*
 * Flat lookup table of mirrored characters stored as consecutive
 * (character, counterpart) entries.  Every pair is listed in both
 * directions, e.g. 0x0028 -> 0x0029 followed by 0x0029 -> 0x0028, so a
 * lookup only has to compare against the even-indexed elements.
 */
77 static unsigned long _symmetric_pairs[] = {
78 0x0028, 0x0029, 0x0029, 0x0028, 0x003C, 0x003E, 0x003E, 0x003C,
79 0x005B, 0x005D, 0x005D, 0x005B, 0x007B, 0x007D, 0x007D, 0x007B,
80 0x2045, 0x2046, 0x2046, 0x2045, 0x207D, 0x207E, 0x207E, 0x207D,
81 0x208D, 0x208E, 0x208E, 0x208D, 0x3008, 0x3009, 0x3009, 0x3008,
82 0x300A, 0x300B, 0x300B, 0x300A, 0x300C, 0x300D, 0x300D, 0x300C,
83 0x300E, 0x300F, 0x300F, 0x300E, 0x3010, 0x3011, 0x3011, 0x3010,
84 0x3014, 0x3015, 0x3015, 0x3014, 0x3016, 0x3017, 0x3017, 0x3016,
85 0x3018, 0x3019, 0x3019, 0x3018, 0x301A, 0x301B, 0x301B, 0x301A,
86 0xFD3E, 0xFD3F, 0xFD3F, 0xFD3E, 0xFE59, 0xFE5A, 0xFE5A, 0xFE59,
87 0xFE5B, 0xFE5C, 0xFE5C, 0xFE5B, 0xFE5D, 0xFE5E, 0xFE5E, 0xFE5D,
88 0xFF08, 0xFF09, 0xFF09, 0xFF08, 0xFF3B, 0xFF3D, 0xFF3D, 0xFF3B,
89 0xFF5B, 0xFF5D, 0xFF5D, 0xFF5B, 0xFF62, 0xFF63, 0xFF63, 0xFF62,
/*
 * Number of array elements (not number of pairs): the total byte size of
 * the table divided by the size of one element.
 */
92 static int _symmetric_pairs_size =
93 sizeof(_symmetric_pairs)/sizeof(_symmetric_pairs[0]);
96 * This routine looks up the other form of a symmetric pair.
/*
 * Linear search of _symmetric_pairs for `c'.  The index advances by two, so
 * only the first element of each (character, counterpart) entry is
 * compared; on a match the element that follows it is returned.
 * NOTE(review): the return type, the declaration of `i' and the value
 * returned when nothing matches are not visible here -- confirm.
 */
99 _ucsymmetric_pair(unsigned long c)
103 for (i = 0; i < _symmetric_pairs_size; i += 2) {
104 if (_symmetric_pairs[i] == c)
105 return _symmetric_pairs[i+1];
111 * This routine creates a new run, copies the text into it, links it into the
112 * logical text order chain and returns it to the caller to be linked into
113 * the visual text order chain.
/*
 * Parameters, as used by the visible code:
 *   str       - string whose logical run list receives the new run
 *   src       - source text, indexed by absolute position
 *   start/end - half-open range [start, end) of `src' copied into the run
 *   direction - stored in the run; UCPGBA_RTL selects the reversed copy
 * NOTE(review): neither malloc() result is checked before it is used in the
 * visible lines, and the local declarations, the `else' lines and the
 * return statement are not visible here.
 */
116 _add_run(ucstring_t *str, unsigned long *src,
117 unsigned long start, unsigned long end, int direction)
122 run = (ucrun_t *) malloc(sizeof(ucrun_t));
123 run->visual_next = run->visual_prev = 0;
124 run->direction = direction;
/*
 * One allocation holds both arrays: (end - start) << 1 elements, with
 * `positions' pointing at the second half.  Only `chars' is a pointer
 * obtained from malloc().
 */
128 run->chars = (unsigned long *)
129 malloc(sizeof(unsigned long) * ((end - start) << 1));
130 run->positions = run->chars + (end - start);
136 if (direction == UCPGBA_RTL) {
138 * Copy the source text into the run in reverse order and select
139 * replacements for the pairwise punctuation and the <> characters.
/*
 * `t' walks the source backwards from end - 1 while `i' fills the run
 * forwards.  `start' itself serves as the loop counter, so it no longer
 * holds its original value once the loop finishes.
 */
141 for (i = 0, t = end - 1; start < end; start++, t--, i++) {
142 run->positions[i] = t;
143 if (ucissymmetric(src[t]) || src[t] == '<' || src[t] == '>')
144 run->chars[i] = _ucsymmetric_pair(src[t]);
146 run->chars[i] = src[t];
150 * Copy the source text into the run directly.
/*
 * positions[] records the absolute source index of every copied character.
 */
152 for (i = start; i < end; i++) {
153 run->positions[i - start] = i;
154 run->chars[i - start] = src[i];
159 * Add the run to the logical list for cursor traversal.
/*
 * An empty list gets the run as both head and tail; the lines after that
 * link the run behind the current tail and make it the new tail.
 */
161 if (str->logical_first == 0)
162 str->logical_first = str->logical_last = run;
164 run->logical_prev = str->logical_last;
165 str->logical_last->logical_next = run;
166 str->logical_last = run;
/*
 * Adds one RTL segment of `source' to `str'.  The visible code walks the
 * segment and creates three kinds of runs through _add_run():
 *   - an RTL run covering characters that match ISRTL_NEUTRAL,
 *   - an LTR run for the sequence selected by the ISWEAKSPECIAL /
 *     ISDIGITSPECIAL loop,
 *   - an RTL run for weak non-digit characters left over at the end.
 * After each _add_run() call the same linking code places the run in the
 * visual chain: directly after `lrun' when the string is LTR overall,
 * otherwise at the head of the chain.
 * NOTE(review): the rest of the parameter list, the local declarations and
 * the conditions guarding each _add_run() call are not visible here.
 */
173 _ucadd_rtl_segment(ucstring_t *str, unsigned long *source, unsigned long start,
180 * This is used to splice runs into strings with overall LTR direction.
181 * The `lrun' variable will never be NULL because at least one LTR run was
182 * added before this RTL run.
184 lrun = str->visual_last;
186 for (e = s = start; s < end;) {
187 for (; e < end && ISRTL_NEUTRAL(source[e]); e++) ;
190 run = _add_run(str, source, s, e, UCPGBA_RTL);
193 * Add the run to the visual list for cursor traversal.
/*
 * `lrun' is not reassigned in the visible code, so every run spliced in
 * for an LTR string lands immediately after it, i.e. in front of the runs
 * spliced in earlier by this call.
 */
195 if (str->visual_first != 0) {
196 if (str->direction == UCPGBA_LTR) {
197 run->visual_prev = lrun;
198 run->visual_next = lrun->visual_next;
199 if (lrun->visual_next != 0)
200 lrun->visual_next->visual_prev = run;
201 lrun->visual_next = run;
202 if (lrun == str->visual_last)
203 str->visual_last = run;
205 run->visual_next = str->visual_first;
206 str->visual_first->visual_prev = run;
207 str->visual_first = run;
210 str->visual_first = str->visual_last = run;
214 * Handle digits in a special way. This makes sure the weakly
215 * directional characters appear on the expected sides of a number
216 * depending on whether that number is Arabic or not.
218 for (s = e; e < end && ISWEAKSPECIAL(source[e]); e++) {
/*
 * The `e + 1 == end' test comes first so that source[e + 1] is only read
 * when it lies inside the segment.
 */
219 if (!ISDIGITSPECIAL(source[e]) &&
220 (e + 1 == end || !ISDIGITSPECIAL(source[e + 1])))
225 run = _add_run(str, source, s, e, UCPGBA_LTR);
228 * Add the run to the visual list for cursor traversal.
230 if (str->visual_first != 0) {
231 if (str->direction == UCPGBA_LTR) {
232 run->visual_prev = lrun;
233 run->visual_next = lrun->visual_next;
234 if (lrun->visual_next != 0)
235 lrun->visual_next->visual_prev = run;
236 lrun->visual_next = run;
237 if (lrun == str->visual_last)
238 str->visual_last = run;
240 run->visual_next = str->visual_first;
241 str->visual_first->visual_prev = run;
242 str->visual_first = run;
245 str->visual_first = str->visual_last = run;
249 * Collect all weak non-digit sequences for an RTL segment. These
250 * will appear as part of the next RTL segment or will be added as
251 * an RTL segment by themselves.
253 for (s = e; e < end && ucisweak(source[e]) && !ucisdigit(source[e]);
258 * Capture any weak non-digit sequences that occur at the end of the RTL
262 run = _add_run(str, source, s, e, UCPGBA_RTL);
265 * Add the run to the visual list for cursor traversal.
267 if (str->visual_first != 0) {
268 if (str->direction == UCPGBA_LTR) {
269 run->visual_prev = lrun;
270 run->visual_next = lrun->visual_next;
271 if (lrun->visual_next != 0)
272 lrun->visual_next->visual_prev = run;
273 lrun->visual_next = run;
274 if (lrun == str->visual_last)
275 str->visual_last = run;
277 run->visual_next = str->visual_first;
278 str->visual_first->visual_prev = run;
279 str->visual_first = run;
282 str->visual_first = str->visual_last = run;
/*
 * Adds the range passed as `start'/`end' to `str' as a single LTR run
 * created by _add_run().  In a string whose overall direction is LTR the
 * run is linked after visual_last; the remaining linking lines insert it in
 * front of visual_first, and an empty visual chain gets the run as both of
 * its ends.
 * NOTE(review): the rest of the parameter list, the local declarations and
 * the `else' lines are not visible here.
 */
287 _ucadd_ltr_segment(ucstring_t *str, unsigned long *source, unsigned long start,
292 run = _add_run(str, source, start, end, UCPGBA_LTR);
295 * Add the run to the visual list for cursor traversal.
297 if (str->visual_first != 0) {
298 if (str->direction == UCPGBA_LTR) {
299 run->visual_prev = str->visual_last;
300 str->visual_last->visual_next = run;
301 str->visual_last = run;
303 run->visual_next = str->visual_first;
304 str->visual_first->visual_prev = run;
305 str->visual_first = run;
308 str->visual_first = str->visual_last = run;
/*
 * Builds a ucstring_t describing the text in `source' between `start' and
 * `end'.
 *   source            - text; the pointer itself is stored in the string
 *   start, end        - bounds of the text within `source'
 *   default_direction - overall direction used when no strongly directional
 *                       character is found
 *   cursor_motion     - stored as the string's cursor motion mode
 * The overall direction is taken from the first strong character; the text
 * is then split into LTR and RTL segments that are handed to
 * _ucadd_ltr_segment() and _ucadd_rtl_segment().
 * NOTE(review): the malloc() result is used without a visible check, and
 * the return statements, several loop bodies and the flag mentioned in the
 * comments are not visible here.
 */
312 ucstring_create(unsigned long *source, unsigned long start, unsigned long end,
313 int default_direction, int cursor_motion)
316 unsigned long s, e, ld;
319 str = (ucstring_t *) malloc(sizeof(ucstring_t));
322 * Set the initial values.
324 str->cursor_motion = cursor_motion;
325 str->logical_first = str->logical_last = 0;
326 str->visual_first = str->visual_last = str->cursor = 0;
327 str->source = source;
332 * If the length of the string is 0, then just return it at this point.
338 * This flag indicates whether the collection loop for RTL is called
339 * before the LTR loop the first time.
344 * Look for the first character in the string that has strong
347 for (s = start; s < end && !ucisstrong(source[s]); s++) ;
351 * If the string contains no characters with strong directionality, use
352 * the default direction.
354 str->direction = default_direction;
356 str->direction = ucisrtl(source[s]) ? UCPGBA_RTL : UCPGBA_LTR;
358 if (str->direction == UCPGBA_RTL)
360 * Set the flag that causes the RTL collection loop to run first.
365 * This loop now separates the string into runs based on directionality.
/*
 * NOTE(review): the scan for the first strong character above begins at
 * `start', but this loop initialises both indices to 0.  For a nonzero
 * `start' the segmentation would begin before the requested range --
 * confirm whether that is intended.
 */
367 for (s = e = 0; s < end; s = e) {
370 * Determine the next run of LTR text.
/*
 * Digits outside 0x660..0x669 (the Arabic-Indic digits) are singled out
 * inside the loop; the statement they trigger is not visible here.
 * NOTE(review): presumably `ld' records the position of the last such
 * digit so that the trimming loop below never backs up past it -- confirm.
 */
374 while (e < end && ISLTR_LTR(source[e])) {
375 if (ucisdigit(source[e]) &&
376 !(0x660 <= source[e] && source[e] <= 0x669))
380 if (str->direction != UCPGBA_LTR) {
381 while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
386 * Add the LTR segment to the string.
389 _ucadd_ltr_segment(str, source, s, e);
393 * Determine the next run of RTL text.
396 while (e < end && ISRTL_RTL(source[e])) {
397 if (ucisdigit(source[e]) &&
398 !(0x660 <= source[e] && source[e] <= 0x669))
402 if (str->direction != UCPGBA_RTL) {
403 while (e > ld && ISWEAK_NEUTRAL(source[e - 1]))
408 * Add the RTL segment to the string.
411 _ucadd_rtl_segment(str, source, s, e);
414 * Clear the flag that allowed the RTL collection loop to run first
415 * for strings with overall RTL directionality.
421 * Set up the initial cursor run.
423 str->cursor = str->logical_first;
/*
 * Initial offset inside the first logical run: the run length for an RTL
 * run, 0 for any other direction.
 */
425 str->cursor->cursor = (str->cursor->direction == UCPGBA_RTL) ?
426 str->cursor->end - str->cursor->start : 0;
/*
 * Releases the runs of `s' by walking the visual chain from visual_first.
 * A run's `chars' buffer is freed only when the run is non-empty
 * (end > start).
 * NOTE(review): `l' is initialised to 0 in the loop header, but its use,
 * the freeing of the run structures and the freeing of `s' itself are not
 * visible here.
 */
432 ucstring_free(ucstring_t *s)
439 for (l = 0, r = s->visual_first; r != 0; r = r->visual_next) {
440 if (r->end > r->start)
441 free((char *) r->chars);
/*
 * Replaces the string's cursor motion mode with `cursor_motion', saving the
 * previous mode in `n' first.
 * NOTE(review): presumably `n' is what gets returned to the caller; the
 * return statement and any argument checks are not visible here.
 */
453 ucstring_set_cursor_motion(ucstring_t *str, int cursor_motion)
460 n = str->cursor_motion;
461 str->cursor_motion = cursor_motion;
/*
 * Moves the cursor toward the visual right, following visual_next links
 * between runs.
 * The current run is left when cursor + 1 == size for an RTL run, or when
 * cursor + 1 > size for any run.  If there is no next run at that point the
 * function returns (cnt != count); otherwise the next run becomes current
 * with its offset reset to -1 for an RTL run and 0 for other directions.
 * NOTE(review): the declaration and update of `cnt', the enclosing loop and
 * the step taken inside a run are not visible here.
 */
466 _ucstring_visual_cursor_right(ucstring_t *str, int count)
475 cursor = str->cursor;
477 size = cursor->end - cursor->start;
478 if ((cursor->direction == UCPGBA_RTL && cursor->cursor + 1 == size) ||
479 cursor->cursor + 1 > size) {
481 * If the next run is NULL, then the cursor is already on the
482 * far right end already.
484 if (cursor->visual_next == 0)
486 * If movement occured, then report it.
488 return (cnt != count);
491 * Move to the next run.
493 str->cursor = cursor = cursor->visual_next;
494 cursor->cursor = (cursor->direction == UCPGBA_RTL) ? -1 : 0;
495 size = cursor->end - cursor->start;
/*
 * Moves the cursor to the right in logical mode.  The visible code appears
 * to have four cases, keyed on the overall string direction and on the
 * current run's direction:
 *   RTL string, RTL run   : leaves the run when cursor + 1 == size
 *   RTL string, other run : leaves the run when cursor == 0
 *   other,      RTL run   : leaves the run when cursor == 0
 *   other,      other run : leaves the run when cursor + 1 > size
 * In the RTL-string cases the neighbouring run is logical_prev and movement
 * stops at logical_first; in the remaining cases it is logical_next and
 * movement stops at logical_last.  When stopped, the result is
 * (cnt != count).
 * NOTE(review): the offsets assigned to the newly entered run (the
 * conditional expressions are cut off), the `else' lines, the steps taken
 * inside a run and the handling of `cnt' are not visible here.
 */
504 _ucstring_logical_cursor_right(ucstring_t *str, int count)
513 cursor = str->cursor;
515 size = cursor->end - cursor->start;
516 if (str->direction == UCPGBA_RTL) {
517 if (cursor->direction == UCPGBA_RTL) {
518 if (cursor->cursor + 1 == size) {
519 if (cursor == str->logical_first)
521 * Already at the beginning of the string.
523 return (cnt != count);
525 str->cursor = cursor = cursor->logical_prev;
526 size = cursor->end - cursor->start;
527 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
532 if (cursor->cursor == 0) {
533 if (cursor == str->logical_first)
535 * At the beginning of the string already.
537 return (cnt != count);
539 str->cursor = cursor = cursor->logical_prev;
540 size = cursor->end - cursor->start;
541 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
547 if (cursor->direction == UCPGBA_RTL) {
548 if (cursor->cursor == 0) {
549 if (cursor == str->logical_last)
551 * Already at the end of the string.
553 return (cnt != count);
555 str->cursor = cursor = cursor->logical_next;
556 size = cursor->end - cursor->start;
557 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
562 if (cursor->cursor + 1 > size) {
563 if (cursor == str->logical_last)
565 * Already at the end of the string.
567 return (cnt != count);
569 str->cursor = cursor = cursor->logical_next;
570 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
/*
 * Public entry point for moving the cursor right: dispatches to the visual
 * variant when the string's cursor motion mode is UCPGBA_CURSOR_VISUAL and
 * to the logical variant otherwise, passing `count' through and returning
 * the callee's result.
 * NOTE(review): any argument validation before the dispatch is not visible
 * here.
 */
582 ucstring_cursor_right(ucstring_t *str, int count)
586 return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
587 _ucstring_visual_cursor_right(str, count) :
588 _ucstring_logical_cursor_right(str, count);
/*
 * Moves the cursor toward the visual left, following visual_prev links
 * between runs.
 * The current run is left when it is an LTR run with cursor == 0, or when
 * cursor - 1 < -1 for any run.  If there is no previous run at that point
 * the function returns (cnt != count); otherwise the previous run becomes
 * current and its offset is chosen according to whether it is an RTL run.
 * NOTE(review): the two offset values (the conditional expression is cut
 * off), the declaration and update of `cnt', the enclosing loop and the
 * step taken inside a run are not visible here.
 */
592 _ucstring_visual_cursor_left(ucstring_t *str, int count)
601 cursor = str->cursor;
603 size = cursor->end - cursor->start;
604 if ((cursor->direction == UCPGBA_LTR && cursor->cursor == 0) ||
605 cursor->cursor - 1 < -1) {
607 * If the preceding run is NULL, then the cursor is already on the
608 * far left end already.
610 if (cursor->visual_prev == 0)
612 * If movement occured, then report it.
614 return (cnt != count);
617 * Move to the previous run.
619 str->cursor = cursor = cursor->visual_prev;
620 size = cursor->end - cursor->start;
621 cursor->cursor = (cursor->direction == UCPGBA_RTL) ?
/*
 * Moves the cursor to the left in logical mode.  The visible code appears
 * to have four cases, keyed on the overall string direction and on the
 * current run's direction:
 *   RTL string, RTL run   : leaves the run when cursor == -1
 *   RTL string, other run : leaves the run when cursor + 1 > size
 *   other,      RTL run   : leaves the run when cursor + 1 == size
 *   other,      other run : leaves the run when cursor == 0
 * In the RTL-string cases the neighbouring run is logical_next and movement
 * stops at logical_last; in the remaining cases it is logical_prev and
 * movement stops at logical_first.  When stopped, the result is
 * (cnt != count).
 * NOTE(review): the offsets assigned to the newly entered run (the
 * conditional expressions are cut off), the `else' lines, the steps taken
 * inside a run and the handling of `cnt' are not visible here.
 */
631 _ucstring_logical_cursor_left(ucstring_t *str, int count)
640 cursor = str->cursor;
642 size = cursor->end - cursor->start;
643 if (str->direction == UCPGBA_RTL) {
644 if (cursor->direction == UCPGBA_RTL) {
645 if (cursor->cursor == -1) {
646 if (cursor == str->logical_last)
648 * Already at the end of the string.
650 return (cnt != count);
652 str->cursor = cursor = cursor->logical_next;
653 size = cursor->end - cursor->start;
654 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
659 if (cursor->cursor + 1 > size) {
660 if (cursor == str->logical_last)
662 * At the end of the string already.
664 return (cnt != count);
666 str->cursor = cursor = cursor->logical_next;
667 size = cursor->end - cursor->start;
668 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
674 if (cursor->direction == UCPGBA_RTL) {
675 if (cursor->cursor + 1 == size) {
676 if (cursor == str->logical_first)
678 * Already at the beginning of the string.
680 return (cnt != count);
682 str->cursor = cursor = cursor->logical_prev;
683 size = cursor->end - cursor->start;
684 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
689 if (cursor->cursor == 0) {
690 if (cursor == str->logical_first)
692 * Already at the beginning of the string.
694 return (cnt != count);
696 str->cursor = cursor = cursor->logical_prev;
697 cursor->cursor = (cursor->direction == UCPGBA_LTR) ?
/*
 * Public entry point for moving the cursor left: dispatches to the visual
 * variant when the string's cursor motion mode is UCPGBA_CURSOR_VISUAL and
 * to the logical variant otherwise, passing `count' through and returning
 * the callee's result.
 * NOTE(review): any argument validation before the dispatch is not visible
 * here.
 */
709 ucstring_cursor_left(ucstring_t *str, int count)
713 return (str->cursor_motion == UCPGBA_CURSOR_VISUAL) ?
714 _ucstring_visual_cursor_left(str, count) :
715 _ucstring_logical_cursor_left(str, count);
/*
 * Reports information about the current cursor run: its direction is
 * written through `direction' and a source position through `position'.
 * All three pointer arguments are compared against 0 first.
 * Three different position assignments are visible:
 *   - the run's start for an RTL run, positions[c - 1] otherwise;
 *   - the run's end for an RTL run, the run's start otherwise;
 *   - positions[c].
 * NOTE(review): the conditions choosing between these assignments, the
 * assignment of `c', the use of `size', the action taken when an argument
 * is 0 and the end of the function are not visible here.
 */
719 ucstring_cursor_info(ucstring_t *str, int *direction, unsigned long *position)
725 if (str == 0 || direction == 0 || position == 0)
728 cursor = str->cursor;
730 *direction = cursor->direction;
733 size = cursor->end - cursor->start;
736 *position = (cursor->direction == UCPGBA_RTL) ?
737 cursor->start : cursor->positions[c - 1];
739 *position = (cursor->direction == UCPGBA_RTL) ?
740 cursor->end : cursor->start;
742 *position = cursor->positions[c];