2 # Generate keysym2ucs.c file
4 # $XFree86: xc/programs/xterm/unicode/convmap.pl,v 1.5 2000/01/24 22:22:05 dawes Exp $
10 return sprintf("%c", $c);
11 } elsif ($c < 0x800) {
12 return sprintf("%c%c", 0xc0 | ($c >> 6), 0x80 | ($c & 0x3f));
13 } elsif ($c < 0x10000) {
14 return sprintf("%c%c%c",
16 0x80 | (($c >> 6) & 0x3f),
18 } elsif ($c < 0x200000) {
19 return sprintf("%c%c%c%c",
21 0x80 | (($c >> 12) & 0x3f),
22 0x80 | (($c >> 6) & 0x3f),
24 } elsif ($c < 0x4000000) {
25 return sprintf("%c%c%c%c%c",
27 0x80 | (($c >> 18) & 0x3f),
28 0x80 | (($c >> 12) & 0x3f),
29 0x80 | (($c >> 6) & 0x3f),
32 } elsif ($c < 0x80000000) {
33 return sprintf("%c%c%c%c%c%c",
35 0x80 | (($c >> 24) & 0x3f),
36 0x80 | (($c >> 18) & 0x3f),
37 0x80 | (($c >> 12) & 0x3f),
38 0x80 | (($c >> 6) & 0x3f),
45 $unicodedata = "UnicodeData.txt";
# File name of the Unicode character database that is parsed below.
47 # read list of all Unicode names
# NOTE(review): both open() attempts receive the same path ("$unicodedata"
# interpolates to the value of $unicodedata), so the second attempt cannot
# succeed where the first one failed.
48 if (!open(UDATA, $unicodedata) && !open(UDATA, "$unicodedata")) {
49 die ("Can't open Unicode database '$unicodedata':\n$!\n\n" .
50 "Please make sure that you have downloaded the file\n" .
51 "ftp://ftp.unicode.org/Public/UNIDATA/UnicodeData-Latest.txt\n");
# A record must consist of exactly 15 ';'-separated fields, the first being
# four characters from the class [0-9,A-F].
# NOTE(review): the ',' inside that character class looks unintended, and the
# fixed {4} width rejects code points written with more than four hex digits.
54 if (/^([0-9,A-F]{4});([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*);([^;]*)$/) {
# Abort, quoting the offending line and the database file name
# (presumably reached when the pattern above does not match).
58 die("Syntax error in line '$_' in file '$unicodedata'");
63 # read mapping (from http://wsinwp07.win.tue.nl:1234/unicode/keysym.map)
# Open keysym.map for reading; abort with the system error text on failure.
64 open(LIST, "<keysym.map") || die ("Can't open map file:\n$!\n");
# A mapping line is "0x" followed by four lowercase hex digits (the keysym),
# whitespace, "U" followed by four lowercase hex digits (the UCS value), and
# an optional trailing '#' comment.
66 if (/^0x([0-9a-f]{4})\s+U([0-9a-f]{4})\s*(\#.*)?$/){
# Record the keysym -> UCS association for the table generation.
69 $keysym_to_ucs{$keysym} = $ucs;
# Comment-only lines and blank lines are also accepted by this branch.
70 } elsif (/^\s*\#/ || /^\s*$/) {
# Abort, echoing the offending line (presumably for any other line shape).
72 die("Syntax error in 'list' in line\n$_\n");
77 # read entries in keysymdef.h
# NOTE(review): the header location is hard-coded to /usr/include/X11, and
# the handle name LIST is the same one used for keysym.map.
78 open(LIST, "</usr/include/X11/keysymdef.h") || die ("Can't open keysymdef.h:\n$!\n");
# Matches "#define XK_<name> 0x<hex>", optionally followed by text starting
# with '/'. $1 is the name without the XK_ prefix, $2 the hex digits.
80 if (/^\#define\s+XK_([A-Za-z_0-9]+)\s+0x([0-9a-fA-F]+)\s*(\/.*)?$/) {
# Ignore definitions whose line carries a "/* deprecated */" marker.
81 next if /\/\* deprecated \*\//;
# Remember the symbolic name belonging to this keysym value.
84 $keysym_to_keysymname{$keysym} = $keysymname;
91 * This module converts keysym values into the corresponding ISO 10646-1
92 * (UCS, Unicode) values.
94 * The array keysymtab[] contains pairs of X11 keysym values for graphical
95 * characters and the corresponding Unicode value. The function
96 * keysym2ucs() maps a keysym onto a Unicode value using a binary search,
97 * therefore keysymtab[] must remain SORTED by keysym value.
99 * The keysym -> UTF-8 conversion will hopefully one day be provided
100 * by Xlib via XmbLookupString() and should ideally not have to be
101 * done in X applications. But we are not there yet.
103 * Any UCS character in the range U+00000000 to U+00FFFFFF can be
104 * represented by a keysym value in the range 0x01000000 to 0x01ffffff.
105 * This admittedly does not cover the entire 31-bit space of UCS, but
106 * it does cover all of the characters up to U+10FFFF, which can be
107 * represented by UTF-16, and more, and it is very unlikely that higher
108 * UCS codes will ever be assigned by ISO. So to get Unicode character
109 * U+ABCD you can directly use keysym 0x0100abcd.
111 * NOTE: The comments in the table below contain the actual character
112 * encoded in UTF-8, so for viewing and editing best use an editor in
115 * Author: Markus G. Kuhn <mkuhn\@acm.org>, University of Cambridge, June 1999
117 * Special thanks to Richard Verhoeven <river\@win.tue.nl> for preparing
118 * an initial draft of the mapping table.
120 * This software is in the public domain. Share and enjoy!
123 #include <keysym2ucs.h>
126 unsigned short keysym;
131 for $keysym (sort {$a <=> $b} keys(%keysym_to_keysymname)) {
# Visit every keysym that has a symbolic name, in ascending numeric order,
# and look up the UCS value recorded for it (if any).
132 $ucs = $keysym_to_ucs{$keysym};
# Only keysyms in the range 0x0100 .. 0xefff are considered for the table.
133 next if $keysym >= 0xf000 || $keysym < 0x100;
# Table row: keysym, UCS value, keysym name, the character itself as produced
# by utf8(), and the name stored in %name for it ("???" when none is defined).
135 printf(" { 0x%04x, 0x%04x }, /*%28s %s %s */\n",
136 $keysym, $ucs, $keysym_to_keysymname{$keysym}, utf8($ucs),
137 defined($name{$ucs}) ? $name{$ucs} : "???" );
# Alternative output: a comment-only line listing the keysym and its name.
# NOTE(review): the condition selecting between the two printf forms is not
# visible here; presumably this one is used when no UCS mapping is known.
139 printf("/* 0x%04x %39s ? ??? */\n",
140 $keysym, $keysym_to_keysymname{$keysym});
147 long keysym2ucs(KeySym keysym)
150 int max = sizeof(keysymtab) / sizeof(struct codepair) - 1;
153 /* first check for Latin-1 characters (1:1 mapping) */
154 if ((keysym >= 0x0020 && keysym <= 0x007e) ||
155 (keysym >= 0x00a0 && keysym <= 0x00ff))
158 /* also check for directly encoded 24-bit UCS characters */
159 if ((keysym & 0xff000000) == 0x01000000)
160 return keysym & 0x00ffffff;
162 /* binary search in table */
164 mid = (min + max) / 2;
165 if (keysymtab[mid].keysym < keysym)
167 else if (keysymtab[mid].keysym > keysym)
171 return keysymtab[mid].ucs;
175 /* no matching Unicode value found */