5 /* token classification
14 /* struct token *tok_class()
16 /* tok_class() collects single and composite tokens, and
17 /* recognizes keywords.
18 /* At present, the only composite tokens are ()-delimited,
19 /* comma-separated lists, and non-whitespace tokens with attached
20 /* whitespace or comment tokens.
22 /* Source transformations are: __DATE__ and __TIME__ are rewritten
23 /* to string constants with the current date and time, respectively.
24 /* Multiple string constants are concatenated. Optionally, "void *"
25 /* is mapped to "char *", and plain "void" to "int".
27 /* tok_unget() implements an arbitrary amount of token pushback.
28 /* Only tokens obtained through tok_class() should be given to
29 /* tok_unget(). This function accepts a list of tokens in
30 /* last-read-first order.
32 /* The code complains if input terminates in the middle of a list.
34 /* Does not preserve white space at the beginning of a list element
35 /* or after the end of a list.
38 /* Eindhoven University of Technology
39 /* Department of Mathematics and Computer Science
40 /* Den Dolech 2, P.O. Box 513, 5600 MB Eindhoven, The Netherlands
47 static char class_sccsid[] = "@(#) tok_class.c 1.4 92/01/15 21:53:02";
53 extern char *strcpy();
57 /* Application-specific stuff */
64 static struct token *tok_list();
65 static void tok_list_struct();
66 static void tok_list_append();
67 static void tok_strcat();
68 static void tok_time();
69 static void tok_date();
70 static void tok_space_append();
72 #if defined(MAP_VOID_STAR) || defined(MAP_VOID)
73 static void tok_void(); /* rewrite void keyword */
76 static struct token *tok_buf = 0; /* token push-back storage */
/*
 * TOK_PREPEND - add token to LIFO queue, return head
 *
 * Links t in front of the current head (t->next = list), then makes t the
 * new head (list = t). The value of the expression is the new head, so the
 * result can be used directly, e.g. TOK_PREPEND(q, t)->tokno.
 *
 * "list" must be a modifiable lvalue. Both arguments are evaluated twice,
 * so do not pass expressions with side effects. The parameters are
 * parenthesized so that arguments such as "&tok" or "cond ? a : b" expand
 * with the intended precedence.
 */

#define TOK_PREPEND(list,t) ((t)->next = (list), (list) = (t))
82 /* tok_space_append - append trailing space except at start of or after list */
84 static void tok_space_append(list, t)
85 register struct token *list;
86 register struct token *t;
90 * The head/tail fields of a token do triple duty. They are used to keep
91 * track of the members that make up a (list); to keep track of the
92 * non-blank tokens that make up one list member; and, finally, to tack
93 * whitespace and comment tokens onto the non-blank tokens that make up
96 * Within a (list), white space and comment tokens are always tacked onto
97 * the non-blank tokens to avoid parsing complications later on. For this
98 * reason, blanks and comments at the beginning of a list member are
99 * discarded because there is no token to tack them onto. (Well, we could
100 * start each list member with a dummy token, but that would mess up the
101 * whole unprototyper).
103 * Blanks or comments that follow a (list) are discarded, because the
104 * head/tail fields of a (list) are already being used for other
107 * Newlines within a (list) are discarded because they can mess up the
108 * output when we rewrite function headers. The output routines will
109 * regenerate discarded newlines, anyway.
112 if (list == 0 || list->tokno == TOK_LIST) {
115 tok_list_append(list, t);
119 /* tok_class - discriminate single tokens, keywords, and composite tokens */
121 struct token *tok_class()
123 register struct token *t;
124 register struct symbol *s;
127 * Use push-back token, if available. Push-back tokens are already
128 * canonical and can be passed on to the caller without further
137 /* Read a new token and canonicalize it. */
141 case '(': /* beginning of list */
144 case TOK_WORD: /* look up keyword */
145 if ((s = sym_find(t->vstr->str))) {
147 case TOK_TIME: /* map __TIME__ to string */
149 tok_strcat(t); /* look for more strings */
151 case TOK_DATE: /* map __DATE__ to string */
153 tok_strcat(t); /* look for more strings */
155 #if defined(MAP_VOID_STAR) || defined(MAP_VOID)
156 case TOK_VOID: /* optionally map void types */
160 default: /* other keyword */
166 case '"': /* string, look for more */
174 /* tok_list - collect ()-delimited, comma-separated list of tokens */
176 static struct token *tok_list(t)
179 register struct token *list = tok_alloc();
183 /* Save context of '(' for diagnostics. */
188 list->tokno = TOK_LIST;
189 list->head = list->tail = t;
190 list->path = t->path;
191 list->line = t->line;
193 strcpy(list->vstr->str, "LIST");
197 * Read until the matching ')' is found, accounting for structured stuff
198 * (enclosed by '{' and '}' tokens). Break the list up at each ',' token,
199 * and try to preserve as much whitespace as possible. Newlines are
200 * discarded so that they will not mess up the layout when we rewrite
201 * argument lists. The output routines will regenerate discarded
205 while (t = tok_class()) { /* skip blanks */
207 case ')': /* end of list */
208 tok_list_append(list, t);
210 case '{': /* struct/union type */
211 tok_list_struct(list->tail, t);
213 case TOK_WSPACE: /* preserve trailing blanks */
214 tok_space_append(list->tail->tail, t); /* except after list */
216 case '\n': /* fix newlines later */
219 case ',': /* list separator */
220 tok_list_append(list, t);
223 tok_list_append(list->tail, t);
227 error_where(filename, lineno, "unmatched '('");
228 return (list); /* do not waste any data */
231 /* tok_list_struct - collect structured type info within list */
233 static void tok_list_struct(list, t)
234 register struct token *list;
235 register struct token *t;
241 * Save context of '{' for diagnostics. This routine is called by the one
242 * that collects list members. If the '}' is not found, the list
243 * collector will not see the closing ')' either.
249 tok_list_append(list, t);
252 * Collect tokens until the matching '}' is found. Try to preserve as
253 * much whitespace as possible. Newlines are discarded so that they do
254 * not interfere when rewriting argument lists. The output routines will
255 * regenerate discarded newlines.
258 while (t = tok_class()) {
260 case TOK_WSPACE: /* preserve trailing blanks */
261 tok_space_append(list->tail, t); /* except after list */
263 case '\n': /* fix newlines later */
266 case '{': /* recurse */
267 tok_list_struct(list, t);
270 tok_list_append(list, t);
273 tok_list_append(list, t);
277 error_where(filename, lineno, "unmatched '{'");
280 /* tok_strcat - concatenate multiple string constants */
282 static void tok_strcat(t1)
283 register struct token *t1;
285 register struct token *t2;
286 register struct token *lookahead = 0;
289 * Read ahead past whitespace, comments and newlines. If we find a string
290 * token, concatenate it with the previous one and push back the
291 * intervening tokens (thus preserving as much information as possible).
292 * If we find something else, push back all lookahead tokens.
295 #define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; }
297 while (t2 = tok_class()) {
299 case TOK_WSPACE: /* read past comments/blanks */
300 case '\n': /* read past newlines */
301 TOK_PREPEND(lookahead, t2);
303 case '"': /* concatenate string tokens */
304 if (vs_strcpy(t1->vstr,
305 t1->vstr->str + strlen(t1->vstr->str) - 1,
306 t2->vstr->str + 1) == 0)
307 fatal("out of memory");
310 default: /* something else, push back */
315 PUSHBACK_AND_RETURN; /* hit EOF */
318 #if defined(MAP_VOID_STAR) || defined(MAP_VOID)
320 /* tok_void - support for compilers that have problems with "void" */
322 static void tok_void(t)
323 register struct token *t;
325 register struct token *t2;
326 register struct token *lookahead = 0;
329 * Look ahead beyond whitespace, comments and newlines until we see a '*'
330 * token. If one is found, replace "void" by "char". If we find something
331 * else, and if "void" should always be mapped, replace "void" by "int".
332 * Always push back the lookahead tokens.
334 * XXX The code also replaces the (void) argument list; this must be
335 * accounted for later on. The alternative would be to add (in unproto.c)
336 * TOK_VOID cases all over the place and that would be too error-prone.
339 #define PUSHBACK_AND_RETURN { if (lookahead) tok_unget(lookahead); return; }
341 while (t2 = tok_class()) {
342 switch (TOK_PREPEND(lookahead, t2)->tokno) {
343 case TOK_WSPACE: /* read past comments/blanks */
344 case '\n': /* read past newline */
346 case '*': /* "void *" -> "char *" */
347 if (vs_strcpy(t->vstr, t->vstr->str, "char") == 0)
348 fatal("out of memory");
351 #ifdef MAP_VOID /* plain "void" -> "int" */
352 if (vs_strcpy(t->vstr, t->vstr->str, "int") == 0)
353 fatal("out of memory");
358 PUSHBACK_AND_RETURN; /* hit EOF */
363 /* tok_time - rewrite __TIME__ to "hh:mm:ss" string constant */
365 static void tok_time(t)
373 * Using sprintf() to select parts of a string is gross, but this should
379 sprintf(buf, "\"%.8s\"", cp + 11);
380 if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0)
381 fatal("out of memory");
385 /* tok_date - rewrite __DATE__ to "Mmm dd yyyy" string constant */
387 static void tok_date(t)
395 * Using sprintf() to select parts of a string is gross, but this should
401 sprintf(buf, "\"%.3s %.2s %.4s\"", cp + 4, cp + 8, cp + 20);
402 if (vs_strcpy(t->vstr, t->vstr->str, buf) == 0)
403 fatal("out of memory");
407 /* tok_unget - push back one or more possibly composite tokens */
410 register struct token *t;
412 register struct token *next;
416 TOK_PREPEND(tok_buf, t);
420 /* tok_list_append - append data to list */
422 static void tok_list_append(h, t)
427 h->head = h->tail = t;