git.sur5r.net Git - i3/i3/blob - src/config_parser.c

   1 #undef I3__FILE__
   2 #define I3__FILE__ "config_parser.c"
   3 /*
   4  * vim:ts=4:sw=4:expandtab
   5  *
   6  * i3 - an improved dynamic tiling window manager
   7  * © 2009-2012 Michael Stapelberg and contributors (see also: LICENSE)
   8  *
   9  * config_parser.c: hand-written parser to parse configuration directives.
  10  *
  11  * See also src/commands_parser.c for rationale on why we use a custom parser.
  12  *
  13  * This parser works VERY MUCH like src/commands_parser.c, so read that first.
  14  * The differences are:
  15  *
  16  * 1. config_parser supports the 'number' token type (in addition to 'word' and
  17  *    'string'). Numbers are referred to using &num (like $str).
  18  *
  19  * 2. Criteria are not executed immediately, they are just stored.
  20  *
  21  * 3. config_parser recognizes \n and \r as 'end' token, while commands_parser
  22  *    ignores them.
  23  *
  24  * 4. config_parser skips the current line on invalid inputs and follows the
  25  *    nearest <error> token.
  26  *
  27  */
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <unistd.h>
  32 #include <stdbool.h>
  33 #include <stdint.h>
  34
  35 #include "all.h"
  36
  37 // Macros to make the YAJL API a bit easier to use.
  38 #define y(x, ...) yajl_gen_ ## x (command_output.json_gen, ##__VA_ARGS__)
  39 #define ystr(str) yajl_gen_string(command_output.json_gen, (unsigned char*)str, strlen(str))
  40
  41 /*******************************************************************************
  42  * The data structures used for parsing. Essentially the current state and a
  43  * list of tokens for that state.
  44  *
  45  * The GENERATED_* files are generated by generate-commands-parser.pl with the
  46  * input parser-specs/configs.spec.
  47  ******************************************************************************/
  48
  49 #include "GENERATED_config_enums.h"
  50
  51 typedef struct token {
  52     char *name;
  53     char *identifier;
  54     /* This might be __CALL */
  55     cmdp_state next_state;
  56     union {
  57         uint16_t call_identifier;
  58     } extra;
  59 } cmdp_token;
  60
  61 typedef struct tokenptr {
  62     cmdp_token *array;
  63     int n;
  64 } cmdp_token_ptr;
  65
  66 #include "GENERATED_config_tokens.h"
  67
  68 /*******************************************************************************
  69  * The (small) stack where identified literals are stored during the parsing
  70  * of a single command (like $workspace).
  71  ******************************************************************************/
  72
  73 struct stack_entry {
  74     /* Just a pointer, not dynamically allocated. */
  75     const char *identifier;
  76     enum {
  77         STACK_STR = 0,
  78         STACK_LONG = 1,
  79     } type;
  80     union {
  81         char *str;
  82         long num;
  83     } val;
  84 };
  85
  86 /* 10 entries should be enough for everybody. */
  87 static struct stack_entry stack[10];
  88
  89 /*
  90  * Pushes a string (identified by 'identifier') on the stack. We simply use a
  91  * single array, since the number of entries we have to store is very small.
  92  *
  93  */
  94 static void push_string(const char *identifier, const char *str) {
  95     for (int c = 0; c < 10; c++) {
  96         if (stack[c].identifier != NULL &&
  97             strcmp(stack[c].identifier, identifier) != 0)
  98             continue;
  99         if (stack[c].identifier == NULL) {
 100             /* Found a free slot, let’s store it here. */
 101             stack[c].identifier = identifier;
 102             stack[c].val.str = sstrdup(str);
 103             stack[c].type = STACK_STR;
 104         } else {
 105             /* Append the value. */
 106             char *prev = stack[c].val.str;
 107             sasprintf(&(stack[c].val.str), "%s,%s", prev, str);
 108             free(prev);
 109         }
 110         return;
 111     }
 112
 113     /* When we arrive here, the stack is full. This should not happen and
 114      * means there’s either a bug in this parser or the specification
 115      * contains a command with more than 10 identified tokens. */
 116     fprintf(stderr, "BUG: commands_parser stack full. This means either a bug "
 117                     "in the code, or a new command which contains more than "
 118                     "10 identified tokens.\n");
 119     exit(1);
 120 }
 121
 122 static void push_long(const char *identifier, long num) {
 123     for (int c = 0; c < 10; c++) {
 124         if (stack[c].identifier != NULL)
 125             continue;
 126         /* Found a free slot, let’s store it here. */
 127         stack[c].identifier = identifier;
 128         stack[c].val.num = num;
 129         stack[c].type = STACK_LONG;
 130         return;
 131     }
 132
 133     /* When we arrive here, the stack is full. This should not happen and
 134      * means there’s either a bug in this parser or the specification
 135      * contains a command with more than 10 identified tokens. */
 136     fprintf(stderr, "BUG: commands_parser stack full. This means either a bug "
 137                     "in the code, or a new command which contains more than "
 138                     "10 identified tokens.\n");
 139     exit(1);
 140
 141 }
 142
 143 static const char *get_string(const char *identifier) {
 144     for (int c = 0; c < 10; c++) {
 145         if (stack[c].identifier == NULL)
 146             break;
 147         if (strcmp(identifier, stack[c].identifier) == 0)
 148             return stack[c].val.str;
 149     }
 150     return NULL;
 151 }
 152
 153 static const long get_long(const char *identifier) {
 154     for (int c = 0; c < 10; c++) {
 155         if (stack[c].identifier == NULL)
 156             break;
 157         if (strcmp(identifier, stack[c].identifier) == 0)
 158             return stack[c].val.num;
 159     }
 160     return 0;
 161 }
 162
 163 static void clear_stack(void) {
 164     for (int c = 0; c < 10; c++) {
 165         if (stack[c].type == STACK_STR && stack[c].val.str != NULL)
 166             free(stack[c].val.str);
 167         stack[c].identifier = NULL;
 168         stack[c].val.str = NULL;
 169         stack[c].val.num = 0;
 170     }
 171 }
 172
 173 // TODO: remove this if it turns out we don’t need it for testing.
 174 #if 0
 175 /*******************************************************************************
 176  * A dynamically growing linked list which holds the criteria for the current
 177  * command.
 178  ******************************************************************************/
 179
 180 typedef struct criterion {
 181     char *type;
 182     char *value;
 183
 184     TAILQ_ENTRY(criterion) criteria;
 185 } criterion;
 186
 187 static TAILQ_HEAD(criteria_head, criterion) criteria =
 188   TAILQ_HEAD_INITIALIZER(criteria);
 189
 190 /*
 191  * Stores the given type/value in the list of criteria.
 192  * Accepts a pointer as first argument, since it is 'call'ed by the parser.
 193  *
 194  */
 195 static void push_criterion(void *unused_criteria, const char *type,
 196                            const char *value) {
 197     struct criterion *criterion = malloc(sizeof(struct criterion));
 198     criterion->type = strdup(type);
 199     criterion->value = strdup(value);
 200     TAILQ_INSERT_TAIL(&criteria, criterion, criteria);
 201 }
 202
 203 /*
 204  * Clears the criteria linked list.
 205  * Accepts a pointer as first argument, since it is 'call'ed by the parser.
 206  *
 207  */
 208 static void clear_criteria(void *unused_criteria) {
 209     struct criterion *criterion;
 210     while (!TAILQ_EMPTY(&criteria)) {
 211         criterion = TAILQ_FIRST(&criteria);
 212         free(criterion->type);
 213         free(criterion->value);
 214         TAILQ_REMOVE(&criteria, criterion, criteria);
 215         free(criterion);
 216     }
 217 }
 218 #endif
 219
 220 /*******************************************************************************
 221  * The parser itself.
 222  ******************************************************************************/
 223
 224 static cmdp_state state;
 225 static Match current_match;
 226 static struct ConfigResult subcommand_output;
 227 static struct ConfigResult command_output;
 228
 229 /* A list which contains the states that lead to the current state, e.g.
 230  * INITIAL, WORKSPACE_LAYOUT.
 231  * When jumping back to INITIAL, statelist_idx will simply be set to 1
 232  * (likewise for other states, e.g. MODE or BAR).
 233  * This list is used to process the nearest error token. */
 234 static cmdp_state statelist[10] = { INITIAL };
 235 /* NB: statelist_idx points to where the next entry will be inserted */
 236 static int statelist_idx = 1;
 237
 238 #include "GENERATED_config_call.h"
 239
 240
 241 static void next_state(const cmdp_token *token) {
 242     cmdp_state _next_state = token->next_state;
 243
 244         //printf("token = name %s identifier %s\n", token->name, token->identifier);
 245         //printf("next_state = %d\n", token->next_state);
 246     if (token->next_state == __CALL) {
 247         subcommand_output.json_gen = command_output.json_gen;
 248         GENERATED_call(token->extra.call_identifier, &subcommand_output);
 249         _next_state = subcommand_output.next_state;
 250         clear_stack();
 251     }
 252
 253     state = _next_state;
 254     if (state == INITIAL) {
 255         clear_stack();
 256     }
 257
 258     /* See if we are jumping back to a state in which we were in previously
 259      * (statelist contains INITIAL) and just move statelist_idx accordingly. */
 260     for (int i = 0; i < statelist_idx; i++) {
 261         if (statelist[i] != _next_state)
 262             continue;
 263         statelist_idx = i+1;
 264         return;
 265     }
 266
 267     /* Otherwise, the state is new and we add it to the list */
 268     statelist[statelist_idx++] = _next_state;
 269 }
 270
 271 /*
 272  * Returns a pointer to the start of the line (one byte after the previous \r,
 273  * \n) or the start of the input, if this is the first line.
 274  *
 275  */
 276 static const char *start_of_line(const char *walk, const char *beginning) {
 277     while (*walk != '\n' && *walk != '\r' && walk >= beginning) {
 278         walk--;
 279     }
 280
 281     return walk + 1;
 282 }
 283
 284 /*
 285  * Copies the line and terminates it at the next \n, if any.
 286  *
 287  * The caller has to free() the result.
 288  *
 289  */
 290 static char *single_line(const char *start) {
 291     char *result = sstrdup(start);
 292     char *end = strchr(result, '\n');
 293     if (end != NULL)
 294         *end = '\0';
 295     return result;
 296 }
 297
 298 struct ConfigResult *parse_config(const char *input, struct context *context) {
 299     /* Dump the entire config file into the debug log. We cannot just use
 300      * DLOG("%s", input); because one log message must not exceed 4 KiB. */
 301     const char *dumpwalk = input;
 302     int linecnt = 1;
 303     while (*dumpwalk != '\0') {
 304         char *next_nl = strchr(dumpwalk, '\n');
 305         if (next_nl != NULL) {
 306             DLOG("CONFIG(line %3d): %.*s\n", linecnt, (int)(next_nl - dumpwalk), dumpwalk);
 307             dumpwalk = next_nl + 1;
 308         } else {
 309             DLOG("CONFIG(line %3d): %s\n", linecnt, dumpwalk);
 310             break;
 311         }
 312         linecnt++;
 313     }
 314     state = INITIAL;
 315     statelist_idx = 1;
 316
 317 /* A YAJL JSON generator used for formatting replies. */
 318 #if YAJL_MAJOR >= 2
 319     command_output.json_gen = yajl_gen_alloc(NULL);
 320 #else
 321     command_output.json_gen = yajl_gen_alloc(NULL, NULL);
 322 #endif
 323
 324     y(array_open);
 325
 326     const char *walk = input;
 327     const size_t len = strlen(input);
 328     int c;
 329     const cmdp_token *token;
 330     bool token_handled;
 331     linecnt = 1;
 332
 333     // TODO: make this testable
 334 #ifndef TEST_PARSER
 335     cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
 336 #endif
 337
 338     /* The "<=" operator is intentional: We also handle the terminating 0-byte
 339      * explicitly by looking for an 'end' token. */
 340     while ((walk - input) <= len) {
 341         /* Skip whitespace before every token, newlines are relevant since they
 342          * separate configuration directives. */
 343         while ((*walk == ' ' || *walk == '\t') && *walk != '\0')
 344             walk++;
 345
 346                 //printf("remaining input: %s\n", walk);
 347
 348         cmdp_token_ptr *ptr = &(tokens[state]);
 349         token_handled = false;
 350         for (c = 0; c < ptr->n; c++) {
 351             token = &(ptr->array[c]);
 352
 353             /* A literal. */
 354             if (token->name[0] == '\'') {
 355                 if (strncasecmp(walk, token->name + 1, strlen(token->name) - 1) == 0) {
 356                     if (token->identifier != NULL)
 357                         push_string(token->identifier, token->name + 1);
 358                     walk += strlen(token->name) - 1;
 359                     next_state(token);
 360                     token_handled = true;
 361                     break;
 362                 }
 363                 continue;
 364             }
 365
 366             if (strcmp(token->name, "number") == 0) {
 367                 /* Handle numbers. We only accept decimal numbers for now. */
 368                 char *end = NULL;
 369                 errno = 0;
 370                 long int num = strtol(walk, &end, 10);
 371                 if ((errno == ERANGE && (num == LONG_MIN || num == LONG_MAX)) ||
 372                     (errno != 0 && num == 0))
 373                     continue;
 374
 375                 /* No valid numbers found */
 376                 if (end == walk)
 377                     continue;
 378
 379                 if (token->identifier != NULL)
 380                     push_long(token->identifier, num);
 381
 382                 /* Set walk to the first non-number character */
 383                 walk = end;
 384                 next_state(token);
 385                 token_handled = true;
 386                 break;
 387             }
 388
 389             if (strcmp(token->name, "string") == 0 ||
 390                 strcmp(token->name, "word") == 0) {
 391                 const char *beginning = walk;
 392                 /* Handle quoted strings (or words). */
 393                 if (*walk == '"') {
 394                     beginning++;
 395                     walk++;
 396                     while (*walk != '\0' && (*walk != '"' || *(walk-1) == '\\'))
 397                         walk++;
 398                 } else {
 399                     if (token->name[0] == 's') {
 400                         while (*walk != '\0' && *walk != '\r' && *walk != '\n')
 401                             walk++;
 402                     } else {
 403                         /* For a word, the delimiters are white space (' ' or
 404                          * '\t'), closing square bracket (]), comma (,) and
 405                          * semicolon (;). */
 406                         while (*walk != ' ' && *walk != '\t' &&
 407                                *walk != ']' && *walk != ',' &&
 408                                *walk !=  ';' && *walk != '\r' &&
 409                                *walk != '\n' && *walk != '\0')
 410                             walk++;
 411                     }
 412                 }
 413                 if (walk != beginning) {
 414                     char *str = scalloc(walk-beginning + 1);
 415                     /* We copy manually to handle escaping of characters. */
 416                     int inpos, outpos;
 417                     for (inpos = 0, outpos = 0;
 418                          inpos < (walk-beginning);
 419                          inpos++, outpos++) {
 420                         /* We only handle escaped double quotes to not break
 421                          * backwards compatibility with people using \w in
 422                          * regular expressions etc. */
 423                         if (beginning[inpos] == '\\' && beginning[inpos+1] == '"')
 424                             inpos++;
 425                         str[outpos] = beginning[inpos];
 426                     }
 427                     if (token->identifier)
 428                         push_string(token->identifier, str);
 429                     free(str);
 430                     /* If we are at the end of a quoted string, skip the ending
 431                      * double quote. */
 432                     if (*walk == '"')
 433                         walk++;
 434                     next_state(token);
 435                     token_handled = true;
 436                     break;
 437                 }
 438             }
 439
 440             if (strcmp(token->name, "end") == 0) {
 441                 //printf("checking for end: *%s*\n", walk);
 442                 if (*walk == '\0' || *walk == '\n' || *walk == '\r') {
 443                     next_state(token);
 444                     token_handled = true;
 445                     /* To make sure we start with an appropriate matching
 446                      * datastructure for commands which do *not* specify any
 447                      * criteria, we re-initialize the criteria system after
 448                      * every command. */
 449                     // TODO: make this testable
 450 #ifndef TEST_PARSER
 451                     cfg_criteria_init(&current_match, &subcommand_output, INITIAL);
 452 #endif
 453                     linecnt++;
 454                     walk++;
 455                     break;
 456                }
 457            }
 458         }
 459
 460         if (!token_handled) {
 461             /* Figure out how much memory we will need to fill in the names of
 462              * all tokens afterwards. */
 463             int tokenlen = 0;
 464             for (c = 0; c < ptr->n; c++)
 465                 tokenlen += strlen(ptr->array[c].name) + strlen("'', ");
 466
 467             /* Build up a decent error message. We include the problem, the
 468              * full input, and underline the position where the parser
 469              * currently is. */
 470             char *errormessage;
 471             char *possible_tokens = smalloc(tokenlen + 1);
 472             char *tokenwalk = possible_tokens;
 473             for (c = 0; c < ptr->n; c++) {
 474                 token = &(ptr->array[c]);
 475                 if (token->name[0] == '\'') {
 476                     /* A literal is copied to the error message enclosed with
 477                      * single quotes. */
 478                     *tokenwalk++ = '\'';
 479                     strcpy(tokenwalk, token->name + 1);
 480                     tokenwalk += strlen(token->name + 1);
 481                     *tokenwalk++ = '\'';
 482                 } else {
 483                     /* Skip error tokens in error messages, they are used
 484                      * internally only and might confuse users. */
 485                     if (strcmp(token->name, "error") == 0)
 486                         continue;
 487                     /* Any other token is copied to the error message enclosed
 488                      * with angle brackets. */
 489                     *tokenwalk++ = '<';
 490                     strcpy(tokenwalk, token->name);
 491                     tokenwalk += strlen(token->name);
 492                     *tokenwalk++ = '>';
 493                 }
 494                 if (c < (ptr->n - 1)) {
 495                     *tokenwalk++ = ',';
 496                     *tokenwalk++ = ' ';
 497                 }
 498             }
 499             *tokenwalk = '\0';
 500             sasprintf(&errormessage, "Expected one of these tokens: %s",
 501                       possible_tokens);
 502             free(possible_tokens);
 503
 504
 505             /* Go back to the beginning of the line */
 506             const char *error_line = start_of_line(walk, input);
 507
 508             /* Contains the same amount of characters as 'input' has, but with
 509              * the unparseable part highlighted using ^ characters. */
 510             char *position = scalloc(strlen(error_line) + 1);
 511             const char *copywalk;
 512             for (copywalk = error_line;
 513                  *copywalk != '\n' && *copywalk != '\r' && *copywalk != '\0';
 514                  copywalk++)
 515                 position[(copywalk - error_line)] = (copywalk >= walk ? '^' : (*copywalk == '\t' ? '\t' : ' '));
 516             position[(copywalk - error_line)] = '\0';
 517
 518             ELOG("CONFIG: %s\n", errormessage);
 519             ELOG("CONFIG: (in file %s)\n", context->filename);
 520             char *error_copy = single_line(error_line);
 521
 522             /* Print context lines *before* the error, if any. */
 523             if (linecnt > 1) {
 524                 const char *context_p1_start = start_of_line(error_line-2, input);
 525                 char *context_p1_line = single_line(context_p1_start);
 526                 if (linecnt > 2) {
 527                     const char *context_p2_start = start_of_line(context_p1_start-2, input);
 528                     char *context_p2_line = single_line(context_p2_start);
 529                     ELOG("CONFIG: Line %3d: %s\n", linecnt - 2, context_p2_line);
 530                     free(context_p2_line);
 531                 }
 532                 ELOG("CONFIG: Line %3d: %s\n", linecnt - 1, context_p1_line);
 533                 free(context_p1_line);
 534             }
 535             ELOG("CONFIG: Line %3d: %s\n", linecnt, error_copy);
 536             ELOG("CONFIG:           %s\n", position);
 537             free(error_copy);
 538             /* Print context lines *after* the error, if any. */
 539             for (int i = 0; i < 2; i++) {
 540                 char *error_line_end = strchr(error_line, '\n');
 541                 if (error_line_end != NULL && *(error_line_end + 1) != '\0') {
 542                     error_line = error_line_end + 1;
 543                     error_copy = single_line(error_line);
 544                     ELOG("CONFIG: Line %3d: %s\n", linecnt + i + 1, error_copy);
 545                     free(error_copy);
 546                 }
 547             }
 548
 549             context->has_errors = true;
 550
 551             /* Format this error message as a JSON reply. */
 552             y(map_open);
 553             ystr("success");
 554             y(bool, false);
 555             /* We set parse_error to true to distinguish this from other
 556              * errors. i3-nagbar is spawned upon keypresses only for parser
 557              * errors. */
 558             ystr("parse_error");
 559             y(bool, true);
 560             ystr("error");
 561             ystr(errormessage);
 562             ystr("input");
 563             ystr(input);
 564             ystr("errorposition");
 565             ystr(position);
 566             y(map_close);
 567
 568             /* Skip the rest of this line, but continue parsing. */
 569             while ((walk - input) <= len && *walk != '\n')
 570                 walk++;
 571
 572             free(position);
 573             free(errormessage);
 574             clear_stack();
 575
 576             /* To figure out in which state to go (e.g. MODE or INITIAL),
 577              * we find the nearest state which contains an <error> token
 578              * and follow that one. */
 579             bool error_token_found = false;
 580             for (int i = statelist_idx-1; (i >= 0) && !error_token_found; i--) {
 581                 cmdp_token_ptr *errptr = &(tokens[statelist[i]]);
 582                 for (int j = 0; j < errptr->n; j++) {
 583                     if (strcmp(errptr->array[j].name, "error") != 0)
 584                         continue;
 585                     next_state(&(errptr->array[j]));
 586                     error_token_found = true;
 587                     break;
 588                 }
 589             }
 590
 591             assert(error_token_found);
 592         }
 593     }
 594
 595     y(array_close);
 596
 597     return &command_output;
 598 }
 599
 600 /*******************************************************************************
 601  * Code for building the stand-alone binary test.commands_parser which is used
 602  * by t/187-commands-parser.t.
 603  ******************************************************************************/
 604
 605 #ifdef TEST_PARSER
 606
 607 /*
 608  * Logs the given message to stdout while prefixing the current time to it,
 609  * but only if debug logging was activated.
 610  * This is to be called by DLOG() which includes filename/linenumber
 611  *
 612  */
 613 void debuglog(char *fmt, ...) {
 614     va_list args;
 615
 616     va_start(args, fmt);
 617     fprintf(stdout, "# ");
 618     vfprintf(stdout, fmt, args);
 619     va_end(args);
 620 }
 621
 622 void errorlog(char *fmt, ...) {
 623     va_list args;
 624
 625     va_start(args, fmt);
 626     vfprintf(stderr, fmt, args);
 627     va_end(args);
 628 }
 629
 630 static int criteria_next_state;
 631
 632 void cfg_criteria_init(I3_CFG, int _state) {
 633     criteria_next_state = _state;
 634 }
 635
 636 void cfg_criteria_add(I3_CFG, const char *ctype, const char *cvalue) {
 637 }
 638
 639 void cfg_criteria_pop_state(I3_CFG) {
 640     result->next_state = criteria_next_state;
 641 }
 642
 643 int main(int argc, char *argv[]) {
 644     if (argc < 2) {
 645         fprintf(stderr, "Syntax: %s <command>\n", argv[0]);
 646         return 1;
 647     }
 648     struct context context;
 649     context.filename = "<stdin>";
 650     parse_config(argv[1], &context);
 651 }
 652 #endif