git.sur5r.net Git - i3/i3/blob - src/commands_parser.c

   1 /*
   2  * vim:ts=4:sw=4:expandtab
   3  *
   4  * i3 - an improved dynamic tiling window manager
   5  * © 2009-2012 Michael Stapelberg and contributors (see also: LICENSE)
   6  *
   7  * commands_parser.c: hand-written parser to parse commands (commands are what
   8  * you bind on keys and what you can send to i3 using the IPC interface, like
   9  * 'move left' or 'workspace 4').
  10  *
  11  * We use a hand-written parser instead of lex/yacc because our commands are
  12  * easy for humans, not for computers. Thus, it’s quite hard to specify a
  13  * context-free grammar for the commands. A PEG grammar would be easier, but
  14  * there are downsides to every PEG parser generator I have come across so far.
  15  *
  16  * This parser is basically a state machine which looks for literals or strings
  17  * and can push either on a stack. After identifying a literal or string, it
  18  * will either transition to the current state, to a different state, or call a
  19  * function (like cmd_move()).
  20  *
  21  * Special care has been taken that error messages are useful and the code is
  22  * well testable (when compiled with -DTEST_PARSER it will output to stdout
  23  * instead of actually calling any function).
  24  *
  25  */
  26 #include <stdio.h>
  27 #include <stdlib.h>
  28 #include <string.h>
  29 #include <unistd.h>
  30 #include <stdbool.h>
  31 #include <stdint.h>
  32
  33 #include "all.h"
  34 #include "queue.h"
  35
  36 /*******************************************************************************
  37  * The data structures used for parsing. Essentially the current state and a
  38  * list of tokens for that state.
  39  *
  40  * The GENERATED_* files are generated by generate-commands-parser.pl with the
  41  * input parser-specs/commands.spec.
  42  ******************************************************************************/
  43
  44 #include "GENERATED_enums.h"
  45
  46 typedef struct token {
  47     char *name;
  48     char *identifier;
  49     /* This might be __CALL */
  50     cmdp_state next_state;
  51     union {
  52         uint16_t call_identifier;
  53     } extra;
  54 } cmdp_token;
  55
  56 typedef struct tokenptr {
  57     cmdp_token *array;
  58     int n;
  59 } cmdp_token_ptr;
  60
  61 #include "GENERATED_tokens.h"
  62
  63 /*******************************************************************************
  64  * The (small) stack where identified literals are stored during the parsing
  65  * of a single command (like $workspace).
  66  ******************************************************************************/
  67
  68 struct stack_entry {
  69     /* Just a pointer, not dynamically allocated. */
  70     const char *identifier;
  71     char *str;
  72 };
  73
  74 /* 10 entries should be enough for everybody. */
  75 static struct stack_entry stack[10];
  76
  77 /*
  78  * Pushes a string (identified by 'identifier') on the stack. We simply use a
  79  * single array, since the number of entries we have to store is very small.
  80  *
  81  */
  82 static void push_string(const char *identifier, char *str) {
  83     for (int c = 0; c < 10; c++) {
  84         if (stack[c].identifier != NULL)
  85             continue;
  86         /* Found a free slot, let’s store it here. */
  87         stack[c].identifier = identifier;
  88         stack[c].str = str;
  89         return;
  90     }
  91
  92     /* When we arrive here, the stack is full. This should not happen and
  93      * means there’s either a bug in this parser or the specification
  94      * contains a command with more than 10 identified tokens. */
  95     printf("argh! stack full\n");
  96     exit(1);
  97 }
  98
  99 // XXX: ideally, this would be const char. need to check if that works with all
 100 // called functions.
 101 static char *get_string(const char *identifier) {
 102     DLOG("Getting string %s from stack...\n", identifier);
 103     for (int c = 0; c < 10; c++) {
 104         if (stack[c].identifier == NULL)
 105             break;
 106         if (strcmp(identifier, stack[c].identifier) == 0)
 107             return stack[c].str;
 108     }
 109     return NULL;
 110 }
 111
 112 static void clear_stack() {
 113     DLOG("clearing stack.\n");
 114     for (int c = 0; c < 10; c++) {
 115         if (stack[c].str != NULL)
 116             free(stack[c].str);
 117         stack[c].identifier = NULL;
 118         stack[c].str = NULL;
 119     }
 120 }
 121
 122 // TODO: remove this if it turns out we don’t need it for testing.
 123 #if 0
 124 /*******************************************************************************
 125  * A dynamically growing linked list which holds the criteria for the current
 126  * command.
 127  ******************************************************************************/
 128
 129 typedef struct criterion {
 130     char *type;
 131     char *value;
 132
 133     TAILQ_ENTRY(criterion) criteria;
 134 } criterion;
 135
 136 static TAILQ_HEAD(criteria_head, criterion) criteria =
 137   TAILQ_HEAD_INITIALIZER(criteria);
 138
 139 /*
 140  * Stores the given type/value in the list of criteria.
 141  * Accepts a pointer as first argument, since it is 'call'ed by the parser.
 142  *
 143  */
 144 static void push_criterion(void *unused_criteria, const char *type,
 145                            const char *value) {
 146     struct criterion *criterion = malloc(sizeof(struct criterion));
 147     criterion->type = strdup(type);
 148     criterion->value = strdup(value);
 149     TAILQ_INSERT_TAIL(&criteria, criterion, criteria);
 150 }
 151
 152 /*
 153  * Clears the criteria linked list.
 154  * Accepts a pointer as first argument, since it is 'call'ed by the parser.
 155  *
 156  */
 157 static void clear_criteria(void *unused_criteria) {
 158     struct criterion *criterion;
 159     while (!TAILQ_EMPTY(&criteria)) {
 160         criterion = TAILQ_FIRST(&criteria);
 161         free(criterion->type);
 162         free(criterion->value);
 163         TAILQ_REMOVE(&criteria, criterion, criteria);
 164         free(criterion);
 165     }
 166 }
 167 #endif
 168
 169 /*******************************************************************************
 170  * The parser itself.
 171  ******************************************************************************/
 172
 173 static cmdp_state state;
 174 #ifndef TEST_PARSER
 175 static Match current_match;
 176 #endif
 177 static char *json_output;
 178
 179 #include "GENERATED_call.h"
 180
 181
 182 static void next_state(const cmdp_token *token) {
 183     if (token->next_state == __CALL) {
 184         DLOG("should call stuff, yay. call_id = %d\n",
 185                 token->extra.call_identifier);
 186         json_output = GENERATED_call(token->extra.call_identifier);
 187         clear_stack();
 188         return;
 189     }
 190
 191     state = token->next_state;
 192     if (state == INITIAL) {
 193         clear_stack();
 194     }
 195 }
 196
 197 /* TODO: Return parsing errors via JSON. */
 198 char *parse_command(const char *input) {
 199     DLOG("new parser handling: %s\n", input);
 200     state = INITIAL;
 201     json_output = NULL;
 202
 203     const char *walk = input;
 204     const size_t len = strlen(input);
 205     int c;
 206     const cmdp_token *token;
 207     bool token_handled;
 208
 209     // TODO: make this testable
 210 #ifndef TEST_PARSER
 211     cmd_criteria_init(&current_match);
 212 #endif
 213
 214     /* The "<=" operator is intentional: We also handle the terminating 0-byte
 215      * explicitly by looking for an 'end' token. */
 216     while ((walk - input) <= len) {
 217         /* skip whitespace before every token */
 218         while ((*walk == ' ' || *walk == '\t') && *walk != '\0')
 219             walk++;
 220
 221         DLOG("remaining input = %s\n", walk);
 222
 223         cmdp_token_ptr *ptr = &(tokens[state]);
 224         token_handled = false;
 225         for (c = 0; c < ptr->n; c++) {
 226             token = &(ptr->array[c]);
 227             DLOG("trying token %d = %s\n", c, token->name);
 228
 229             /* A literal. */
 230             if (token->name[0] == '\'') {
 231                 DLOG("literal\n");
 232                 if (strncasecmp(walk, token->name + 1, strlen(token->name) - 1) == 0) {
 233                     DLOG("found literal, moving to next state\n");
 234                     if (token->identifier != NULL)
 235                         push_string(token->identifier, strdup(token->name + 1));
 236                     walk += strlen(token->name) - 1;
 237                     next_state(token);
 238                     token_handled = true;
 239                     break;
 240                 }
 241                 continue;
 242             }
 243
 244             if (strcmp(token->name, "string") == 0 ||
 245                 strcmp(token->name, "word") == 0) {
 246                 DLOG("parsing this as a string\n");
 247                 const char *beginning = walk;
 248                 /* Handle quoted strings (or words). */
 249                 if (*walk == '"') {
 250                     beginning++;
 251                     walk++;
 252                     while (*walk != '"' || *(walk-1) == '\\')
 253                         walk++;
 254                 } else {
 255                     if (token->name[0] == 's') {
 256                         /* For a string (starting with 's'), the delimiters are
 257                          * comma (,) and semicolon (;) which introduce a new
 258                          * operation or command, respectively. */
 259                         while (*walk != ';' && *walk != ',' && *walk != '\0')
 260                             walk++;
 261                     } else {
 262                         /* For a word, the delimiters are white space (' ' or
 263                          * '\t'), closing square bracket (]), comma (,) and
 264                          * semicolon (;). */
 265                         while (*walk != ' ' && *walk != '\t' && *walk != ']' &&
 266                                *walk != ',' && *walk !=  ';' && *walk != '\0')
 267                             walk++;
 268                     }
 269                 }
 270                 if (walk != beginning) {
 271                     char *str = calloc(walk-beginning + 1, 1);
 272                     strncpy(str, beginning, walk-beginning);
 273                     if (token->identifier)
 274                         push_string(token->identifier, str);
 275                     DLOG("str is \"%s\"\n", str);
 276                     /* If we are at the end of a quoted string, skip the ending
 277                      * double quote. */
 278                     if (*walk == '"')
 279                         walk++;
 280                     next_state(token);
 281                     token_handled = true;
 282                     break;
 283                 }
 284             }
 285
 286             if (strcmp(token->name, "end") == 0) {
 287                 DLOG("checking for the end token.\n");
 288                 if (*walk == '\0' || *walk == ',' || *walk == ';') {
 289                     DLOG("yes, indeed. end\n");
 290                     walk++;
 291                     next_state(token);
 292                     token_handled = true;
 293                     /* To make sure we start with an appropriate matching
 294                      * datastructure for commands which do *not* specify any
 295                      * criteria, we re-initialize the criteria system after
 296                      * every command. */
 297                     // TODO: make this testable
 298 #ifndef TEST_PARSER
 299                     if (*walk == '\0' || *walk == ';')
 300                         cmd_criteria_init(&current_match);
 301 #endif
 302                     break;
 303                }
 304            }
 305         }
 306
 307         if (!token_handled) {
 308             /* Figure out how much memory we will need to fill in the names of
 309              * all tokens afterwards. */
 310             int tokenlen = 0;
 311             for (c = 0; c < ptr->n; c++)
 312                 tokenlen += strlen(ptr->array[c].name) + strlen("'', ");
 313
 314             /* Build up a decent error message. We include the problem, the
 315              * full input, and underline the position where the parser
 316              * currently is. */
 317             char *errormessage;
 318             char *possible_tokens = malloc(tokenlen + 1);
 319             char *tokenwalk = possible_tokens;
 320             for (c = 0; c < ptr->n; c++) {
 321                 token = &(ptr->array[c]);
 322                 if (token->name[0] == '\'') {
 323                     /* A literal is copied to the error message enclosed with
 324                      * single quotes. */
 325                     *tokenwalk++ = '\'';
 326                     strcpy(tokenwalk, token->name + 1);
 327                     tokenwalk += strlen(token->name + 1);
 328                     *tokenwalk++ = '\'';
 329                 } else {
 330                     /* Any other token is copied to the error message enclosed
 331                      * with angle brackets. */
 332                     *tokenwalk++ = '<';
 333                     strcpy(tokenwalk, token->name);
 334                     tokenwalk += strlen(token->name);
 335                     *tokenwalk++ = '>';
 336                 }
 337                 if (c < (ptr->n - 1)) {
 338                     *tokenwalk++ = ',';
 339                     *tokenwalk++ = ' ';
 340                 }
 341             }
 342             *tokenwalk = '\0';
 343             asprintf(&errormessage, "Expected one of these tokens: %s",
 344                      possible_tokens);
 345             free(possible_tokens);
 346
 347             /* Contains the same amount of characters as 'input' has, but with
 348              * the unparseable part highlighted using ^ characters. */
 349             char *position = malloc(len + 1);
 350             for (const char *copywalk = input; *copywalk != '\0'; copywalk++)
 351                 position[(copywalk - input)] = (copywalk >= walk ? '^' : ' ');
 352             position[len] = '\0';
 353
 354             printf("%s\n", errormessage);
 355             printf("Your command: %s\n", input);
 356             printf("              %s\n", position);
 357
 358             free(position);
 359             free(errormessage);
 360             break;
 361         }
 362     }
 363
 364     DLOG("json_output = %s\n", json_output);
 365     return json_output;
 366 }
 367
 368 /*******************************************************************************
 369  * Code for building the stand-alone binary test.commands_parser which is used
 370  * by t/187-commands-parser.t.
 371  ******************************************************************************/
 372
 373 #ifdef TEST_PARSER
 374
 375 /*
 376  * Logs the given message to stdout while prefixing the current time to it,
 377  * but only if the corresponding debug loglevel was activated.
 378  * This is to be called by DLOG() which includes filename/linenumber
 379  *
 380  */
 381 void debuglog(uint64_t lev, char *fmt, ...) {
 382     va_list args;
 383
 384     va_start(args, fmt);
 385     fprintf(stdout, "# ");
 386     vfprintf(stdout, fmt, args);
 387     va_end(args);
 388 }
 389
 390 int main(int argc, char *argv[]) {
 391     if (argc < 2) {
 392         fprintf(stderr, "Syntax: %s <command>\n", argv[0]);
 393         return 1;
 394     }
 395     parse_command(argv[1]);
 396 }
 397 #endif