2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2015 Kern Sibbald
5 Copyright (C) 2000-2014 Free Software Foundation Europe e.V.
7 The original author of Bacula is Kern Sibbald, with contributions
8 from many others, a complete list can be found in the file AUTHORS.
10 You may use this file and others of this release according to the
11 license defined in the LICENSE file, which includes the Affero General
12 Public License, v3.0 ("AGPLv3") and some additional permissions and
13 terms pursuant to its AGPLv3 Section 7.
15 This notice must be preserved when any source code is
16 conveyed and/or propagated.
18 Bacula(R) is a registered trademark of Kern Sibbald.
21 * Lexical scanner for Bacula configuration file
30 /* Debug level for this source file */
31 static const int dbglvl = 5000;
34 * Scan to "logical" end of line. I.e. end of line,
35 * or semicolon, but stop on T_EOB (same as end of
36 * line except it is not eaten).
38 void scan_to_eol(LEX *lc)
41 Dmsg0(dbglvl, "start scan to eol\n");
42 while ((token = lex_get_token(lc, T_ALL)) != T_EOL) {
54 * Get next token, but skip EOL
56 int scan_to_next_not_eol(LEX * lc)
60 token = lex_get_token(lc, T_ALL);
61 } while (token == T_EOL);
66 * Format a scanner error message
68 static void s_err(const char *file, int line, LEX *lc, const char *msg, ...)
74 va_start(arg_ptr, msg);
75 bvsnprintf(buf, sizeof(buf), msg, arg_ptr);
78 if (lc->err_type == 0) { /* M_ERROR_TERM by default */
79 lc->err_type = M_ERROR_TERM;
82 if (lc->line_no > lc->begin_line_no) {
83 bsnprintf(more, sizeof(more),
84 _("Problem probably begins at line %d.\n"), lc->begin_line_no);
88 if (lc->line_no > 0) {
89 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"
90 " : line %d, col %d of file %s\n%s\n%s"),
91 buf, lc->line_no, lc->col_no, lc->fname, lc->line, more);
93 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf);
97 void lex_set_default_error_handler(LEX *lf)
99 lf->scan_error = s_err;
103 * Set the err_type used by the error handler and
104 * return the old value.
106 int lex_set_error_handler_error_type(LEX *lf, int err_type)
108 int old = lf->err_type;
109 lf->err_type = err_type;
114 * Free the current file, and retrieve the contents
115 * of the previous packet if any.
117 LEX *lex_close_file(LEX *lf)
122 Emsg0(M_ABORT, 0, _("Close of NULL file\n"));
124 Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname);
128 close_bpipe(lf->bpipe);
133 Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname);
137 free_memory(lf->line);
139 free_memory(lf->str);
142 of->options = lf->options; /* preserve options */
143 memcpy(lf, of, sizeof(LEX));
144 Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname);
156 * Open a configuration held in a memory buffer. We push the
157 * state of the current file (lf) so that we
158 * can do includes. This is a bit of a hammer.
159 * Instead of passing back the pointer to the
160 * new packet, I simply replace the contents
161 * of the caller's packet with the new packet,
162 * and link the contents of the old packet into
166 LEX *lex_open_buf(LEX *lf, const char *buffer, LEX_ERROR_HANDLER *scan_error)
171 Dmsg0(400, "Open config buffer\n");
172 nf = (LEX *)malloc(sizeof(LEX));
174 memcpy(nf, lf, sizeof(LEX));
175 memset(lf, 0, sizeof(LEX));
176 lf->next = nf; /* if have lf, push it behind new one */
177 lf->options = nf->options; /* preserve user options */
179 * preserve err_type to prevent bacula exiting on 'reload'
180 * if config is invalid. Fixes bug #877
182 lf->err_type = nf->err_type;
184 lf = nf; /* start new packet */
185 memset(lf, 0, sizeof(LEX));
186 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
189 lf->scan_error = scan_error;
191 lex_set_default_error_handler(lf);
196 lf->line = get_memory(5000);
197 pm_strcpy(lf->line, buffer);
198 pm_strcat(lf->line, "");
199 lf->state = lex_none;
201 lf->str = get_memory(5000);
206 * Open a new configuration file. We push the
207 * state of the current file (lf) so that we
208 * can do includes. This is a bit of a hammer.
209 * Instead of passing back the pointer to the
210 * new packet, I simply replace the contents
211 * of the caller's packet with the new packet,
212 * and link the contents of the old packet into
216 LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error)
222 char *fname = bstrdup(filename);
224 if (fname[0] == '|') {
225 if ((bpipe = open_bpipe(fname+1, 0, "rb")) == NULL) {
230 } else if ((fd = fopen(fname, "rb")) == NULL) {
234 Dmsg1(400, "Open config file: %s\n", fname);
235 nf = (LEX *)malloc(sizeof(LEX));
237 memcpy(nf, lf, sizeof(LEX));
238 memset(lf, 0, sizeof(LEX));
239 lf->next = nf; /* if have lf, push it behind new one */
240 lf->options = nf->options; /* preserve user options */
242 * preserve err_type to prevent bacula exiting on 'reload'
243 * if config is invalid. Fixes bug #877
245 lf->err_type = nf->err_type;
247 lf = nf; /* start new packet */
248 memset(lf, 0, sizeof(LEX));
249 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
252 lf->scan_error = scan_error;
254 lex_set_default_error_handler(lf);
259 lf->line = get_memory(5000);
260 lf->state = lex_none;
262 lf->str = get_memory(5000);
263 Dmsg1(dbglvl, "Return lex=%x\n", lf);
268 * Get the next character from the input.
269 * Returns the character or
270 * L_EOF if end of file
271 * L_EOL if end of line
273 int lex_get_char(LEX *lf)
275 if (lf->ch == L_EOF) {
276 Emsg0(M_ABORT, 0, _("get_char: called after EOF."
277 " You may have a open double quote without the closing double quote.\n"));
279 if (lf->fd && lf->ch == L_EOL) {
280 if (bfgets(lf->line, lf->fd) == NULL) {
289 Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line);
290 } else if (lf->ch == L_EOL) {
294 lf->ch = (uint8_t)lf->line[lf->col_no];
297 lf->ch = L_EOL; /* reached end of line, force bfgets */
302 if (lf->ch == 0) { /* End of buffer, stop scan */
308 } else if (lf->ch == '\n') { /* End of line */
309 Dmsg0(dbglvl, "Found newline return L_EOL\n");
315 Dmsg3(dbglvl, "lex_get_char: %c %d col=%d\n", lf->ch, lf->ch, lf->col_no);
319 void lex_unget_char(LEX *lf)
321 if (lf->ch == L_EOL) {
322 lf->ch = 0; /* End of line, force read of next one */
324 lf->col_no--; /* Backup to re-read char */
330 * Add a character to the current string
332 static void add_str(LEX *lf, int ch)
334 if (lf->str_len >= sizeof_pool_memory(lf->str)) {
335 Emsg3(M_ERROR_TERM, 0, _(
336 _("Config token too long, file: %s, line %d, begins at line %d\n")),
337 lf->fname, lf->line_no, lf->begin_line_no);
339 lf->str[lf->str_len++] = ch;
340 lf->str[lf->str_len] = 0;
346 static void begin_str(LEX *lf, int ch)
353 lf->begin_line_no = lf->line_no; /* save start string line no */
357 static const char *lex_state_to_str(int state)
360 case lex_none: return _("none");
361 case lex_comment: return _("comment");
362 case lex_number: return _("number");
363 case lex_ip_addr: return _("ip_addr");
364 case lex_identifier: return _("identifier");
365 case lex_string: return _("string");
366 case lex_quoted_string: return _("quoted_string");
367 case lex_include: return _("include");
368 case lex_include_quoted_string: return _("include_quoted_string");
369 case lex_utf8_bom: return _("UTF-8 Byte Order Mark");
370 case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark");
371 default: return "??????";
377 * Convert a lex token to a string
378 * used for debug/error printing.
380 const char *lex_tok_to_str(int token)
383 case L_EOF: return "L_EOF";
384 case L_EOL: return "L_EOL";
385 case T_NONE: return "T_NONE";
386 case T_NUMBER: return "T_NUMBER";
387 case T_IPADDR: return "T_IPADDR";
388 case T_IDENTIFIER: return "T_IDENTIFIER";
389 case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING";
390 case T_QUOTED_STRING: return "T_QUOTED_STRING";
391 case T_BOB: return "T_BOB";
392 case T_EOB: return "T_EOB";
393 case T_EQUALS: return "T_EQUALS";
394 case T_ERROR: return "T_ERROR";
395 case T_EOF: return "T_EOF";
396 case T_COMMA: return "T_COMMA";
397 case T_EOL: return "T_EOL";
398 case T_UTF8_BOM: return "T_UTF8_BOM";
399 case T_UTF16_BOM: return "T_UTF16_BOM";
400 default: return "??????";
404 static uint32_t scan_pint(LEX *lf, char *str)
407 if (!is_a_number(str)) {
408 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
412 val = str_to_int64(str);
413 if (errno != 0 || val < 0) {
414 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
418 return (uint32_t)val;
421 static uint64_t scan_pint64(LEX *lf, char *str)
424 if (!is_a_number(str)) {
425 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
429 val = str_to_uint64(str);
431 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
440 * Get the next token from the input
444 lex_get_token(LEX *lf, int expect)
448 bool esc_next = false;
449 /* Unicode files, especially on Win32, may begin with a "Byte Order Mark"
450 to indicate which transmission format the file is in. The codepoint for
451 this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8
452 and as FF-FE in UTF-16le (little endian) and FE-FF in UTF-16be (big endian).
453 We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen
454 to tell which byte we are expecting. */
455 int bom_bytes_seen = 0;
457 Dmsg1(dbglvl, "enter lex_get_token state=%s\n", lex_state_to_str(lf->state));
458 while (token == T_NONE) {
459 ch = lex_get_char(lf);
462 Dmsg2(dbglvl, "Lex state lex_none ch=%c,%d\n", ch, ch);
466 if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) {
467 lf->state = lex_string;
469 lf->state = lex_identifier;
475 if (lf->options & LOPT_STRING) {
476 lf->state = lex_string;
478 lf->state = lex_number;
483 Dmsg0(dbglvl, "Enter lex_none switch\n");
487 Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n");
490 lf->state = lex_comment;
501 lf->state = lex_quoted_string;
513 if (expect != T_SKIP_EOL) {
514 token = T_EOL; /* treat ; like EOL */
518 Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n");
519 if (expect != T_SKIP_EOL) {
524 /* In NO_EXTERN mode, @ is part of a string */
525 if (lf->options & LOPT_NO_EXTERN) {
526 lf->state = lex_string;
529 lf->state = lex_include;
533 case 0xEF: /* probably a UTF-8 BOM */
534 case 0xFF: /* probably a UTF-16le BOM */
535 case 0xFE: /* probably a UTF-16be BOM (error)*/
536 if (lf->line_no != 1 || lf->col_no != 1)
538 lf->state = lex_string;
543 lf->state = lex_utf8_bom;
544 } else if (ch == 0xFF) {
545 lf->state = lex_utf16_le_bom;
547 scan_err0(lf, _("This config file appears to be in an "
548 "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n"));
554 lf->state = lex_string;
560 Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch);
562 lf->state = lex_none;
563 if (expect != T_SKIP_EOL) {
566 } else if (ch == L_EOF) {
571 Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch);
576 /* Might want to allow trailing specifications here */
582 /* A valid number can be terminated by the following */
583 if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') {
585 lf->state = lex_none;
587 lf->state = lex_string;
596 Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch);
599 Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch);
604 if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
605 ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) {
607 token = T_UNQUOTED_STRING;
608 lf->state = lex_none;
614 Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch);
618 } else if (B_ISSPACE(ch)) {
620 } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
621 ch == '\r' || ch == ';' || ch == ',' || ch == '"' || ch == '#') {
623 token = T_IDENTIFIER;
624 lf->state = lex_none;
626 } else if (ch == L_EOF) {
628 lf->state = lex_none;
632 /* Some non-alpha character => string */
633 lf->state = lex_string;
636 case lex_quoted_string:
637 Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch);
656 token = T_QUOTED_STRING;
658 * Since we may be scanning a quoted list of names,
659 * we get the next character (a comma indicates another
660 * one), then we put it back for rescanning.
664 lf->state = lex_none;
669 case lex_include_quoted_string:
684 /* Keep the original LEX so we can print an error if the included file can't be opened. */
686 /* Skip the double quote when restarting parsing */
689 lf->state = lex_none;
690 lf = lex_open_file(lf, lf->str, lf->scan_error);
693 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
694 lfori->str, be.bstrerror());
701 case lex_include: /* scanning a filename */
707 lf->state = lex_include_quoted_string;
712 if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' ||
713 ch == ';' || ch == ',' || ch == '"' || ch == '#') {
714 /* Keep the original LEX so we can print an error if the included file can't be opened. */
717 lf->state = lex_none;
718 lf = lex_open_file(lf, lf->str, lf->scan_error);
721 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
722 lfori->str, be.bstrerror());
730 /* we only end up in this state if we have read an 0xEF
731 as the first byte of the file, indicating we are probably
732 reading a UTF-8 file */
733 if (ch == 0xBB && bom_bytes_seen == 1) {
735 } else if (ch == 0xBF && bom_bytes_seen == 2) {
737 lf->state = lex_none;
742 case lex_utf16_le_bom:
743 /* we only end up in this state if we have read an 0xFF
744 as the first byte of the file -- indicating that we are
745 probably dealing with an Intel-based (little endian) UTF-16 file */
748 lf->state = lex_none;
754 Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state),
755 lex_tok_to_str(token), ch);
757 Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token));
761 * Here is where we check to see if the user has set certain
762 * expectations (e.g. 32 bit integer). If so, we do type checking
763 * and possible additional scanning (e.g. for range).
767 lf->pint32_val = scan_pint(lf, lf->str);
768 lf->pint32_val2 = lf->pint32_val;
773 if (token == T_NUMBER) {
774 lf->pint32_val = scan_pint(lf, lf->str);
775 lf->pint32_val2 = lf->pint32_val;
778 char *p = strchr(lf->str, '-');
780 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
781 lex_tok_to_str(token), lf->str);
785 *p++ = 0; /* terminate first half of range */
786 lf->pint32_val = scan_pint(lf, lf->str);
787 lf->pint32_val2 = scan_pint(lf, p);
788 token = T_PINT32_RANGE;
793 if (token != T_NUMBER || !is_a_number(lf->str)) {
794 scan_err2(lf, _("expected an integer number, got %s: %s"),
795 lex_tok_to_str(token), lf->str);
800 lf->int32_val = (int32_t)str_to_int64(lf->str);
802 scan_err2(lf, _("expected an integer number, got %s: %s"),
803 lex_tok_to_str(token), lf->str);
811 Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL));
812 if (token != T_NUMBER || !is_a_number(lf->str)) {
813 scan_err2(lf, _("expected an integer number, got %s: %s"),
814 lex_tok_to_str(token), lf->str);
819 lf->int64_val = str_to_int64(lf->str);
821 scan_err2(lf, _("expected an integer number, got %s: %s"),
822 lex_tok_to_str(token), lf->str);
830 if (token == T_NUMBER) {
831 lf->pint64_val = scan_pint64(lf, lf->str);
832 lf->pint64_val2 = lf->pint64_val;
835 char *p = strchr(lf->str, '-');
837 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
838 lex_tok_to_str(token), lf->str);
842 *p++ = 0; /* terminate first half of range */
843 lf->pint64_val = scan_pint64(lf, lf->str);
844 lf->pint64_val2 = scan_pint64(lf, p);
845 token = T_PINT64_RANGE;
850 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
851 scan_err2(lf, _("expected a name, got %s: %s"),
852 lex_tok_to_str(token), lf->str);
854 } else if (lf->str_len > MAX_RES_NAME_LENGTH) {
855 scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str,
856 lf->str_len, MAX_RES_NAME_LENGTH);
862 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
863 scan_err2(lf, _("expected a string, got %s: %s"),
864 lex_tok_to_str(token), lf->str);
873 break; /* no expectation given */
875 lf->token = token; /* set possible new token */