2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Lexical scanner for Bacula configuration file
/* Global debug level; declared extern here, so it is defined in another file. */
38 extern int debug_level;
40 /* Debug level for this source file */
/* dbglvl is the level passed to most Dmsg calls in this file. */
41 static const int dbglvl = 5000;
44 * Scan to "logical" end of line. I.e. end of line,
45 * or semicolon, but stop on T_EOB (same as end of
46 * line except it is not eaten).
/*
 * scan_to_eol: read tokens from lc with lex_get_token(lc, T_ALL) until
 * T_EOL is returned.
 *
 * NOTE(review): the loop body and the closing lines are not visible in
 * this listing, so the T_EOB handling mentioned in the comment preceding
 * this function cannot be confirmed from here.
 * NOTE(review): the debug text below says "eof" although the loop stops
 * on T_EOL -- probably a typo for "eol"; confirm before changing.
 */
48 void scan_to_eol(LEX *lc)
51 Dmsg0(dbglvl, "start scan to eof\n");
52 while ((token = lex_get_token(lc, T_ALL)) != T_EOL) {
61 * Get next token, but skip EOL
63 int scan_to_next_not_eol(LEX * lc)
67 token = lex_get_token(lc, T_ALL);
68 } while (token == T_EOL);
73 * Format a scanner error message
/*
 * s_err: format a scanner error and hand it to e_msg.
 *
 *   file, line  forwarded unchanged to e_msg
 *   lc          lexer context; supplies err_type, line_no, col_no, fname,
 *               line and begin_line_no for the report
 *   msg, ...    printf-style format and arguments, rendered into buf
 *
 * NOTE(review): the declarations of buf, more and arg_ptr, the matching
 * va_end, and the else branches are not visible in this listing.
 */
75 static void s_err(const char *file, int line, LEX *lc, const char *msg, ...)
81 va_start(arg_ptr, msg);
82 bvsnprintf(buf, sizeof(buf), msg, arg_ptr);
/* An err_type of 0 is treated as unset and replaced by M_ERROR_TERM. */
85 if (lc->err_type == 0) { /* M_ERROR_TERM by default */
86 lc->err_type = M_ERROR_TERM;
/* When the current line is past begin_line_no, add a hint about the start line. */
89 if (lc->line_no > lc->begin_line_no) {
90 bsnprintf(more, sizeof(more),
91 _("Problem probably begins at line %d.\n"), lc->begin_line_no);
/* With a positive line number the report includes line, column, file and text. */
95 if (lc->line_no > 0) {
96 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"
97 " : line %d, col %d of file %s\n%s\n%s"),
98 buf, lc->line_no, lc->col_no, lc->fname, lc->line, more);
/* Short form: only the formatted message. Presumably the else branch -- confirm. */
100 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf);
104 void lex_set_default_error_handler(LEX *lf)
106 lf->scan_error = s_err;
110 * Set err_type used in error_handler
111 * return the old value
113 int lex_set_error_handler_error_type(LEX *lf, int err_type)
115 int old = lf->err_type;
116 lf->err_type = err_type;
121 * Free the current file, and retrieve the contents
122 * of the previous packet if any.
/*
 * lex_close_file: close the input attached to lf and restore the state of
 * the previously pushed LEX packet, if there is one.
 *
 * NOTE(review): the declaration of "of", the conditions guarding each
 * statement, the release of resources and the return value are not
 * visible in this listing.
 */
124 LEX *lex_close_file(LEX *lf)
/* Presumably guarded by a NULL check on lf -- the condition is not visible. */
129 Emsg0(M_ABORT, 0, _("Close of NULL file\n"));
131 Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname);
/* Input that came from a pipe is closed through close_bpipe. */
135 close_bpipe(lf->bpipe);
140 Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname);
/* Carry the current options over, then overwrite *lf with the saved packet. */
143 of->options = lf->options; /* preserve options */
144 memcpy(lf, of, sizeof(LEX));
145 Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname);
155 * Open a new configuration file. We push the
156 * state of the current file (lf) so that we
157 * can do includes. This is a bit of a hammer.
158 * Instead of passing back the pointer to the
159 * new packet, I simply replace the contents
160 * of the caller's packet with the new packet,
161 * and link the contents of the old packet into
/*
 * lex_open_file: open filename as a new lexer input.
 *
 *   lf          current lexer packet; its contents are copied into a newly
 *               allocated packet that is linked through lf->next
 *   filename    path to open; a leading '|' means the remainder is opened
 *               with open_bpipe instead of fopen
 *   scan_error  error callback stored in lf->scan_error
 *
 * NOTE(review): the failure returns, the check of the malloc result, the
 * conditions selecting between the branches below and the final return
 * are not visible in this listing.
 */
165 LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error)
171 char *fname = bstrdup(filename);
/* A name starting with '|' is opened as a pipe on the text after the bar. */
174 if (fname[0] == '|') {
175 if ((bpipe = open_bpipe(fname+1, 0, "rb")) == NULL) {
180 } else if ((fd = fopen(fname, "rb")) == NULL) {
184 Dmsg1(400, "Open config file: %s\n", fname);
185 nf = (LEX *)malloc(sizeof(LEX));
/* Save the current packet in nf, clear *lf, and chain the saved copy behind it. */
187 memcpy(nf, lf, sizeof(LEX));
188 memset(lf, 0, sizeof(LEX));
189 lf->next = nf; /* if have lf, push it behind new one */
190 lf->options = nf->options; /* preserve user options */
192 * preserve err_type to prevent bacula exiting on 'reload'
193 * if config is invalid. Fixes bug #877
195 lf->err_type = nf->err_type;
/* Presumably the branch taken when no packet was passed in -- confirm. */
197 lf = nf; /* start new packet */
198 memset(lf, 0, sizeof(LEX));
199 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
/* Either the supplied callback or the default handler is installed. */
202 lf->scan_error = scan_error;
204 lex_set_default_error_handler(lf);
209 lf->state = lex_none;
/* NOTE(review): %x is given the pointer lf; %p is the usual conversion -- confirm what Dmsg1 accepts. */
211 Dmsg1(dbglvl, "Return lex=%x\n", lf);
216 * Get the next character from the input.
217 * Returns the character or
218 * L_EOF if end of file
219 * L_EOL if end of line
/*
 * lex_get_char: fetch the next input character for lf.
 *
 * Aborts through Emsg0(M_ABORT, ...) when called while lf->ch is L_EOF.
 * When lf->ch is L_EOL a new line is read into lf->line with bfgets.
 * The current character is taken from lf->line at index lf->col_no.
 *
 * NOTE(review): the end-of-file handling, the updates of line_no and
 * col_no and the return statements are not visible in this listing.
 */
221 int lex_get_char(LEX *lf)
223 if (lf->ch == L_EOF) {
224 Emsg0(M_ABORT, 0, _("get_char: called after EOF."
225 " You may have a open double quote without the closing double quote.\n"));
/* The previous character ended a line, so the next line must be loaded. */
227 if (lf->ch == L_EOL) {
228 if (bfgets(lf->line, MAXSTRING, lf->fd) == NULL) {
237 Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line);
/* The cast to uint8_t keeps bytes above 0x7F from becoming negative values. */
239 lf->ch = (uint8_t)lf->line[lf->col_no];
245 Dmsg2(dbglvl, "lex_get_char: %c %d\n", lf->ch, lf->ch);
/*
 * lex_unget_char: push back the character most recently read from lf.
 *
 * When lf->ch is L_EOL it is reset to 0; otherwise lf->col_no is stepped
 * back by one so the same character is read again.
 *
 * NOTE(review): the line joining the two assignments is not visible in
 * this listing; presumably it is an else -- confirm.
 */
249 void lex_unget_char(LEX *lf)
251 if (lf->ch == L_EOL) {
252 lf->ch = 0; /* End of line, force read of next one */
254 lf->col_no--; /* Backup to re-read char */
261 * Add a character to the current string
263 static void add_str(LEX *lf, int ch)
265 if (lf->str_len >= MAXSTRING-3) {
266 Emsg3(M_ERROR_TERM, 0, _(
267 _("Config token too long, file: %s, line %d, begins at line %d\n")),
268 lf->fname, lf->line_no, lf->begin_line_no);
270 lf->str[lf->str_len++] = ch;
271 lf->str[lf->str_len] = 0;
/*
 * begin_str: start collecting a new token string in lf.
 *
 * The only statement visible here records the current line number in
 * lf->begin_line_no, which error messages use to report where a token
 * began.
 *
 * NOTE(review): the rest of the body, including any use of ch, is not
 * visible in this listing.
 */
277 static void begin_str(LEX *lf, int ch)
284 lf->begin_line_no = lf->line_no; /* save start string line no */
288 static const char *lex_state_to_str(int state)
291 case lex_none: return _("none");
292 case lex_comment: return _("comment");
293 case lex_number: return _("number");
294 case lex_ip_addr: return _("ip_addr");
295 case lex_identifier: return _("identifier");
296 case lex_string: return _("string");
297 case lex_quoted_string: return _("quoted_string");
298 case lex_include: return _("include");
299 case lex_include_quoted_string: return _("include_quoted_string");
300 case lex_utf8_bom: return _("UTF-8 Byte Order Mark");
301 case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark");
302 default: return "??????";
308 * Convert a lex token to a string
309 * used for debug/error printing.
311 const char *lex_tok_to_str(int token)
314 case L_EOF: return "L_EOF";
315 case L_EOL: return "L_EOL";
316 case T_NONE: return "T_NONE";
317 case T_NUMBER: return "T_NUMBER";
318 case T_IPADDR: return "T_IPADDR";
319 case T_IDENTIFIER: return "T_IDENTIFIER";
320 case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING";
321 case T_QUOTED_STRING: return "T_QUOTED_STRING";
322 case T_BOB: return "T_BOB";
323 case T_EOB: return "T_EOB";
324 case T_EQUALS: return "T_EQUALS";
325 case T_ERROR: return "T_ERROR";
326 case T_EOF: return "T_EOF";
327 case T_COMMA: return "T_COMMA";
328 case T_EOL: return "T_EOL";
329 case T_UTF8_BOM: return "T_UTF8_BOM";
330 case T_UTF16_BOM: return "T_UTF16_BOM";
331 default: return "??????";
/*
 * scan_pint: convert str to a non-negative integer, returned as uint32_t.
 *
 * A scanner error is reported through scan_err1 when str is rejected by
 * is_a_number, or when the conversion leaves errno set or yields a
 * negative value.
 *
 * NOTE(review): the declaration of val, the reset of errno before the
 * conversion and what follows each scan_err1 call are not visible here.
 * NOTE(review): no upper bound check is visible before the cast, so a
 * value wider than 32 bits would be truncated -- confirm.
 */
335 static uint32_t scan_pint(LEX *lf, char *str)
338 if (!is_a_number(str)) {
339 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
343 val = str_to_int64(str);
344 if (errno != 0 || val < 0) {
345 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
349 return (uint32_t)val;
/*
 * scan_pint64: convert str to an unsigned 64 bit integer.
 *
 * A scanner error is reported through scan_err1 when str is rejected by
 * is_a_number. A second scan_err1 call follows the conversion done by
 * str_to_uint64.
 *
 * NOTE(review): the condition guarding the second error, the declaration
 * of val and the return statement are not visible in this listing.
 */
352 static uint64_t scan_pint64(LEX *lf, char *str)
355 if (!is_a_number(str)) {
356 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
360 val = str_to_uint64(str);
362 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
371 * Get the next token from the input
/*
 * lex_get_token: scan the next token from lf.
 *
 *   lf      lexer context; lf->state drives the scanner, lf->str holds
 *           the token text, and lf->token is set before returning
 *   expect  expectation supplied by the caller; T_SKIP_EOL suppresses
 *           end-of-line tokens, and other values trigger the type checks
 *           that follow the scanning loop
 *
 * The function runs in two phases: a character-driven state machine that
 * loops until a token is produced, then checks and conversions of the
 * token text according to expect.
 *
 * NOTE(review): the return type, the local declarations of token and ch,
 * most case labels and the return statement are not visible here.
 */
375 lex_get_token(LEX *lf, int expect)
379 bool esc_next = false;
380 /* Unicode files, especially on Win32, may begin with a "Byte Order Mark"
381 to indicate which transmission format the file is in. The codepoint for
382 this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8
383 and as FF-FE in UTF-16le(little endian) and FE-FF in UTF-16(big endian).
384 We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen
385 to tell which byte we are expecting. */
386 int bom_bytes_seen = 0;
388 Dmsg0(dbglvl, "enter lex_get_token\n");
/* Scanning loop: one character is read per iteration until a token is set. */
389 while (token == T_NONE) {
390 ch = lex_get_char(lf);
393 Dmsg2(dbglvl, "Lex state lex_none ch=%d,%x\n", ch, ch);
/* LOPT_NO_IDENT or LOPT_STRING selects string scanning instead of identifier scanning. */
397 if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) {
398 lf->state = lex_string;
400 lf->state = lex_identifier;
/* LOPT_STRING likewise selects string scanning instead of number scanning. */
406 if (lf->options & LOPT_STRING) {
407 lf->state = lex_string;
409 lf->state = lex_number;
414 Dmsg0(dbglvl, "Enter lex_none switch\n");
418 Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n");
421 lf->state = lex_comment;
432 lf->state = lex_quoted_string;
444 if (expect != T_SKIP_EOL) {
445 token = T_EOL; /* treat ; like EOL */
449 Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n");
450 if (expect != T_SKIP_EOL) {
455 /* In NO_EXTERN mode, @ is part of a string */
456 if (lf->options & LOPT_NO_EXTERN) {
457 lf->state = lex_string;
460 lf->state = lex_include;
464 case 0xEF: /* probably a UTF-8 BOM */
465 case 0xFF: /* probably a UTF-16le BOM */
466 case 0xFE: /* probably a UTF-16be BOM (error)*/
/* These bytes count as a byte order mark only at line 1, column 1. */
467 if (lf->line_no != 1 || lf->col_no != 1)
469 lf->state = lex_string;
474 lf->state = lex_utf8_bom;
475 } else if (ch == 0xFF) {
476 lf->state = lex_utf16_le_bom;
478 scan_err0(lf, _("This config file appears to be in an "
479 "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n"));
485 lf->state = lex_string;
491 Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch);
493 lf->state = lex_none;
494 if (expect != T_SKIP_EOL) {
497 } else if (ch == L_EOF) {
502 Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch);
507 /* Might want to allow trailing specifications here */
513 /* A valid number can be terminated by the following */
514 if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') {
516 lf->state = lex_none;
518 lf->state = lex_string;
527 Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch);
530 Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch);
/* Any of these delimiters ends an unquoted string. */
535 if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
536 ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) {
538 token = T_UNQUOTED_STRING;
539 lf->state = lex_none;
545 Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch);
549 } else if (B_ISSPACE(ch)) {
551 } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
552 ch == '\r' || ch == ';' || ch == ',' || ch == '"' || ch == '#') {
554 token = T_IDENTIFIER;
555 lf->state = lex_none;
557 } else if (ch == L_EOF) {
559 lf->state = lex_none;
563 /* Some non-alpha character => string */
564 lf->state = lex_string;
567 case lex_quoted_string:
568 Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch);
587 token = T_QUOTED_STRING;
588 lf->state = lex_none;
593 case lex_include_quoted_string:
608 /* Keep the original LEX so we can print an error if the included file can't be opened. */
610 /* Skip the double quote when restarting parsing */
613 lf->state = lex_none;
/* The collected text in lf->str is the name of the file to include. */
614 lf = lex_open_file(lf, lf->str, lf->scan_error);
/* NOTE(review): lfori is assigned on a line not visible in this listing. */
617 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
618 lfori->str, be.bstrerror());
625 case lex_include: /* scanning a filename */
631 lf->state = lex_include_quoted_string;
636 if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' ||
637 ch == ';' || ch == ',' || ch == '"' || ch == '#') {
638 /* Keep the original LEX so we can print an error if the included file can't be opened. */
641 lf->state = lex_none;
642 lf = lex_open_file(lf, lf->str, lf->scan_error);
645 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
646 lfori->str, be.bstrerror());
654 /* we only end up in this state if we have read an 0xEF
655 as the first byte of the file, indicating we are probably
656 reading a UTF-8 file */
/* The remaining mark bytes must arrive in order: 0xBB second, 0xBF third. */
657 if (ch == 0xBB && bom_bytes_seen == 1) {
659 } else if (ch == 0xBF && bom_bytes_seen == 2) {
661 lf->state = lex_none;
666 case lex_utf16_le_bom:
667 /* we only end up in this state if we have read an 0xFF
668 as the first byte of the file -- indicating that we are
669 probably dealing with an Intel based (little endian) UTF-16 file*/
672 lf->state = lex_none;
678 Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state),
679 lex_tok_to_str(token), ch);
681 Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token));
685 * Here is where we check to see if the user has set certain
686 * expectations (e.g. 32 bit integer). If so, we do type checking
687 * and possible additional scanning (e.g. for range).
/* NOTE(review): the case labels that select each check below are not visible in this listing. */
691 lf->pint32_val = scan_pint(lf, lf->str);
692 lf->pint32_val2 = lf->pint32_val;
/* A plain number sets both ends of the 32 bit range to the same value. */
697 if (token == T_NUMBER) {
698 lf->pint32_val = scan_pint(lf, lf->str);
699 lf->pint32_val2 = lf->pint32_val;
/* Otherwise the text is split at the first '-' into the two range ends. */
702 char *p = strchr(lf->str, '-');
704 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
705 lex_tok_to_str(token), lf->str);
709 *p++ = 0; /* terminate first half of range */
710 lf->pint32_val = scan_pint(lf, lf->str);
711 lf->pint32_val2 = scan_pint(lf, p);
712 token = T_PINT32_RANGE;
717 if (token != T_NUMBER || !is_a_number(lf->str)) {
718 scan_err2(lf, _("expected an integer number, got %s: %s"),
719 lex_tok_to_str(token), lf->str);
/* NOTE(review): the int64 result is narrowed to int32_t; no range check is visible here. */
724 lf->int32_val = (int32_t)str_to_int64(lf->str);
726 scan_err2(lf, _("expected an integer number, got %s: %s"),
727 lex_tok_to_str(token), lf->str);
735 Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL));
736 if (token != T_NUMBER || !is_a_number(lf->str)) {
737 scan_err2(lf, _("expected an integer number, got %s: %s"),
738 lex_tok_to_str(token), lf->str);
743 lf->int64_val = str_to_int64(lf->str);
745 scan_err2(lf, _("expected an integer number, got %s: %s"),
746 lex_tok_to_str(token), lf->str);
/* 64 bit variant of the number-or-range handling. */
754 if (token == T_NUMBER) {
755 lf->pint64_val = scan_pint64(lf, lf->str);
756 lf->pint64_val2 = lf->pint64_val;
759 char *p = strchr(lf->str, '-');
761 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
762 lex_tok_to_str(token), lf->str);
766 *p++ = 0; /* terminate first half of range */
767 lf->pint64_val = scan_pint64(lf, lf->str);
768 lf->pint64_val2 = scan_pint64(lf, p);
769 token = T_PINT64_RANGE;
/* A name must be an identifier or string token no longer than MAX_RES_NAME_LENGTH. */
774 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
775 scan_err2(lf, _("expected a name, got %s: %s"),
776 lex_tok_to_str(token), lf->str);
778 } else if (lf->str_len > MAX_RES_NAME_LENGTH) {
779 scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str,
780 lf->str_len, MAX_RES_NAME_LENGTH);
786 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
787 scan_err2(lf, _("expected a string, got %s: %s"),
788 lex_tok_to_str(token), lf->str);
797 break; /* no expectation given */
/* The checks above may have replaced the token, so store the final value. */
799 lf->token = token; /* set possible new token */