2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Lexical scanner for Bacula configuration file
38 extern int debug_level;
40 /* Debug level for this source file */
41 static const int dbglvl = 5000;
44 * Scan to "logical" end of line. I.e. end of line,
45 * or semicolon, but stop on T_EOB (same as end of
46 * line except it is not eaten).
48 void scan_to_eol(LEX *lc)
51 Dmsg0(dbglvl, "start scan to eof\n");
52 while ((token = lex_get_token(lc, T_ALL)) != T_EOL) {
61 * Get next token, but skip EOL
63 int scan_to_next_not_eol(LEX * lc)
67 token = lex_get_token(lc, T_ALL);
68 } while (token == T_EOL);
73 * Format a scanner error message
75 static void s_err(const char *file, int line, LEX *lc, const char *msg, ...)
81 va_start(arg_ptr, msg);
82 bvsnprintf(buf, sizeof(buf), msg, arg_ptr);
85 if (lc->err_type == 0) { /* M_ERROR_TERM by default */
86 lc->err_type = M_ERROR_TERM;
89 if (lc->line_no > lc->begin_line_no) {
90 bsnprintf(more, sizeof(more),
91 _("Problem probably begins at line %d.\n"), lc->begin_line_no);
95 if (lc->line_no > 0) {
96 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"
97 " : line %d, col %d of file %s\n%s\n%s"),
98 buf, lc->line_no, lc->col_no, lc->fname, lc->line, more);
100 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf);
104 void lex_set_default_error_handler(LEX *lf)
106 lf->scan_error = s_err;
110 * Set err_type used in error_handler
111 * return the old value
113 int lex_set_error_handler_error_type(LEX *lf, int err_type)
115 int old = lf->err_type;
116 lf->err_type = err_type;
121 * Free the current file, and retrieve the contents
122 * of the previous packet if any.
124 LEX *lex_close_file(LEX *lf)
129 Emsg0(M_ABORT, 0, _("Close of NULL file\n"));
131 Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname);
135 close_bpipe(lf->bpipe);
140 Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname);
142 free_memory(lf->line);
145 of->options = lf->options; /* preserve options */
146 memcpy(lf, of, sizeof(LEX));
147 Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname);
157 * Open a new configuration file. We push the
158 * state of the current file (lf) so that we
159 * can do includes. This is a bit of a hammer.
160 * Instead of passing back the pointer to the
161 * new packet, I simply replace the contents
162 * of the caller's packet with the new packet,
163 * and link the contents of the old packet into
167 LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error)
173 char *fname = bstrdup(filename);
175 if (fname[0] == '|') {
176 if ((bpipe = open_bpipe(fname+1, 0, "rb")) == NULL) {
181 } else if ((fd = fopen(fname, "rb")) == NULL) {
185 Dmsg1(400, "Open config file: %s\n", fname);
186 nf = (LEX *)malloc(sizeof(LEX));
188 memcpy(nf, lf, sizeof(LEX));
189 memset(lf, 0, sizeof(LEX));
190 lf->next = nf; /* if have lf, push it behind new one */
191 lf->options = nf->options; /* preserve user options */
193 * preserve err_type to prevent bacula exiting on 'reload'
194 * if config is invalid. Fixes bug #877
196 lf->err_type = nf->err_type;
198 lf = nf; /* start new packet */
199 memset(lf, 0, sizeof(LEX));
200 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
203 lf->scan_error = scan_error;
205 lex_set_default_error_handler(lf);
210 lf->line = get_memory(5000);
211 lf->state = lex_none;
213 Dmsg1(dbglvl, "Return lex=%x\n", lf);
218 * Get the next character from the input.
219 * Returns the character or
220 * L_EOF if end of file
221 * L_EOL if end of line
223 int lex_get_char(LEX *lf)
225 if (lf->ch == L_EOF) {
226 Emsg0(M_ABORT, 0, _("get_char: called after EOF."
227 " You may have a open double quote without the closing double quote.\n"));
229 if (lf->ch == L_EOL) {
230 if (bfgets(lf->line, lf->fd) == NULL) {
239 Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line);
241 lf->ch = (uint8_t)lf->line[lf->col_no];
247 Dmsg2(dbglvl, "lex_get_char: %c %d\n", lf->ch, lf->ch);
251 void lex_unget_char(LEX *lf)
253 if (lf->ch == L_EOL) {
254 lf->ch = 0; /* End of line, force read of next one */
256 lf->col_no--; /* Backup to re-read char */
262 * Add a character to the current string
264 static void add_str(LEX *lf, int ch)
266 if (lf->str_len >= MAXSTRING-3) {
267 Emsg3(M_ERROR_TERM, 0, _(
268 _("Config token too long, file: %s, line %d, begins at line %d\n")),
269 lf->fname, lf->line_no, lf->begin_line_no);
271 lf->str[lf->str_len++] = ch;
272 lf->str[lf->str_len] = 0;
278 static void begin_str(LEX *lf, int ch)
285 lf->begin_line_no = lf->line_no; /* save start string line no */
289 static const char *lex_state_to_str(int state)
292 case lex_none: return _("none");
293 case lex_comment: return _("comment");
294 case lex_number: return _("number");
295 case lex_ip_addr: return _("ip_addr");
296 case lex_identifier: return _("identifier");
297 case lex_string: return _("string");
298 case lex_quoted_string: return _("quoted_string");
299 case lex_include: return _("include");
300 case lex_include_quoted_string: return _("include_quoted_string");
301 case lex_utf8_bom: return _("UTF-8 Byte Order Mark");
302 case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark");
303 default: return "??????";
309 * Convert a lex token to a string
310 * used for debug/error printing.
312 const char *lex_tok_to_str(int token)
315 case L_EOF: return "L_EOF";
316 case L_EOL: return "L_EOL";
317 case T_NONE: return "T_NONE";
318 case T_NUMBER: return "T_NUMBER";
319 case T_IPADDR: return "T_IPADDR";
320 case T_IDENTIFIER: return "T_IDENTIFIER";
321 case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING";
322 case T_QUOTED_STRING: return "T_QUOTED_STRING";
323 case T_BOB: return "T_BOB";
324 case T_EOB: return "T_EOB";
325 case T_EQUALS: return "T_EQUALS";
326 case T_ERROR: return "T_ERROR";
327 case T_EOF: return "T_EOF";
328 case T_COMMA: return "T_COMMA";
329 case T_EOL: return "T_EOL";
330 case T_UTF8_BOM: return "T_UTF8_BOM";
331 case T_UTF16_BOM: return "T_UTF16_BOM";
332 default: return "??????";
336 static uint32_t scan_pint(LEX *lf, char *str)
339 if (!is_a_number(str)) {
340 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
344 val = str_to_int64(str);
345 if (errno != 0 || val < 0) {
346 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
350 return (uint32_t)val;
353 static uint64_t scan_pint64(LEX *lf, char *str)
356 if (!is_a_number(str)) {
357 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
361 val = str_to_uint64(str);
363 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
372 * Get the next token from the input
376 lex_get_token(LEX *lf, int expect)
380 bool esc_next = false;
381 /* Unicode files, especially on Win32, may begin with a "Byte Order Mark"
382 to indicate which transmission format the file is in. The codepoint for
383 this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8
384 and as FF-FE in UTF-16le (little endian) and FE-FF in UTF-16be (big endian).
385 We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen
386 to tell which byte we are expecting. */
387 int bom_bytes_seen = 0;
389 Dmsg0(dbglvl, "enter lex_get_token\n");
390 while (token == T_NONE) {
391 ch = lex_get_char(lf);
394 Dmsg2(dbglvl, "Lex state lex_none ch=%d,%x\n", ch, ch);
398 if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) {
399 lf->state = lex_string;
401 lf->state = lex_identifier;
407 if (lf->options & LOPT_STRING) {
408 lf->state = lex_string;
410 lf->state = lex_number;
415 Dmsg0(dbglvl, "Enter lex_none switch\n");
419 Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n");
422 lf->state = lex_comment;
433 lf->state = lex_quoted_string;
445 if (expect != T_SKIP_EOL) {
446 token = T_EOL; /* treat ; like EOL */
450 Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n");
451 if (expect != T_SKIP_EOL) {
456 /* In NO_EXTERN mode, @ is part of a string */
457 if (lf->options & LOPT_NO_EXTERN) {
458 lf->state = lex_string;
461 lf->state = lex_include;
465 case 0xEF: /* probably a UTF-8 BOM */
466 case 0xFF: /* probably a UTF-16le BOM */
467 case 0xFE: /* probably a UTF-16be BOM (error)*/
468 if (lf->line_no != 1 || lf->col_no != 1)
470 lf->state = lex_string;
475 lf->state = lex_utf8_bom;
476 } else if (ch == 0xFF) {
477 lf->state = lex_utf16_le_bom;
479 scan_err0(lf, _("This config file appears to be in an "
480 "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n"));
486 lf->state = lex_string;
492 Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch);
494 lf->state = lex_none;
495 if (expect != T_SKIP_EOL) {
498 } else if (ch == L_EOF) {
503 Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch);
508 /* Might want to allow trailing specifications here */
514 /* A valid number can be terminated by the following */
515 if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') {
517 lf->state = lex_none;
519 lf->state = lex_string;
528 Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch);
531 Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch);
536 if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
537 ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) {
539 token = T_UNQUOTED_STRING;
540 lf->state = lex_none;
546 Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch);
550 } else if (B_ISSPACE(ch)) {
552 } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
553 ch == '\r' || ch == ';' || ch == ',' || ch == '"' || ch == '#') {
555 token = T_IDENTIFIER;
556 lf->state = lex_none;
558 } else if (ch == L_EOF) {
560 lf->state = lex_none;
564 /* Some non-alpha character => string */
565 lf->state = lex_string;
568 case lex_quoted_string:
569 Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch);
588 token = T_QUOTED_STRING;
590 * Since we may be scanning a quoted list of names,
591 * we get the next character (a comma indicates another
592 * one), then we put it back for rescanning.
596 lf->state = lex_none;
601 case lex_include_quoted_string:
616 /* Keep the original LEX so we can print an error if the included file can't be opened. */
618 /* Skip the double quote when restarting parsing */
621 lf->state = lex_none;
622 lf = lex_open_file(lf, lf->str, lf->scan_error);
625 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
626 lfori->str, be.bstrerror());
633 case lex_include: /* scanning a filename */
639 lf->state = lex_include_quoted_string;
644 if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' ||
645 ch == ';' || ch == ',' || ch == '"' || ch == '#') {
646 /* Keep the original LEX so we can print an error if the included file can't be opened. */
649 lf->state = lex_none;
650 lf = lex_open_file(lf, lf->str, lf->scan_error);
653 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
654 lfori->str, be.bstrerror());
662 /* we only end up in this state if we have read an 0xEF
663 as the first byte of the file, indicating we are probably
664 reading a UTF-8 file */
665 if (ch == 0xBB && bom_bytes_seen == 1) {
667 } else if (ch == 0xBF && bom_bytes_seen == 2) {
669 lf->state = lex_none;
674 case lex_utf16_le_bom:
675 /* we only end up in this state if we have read an 0xFF
676 as the first byte of the file -- indicating that we are
677 probably dealing with an Intel-based (little endian) UTF-16 file */
680 lf->state = lex_none;
686 Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state),
687 lex_tok_to_str(token), ch);
689 Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token));
693 * Here is where we check to see if the user has set certain
694 * expectations (e.g. 32 bit integer). If so, we do type checking
695 * and possible additional scanning (e.g. for range).
699 lf->pint32_val = scan_pint(lf, lf->str);
700 lf->pint32_val2 = lf->pint32_val;
705 if (token == T_NUMBER) {
706 lf->pint32_val = scan_pint(lf, lf->str);
707 lf->pint32_val2 = lf->pint32_val;
710 char *p = strchr(lf->str, '-');
712 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
713 lex_tok_to_str(token), lf->str);
717 *p++ = 0; /* terminate first half of range */
718 lf->pint32_val = scan_pint(lf, lf->str);
719 lf->pint32_val2 = scan_pint(lf, p);
720 token = T_PINT32_RANGE;
725 if (token != T_NUMBER || !is_a_number(lf->str)) {
726 scan_err2(lf, _("expected an integer number, got %s: %s"),
727 lex_tok_to_str(token), lf->str);
732 lf->int32_val = (int32_t)str_to_int64(lf->str);
734 scan_err2(lf, _("expected an integer number, got %s: %s"),
735 lex_tok_to_str(token), lf->str);
743 Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL));
744 if (token != T_NUMBER || !is_a_number(lf->str)) {
745 scan_err2(lf, _("expected an integer number, got %s: %s"),
746 lex_tok_to_str(token), lf->str);
751 lf->int64_val = str_to_int64(lf->str);
753 scan_err2(lf, _("expected an integer number, got %s: %s"),
754 lex_tok_to_str(token), lf->str);
762 if (token == T_NUMBER) {
763 lf->pint64_val = scan_pint64(lf, lf->str);
764 lf->pint64_val2 = lf->pint64_val;
767 char *p = strchr(lf->str, '-');
769 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
770 lex_tok_to_str(token), lf->str);
774 *p++ = 0; /* terminate first half of range */
775 lf->pint64_val = scan_pint64(lf, lf->str);
776 lf->pint64_val2 = scan_pint64(lf, p);
777 token = T_PINT64_RANGE;
782 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
783 scan_err2(lf, _("expected a name, got %s: %s"),
784 lex_tok_to_str(token), lf->str);
786 } else if (lf->str_len > MAX_RES_NAME_LENGTH) {
787 scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str,
788 lf->str_len, MAX_RES_NAME_LENGTH);
794 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
795 scan_err2(lf, _("expected a string, got %s: %s"),
796 lex_tok_to_str(token), lf->str);
805 break; /* no expectation given */
807 lf->token = token; /* set possible new token */