2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2012 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Lexical scanner for Bacula configuration file
38 extern int debug_level;
40 /* Debug level for this source file */
41 static const int dbglvl = 5000;
44 * Scan to "logical" end of line. I.e. end of line,
45 * or semicolon, but stop on T_EOB (same as end of
46 * line except it is not eaten).
/*
 * scan_to_eol: pull tokens from lc with lex_get_token(lc, T_ALL) until the
 * returned token is T_EOL.
 * Only the loop condition is visible in this listing; the statements inside
 * the loop are not shown here.
 * NOTE(review): the debug text says eof although the loop ends on T_EOL;
 * confirm whether the message wording is intended.
 */
48 void scan_to_eol(LEX *lc)
51 Dmsg0(dbglvl, "start scan to eof\n");
52 while ((token = lex_get_token(lc, T_ALL)) != T_EOL) {
61 * Get next token, but skip EOL
/*
 * scan_to_next_not_eol: fetch tokens with lex_get_token(lc, T_ALL) and
 * repeat for as long as the token just read is T_EOL.
 * NOTE(review): the return statement is not visible in this listing;
 * presumably the first non-EOL token is returned -- confirm.
 */
63 int scan_to_next_not_eol(LEX * lc)
67 token = lex_get_token(lc, T_ALL);
68 } while (token == T_EOL);
73 * Format a scanner error message
/*
 * s_err: scanner error reporter installed by lex_set_default_error_handler.
 *   file, line - forwarded unchanged to e_msg
 *   lc         - lexer packet supplying err_type and position information
 *   msg, ...   - printf-style format and arguments for the error text
 * The declarations of buf, more and arg_ptr are not visible in this listing.
 */
75 static void s_err(const char *file, int line, LEX *lc, const char *msg, ...)
/* Expand the caller format and arguments into buf. */
81 va_start(arg_ptr, msg);
82 bvsnprintf(buf, sizeof(buf), msg, arg_ptr);
/* An unset (zero) error type is replaced by M_ERROR_TERM and stored back. */
85 if (lc->err_type == 0) { /* M_ERROR_TERM by default */
86 lc->err_type = M_ERROR_TERM;
/* When the current line is past begin_line_no, prepare a hint in more that
   points at the line where the problem probably started. */
89 if (lc->line_no > lc->begin_line_no) {
90 bsnprintf(more, sizeof(more),
91 _("Problem probably begins at line %d.\n"), lc->begin_line_no);
/* With a positive line number the report carries line, column, file name,
   the text of the current line and the hint. */
95 if (lc->line_no > 0) {
96 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"
97 " : line %d, col %d of file %s\n%s\n%s"),
98 buf, lc->line_no, lc->col_no, lc->fname, lc->line, more);
/* Short form without position data. NOTE(review): presumably the else
   branch of the line_no test; the else line is not visible here. */
100 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf);
/*
 * lex_set_default_error_handler: make s_err the scan_error callback of lf.
 */
104 void lex_set_default_error_handler(LEX *lf)
106 lf->scan_error = s_err;
110 * Set err_type used in error_handler
111 * return the old value
/*
 * lex_set_error_handler_error_type: store err_type in lf->err_type after
 * saving the previous value in old.
 * NOTE(review): the return statement is not visible in this listing; the
 * comment above the function says the old value is returned.
 */
113 int lex_set_error_handler_error_type(LEX *lf, int err_type)
/* Remember the previous setting before it is overwritten. */
115 int old = lf->err_type;
116 lf->err_type = err_type;
121 * Free the current file, and retrieve the contents
122 * of the previous packet if any.
/*
 * lex_close_file: release the resources of the current lexer packet lf and,
 * when a saved packet is available, copy that packet back into lf.
 * Several lines of this function (guards, branches, the return) are not
 * visible in this listing, so the notes below are limited to what is shown.
 */
124 LEX *lex_close_file(LEX *lf)
/* Abort with a message. NOTE(review): presumably guarded by a NULL test on
   lf that is not visible here. */
129 Emsg0(M_ABORT, 0, _("Close of NULL file\n"));
131 Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname);
/* Pipe input is closed through close_bpipe. NOTE(review): the condition
   selecting this path is not visible. */
135 close_bpipe(lf->bpipe);
140 Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname);
/* Give back the line buffer held in lf->line. */
142 free_memory(lf->line);
/* of is a second packet (its assignment is not visible; presumably the
   saved previous packet). The current options are carried over to it and
   then the whole of it is copied over lf. */
145 of->options = lf->options; /* preserve options */
146 memcpy(lf, of, sizeof(LEX));
147 Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname);
157 * Open a new configuration file. We push the
158 * state of the current file (lf) so that we
159 * can do includes. This is a bit of a hammer.
160 * Instead of passing back the pointer to the
161 * new packet, I simply replace the contents
162 * of the caller's packet with the new packet,
163 * and link the contents of the old packet into its next field.
/*
 * lex_open_file: open filename as lexer input and prepare lf for scanning it.
 *   lf         - caller packet; its previous contents are copied into a
 *                newly allocated packet that is linked through lf->next
 *   filename   - path to open, or a command when the first character is |
 *   scan_error - error callback stored in lf->scan_error
 * The failure paths, the branch structure and the return statement are not
 * visible in this listing.
 */
167 LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error)
/* Work on a private copy of the name. */
173 char *fname = bstrdup(filename);
/* A leading | selects pipe input: the remainder of the name is handed to
   open_bpipe. Anything else is opened with fopen in binary read mode. */
175 if (fname[0] == '|') {
176 if ((bpipe = open_bpipe(fname+1, 0, "rb")) == NULL) {
181 } else if ((fd = fopen(fname, "rb")) == NULL) {
185 Dmsg1(400, "Open config file: %s\n", fname);
/* NOTE(review): no NULL check on the malloc result is visible here. */
186 nf = (LEX *)malloc(sizeof(LEX));
/* Save the old packet contents in nf, clear lf, and chain nf behind it. */
188 memcpy(nf, lf, sizeof(LEX));
189 memset(lf, 0, sizeof(LEX));
190 lf->next = nf; /* if have lf, push it behind new one */
191 lf->options = nf->options; /* preserve user options */
193 * preserve err_type to prevent bacula exiting on 'reload'
194 * if config is invalid. Fixes bug #877
196 lf->err_type = nf->err_type;
/* NOTE(review): presumably the alternative path used when no packet was
   passed in; the branch lines are not visible. The fresh packet is cleared
   and its error type is set to M_ERROR_TERM. */
198 lf = nf; /* start new packet */
199 memset(lf, 0, sizeof(LEX));
200 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
/* Install the supplied callback or the default handler. NOTE(review): the
   test choosing between the two is not visible. */
203 lf->scan_error = scan_error;
205 lex_set_default_error_handler(lf);
/* Obtain a line buffer (requested size 5000) and start in state lex_none. */
210 lf->line = get_memory(5000);
211 lf->state = lex_none;
/* NOTE(review): lf is a pointer but is formatted with %x; confirm whether
   %p should be used. */
213 Dmsg1(dbglvl, "Return lex=%x\n", lf);
218 * Get the next character from the input.
219 * Returns the character or
220 * L_EOF if end of file
221 * L_EOL if end of line
/*
 * lex_get_char: advance lf to the next input character and record it in
 * lf->ch. Parts of the body (EOF handling, position updates, the return)
 * are not visible in this listing.
 */
223 int lex_get_char(LEX *lf)
/* Reading again after L_EOF was already delivered is treated as fatal. */
225 if (lf->ch == L_EOF) {
226 Emsg0(M_ABORT, 0, _("get_char: called after EOF."
227 " You may have a open double quote without the closing double quote.\n"));
/* When the previous character was L_EOL, a new line is read into lf->line
   from lf->fd with bfgets; a NULL result is handled in lines not shown. */
229 if (lf->ch == L_EOL) {
230 if (bfgets(lf->line, lf->fd) == NULL) {
239 Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line);
/* Take the character at the current column; the uint8_t cast keeps bytes
   with the high bit set from becoming negative values. */
241 lf->ch = (uint8_t)lf->line[lf->col_no];
247 Dmsg2(dbglvl, "lex_get_char: %c %d\n", lf->ch, lf->ch);
/*
 * lex_unget_char: undo the most recent lex_get_char on lf.
 * If the last character was L_EOL, lf->ch is reset to 0; the column
 * decrement shown below steps back one character.
 * NOTE(review): presumably the decrement is the else branch of the L_EOL
 * test; the else line is not visible in this listing.
 */
251 void lex_unget_char(LEX *lf)
253 if (lf->ch == L_EOL) {
254 lf->ch = 0; /* End of line, force read of next one */
256 lf->col_no--; /* Backup to re-read char */
263 * Add a character to the current string
/*
 * add_str: append character ch to the token text collected in lf->str and
 * keep the text NUL terminated.
 *   lf - lexer packet owning str and str_len
 *   ch - character to append
 * When str_len has reached MAXSTRING-3 an M_ERROR_TERM message naming the
 * file, the current line and the line where the token began is issued.
 * The format string is marked for translation exactly once; the earlier
 * nested marker looked up the already translated text a second time.
 */
265 static void add_str(LEX *lf, int ch)
267 if (lf->str_len >= MAXSTRING-3) {
268 Emsg3(M_ERROR_TERM, 0,
269 _("Config token too long, file: %s, line %d, begins at line %d\n"),
270 lf->fname, lf->line_no, lf->begin_line_no);
/* Store the character, then write the terminator after it. */
272 lf->str[lf->str_len++] = ch;
273 lf->str[lf->str_len] = 0;
/*
 * begin_str: start collecting a new token in lf. Most of the body is not
 * visible in this listing; the one visible statement records the current
 * line number as the line on which the token begins.
 */
279 static void begin_str(LEX *lf, int ch)
286 lf->begin_line_no = lf->line_no; /* save start string line no */
/*
 * lex_state_to_str: return a printable name for a lexer state value.
 * Known states yield a name passed through the _() translation marker;
 * any other value yields a fixed string of question marks.
 * The switch header is not visible in this listing.
 */
290 static const char *lex_state_to_str(int state)
293 case lex_none: return _("none");
294 case lex_comment: return _("comment");
295 case lex_number: return _("number");
296 case lex_ip_addr: return _("ip_addr");
297 case lex_identifier: return _("identifier");
298 case lex_string: return _("string");
299 case lex_quoted_string: return _("quoted_string");
300 case lex_include: return _("include");
301 case lex_include_quoted_string: return _("include_quoted_string");
302 case lex_utf8_bom: return _("UTF-8 Byte Order Mark");
303 case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark");
/* Fallback for values without a name. */
304 default: return "??????";
310 * Convert a lex token to a string
311 * used for debug/error printing.
/*
 * lex_tok_to_str: return the symbolic name of a token or character code as
 * a constant string. The names are returned untranslated. Values without
 * an entry yield a fixed string of question marks.
 * The switch header is not visible in this listing.
 */
313 const char *lex_tok_to_str(int token)
316 case L_EOF: return "L_EOF";
317 case L_EOL: return "L_EOL";
318 case T_NONE: return "T_NONE";
319 case T_NUMBER: return "T_NUMBER";
320 case T_IPADDR: return "T_IPADDR";
321 case T_IDENTIFIER: return "T_IDENTIFIER";
322 case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING";
323 case T_QUOTED_STRING: return "T_QUOTED_STRING";
324 case T_BOB: return "T_BOB";
325 case T_EOB: return "T_EOB";
326 case T_EQUALS: return "T_EQUALS";
327 case T_ERROR: return "T_ERROR";
328 case T_EOF: return "T_EOF";
329 case T_COMMA: return "T_COMMA";
330 case T_EOL: return "T_EOL";
331 case T_UTF8_BOM: return "T_UTF8_BOM";
332 case T_UTF16_BOM: return "T_UTF16_BOM";
/* Fallback for values without a name. */
333 default: return "??????";
/*
 * scan_pint: convert the text in str to a non-negative integer and return
 * it as uint32_t.
 *   lf  - lexer packet used for error reporting through scan_err1
 *   str - text to convert
 * The declaration of val and the lines following each error report are not
 * visible in this listing.
 */
337 static uint32_t scan_pint(LEX *lf, char *str)
/* Text that is not a number is reported. */
340 if (!is_a_number(str)) {
341 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
/* Convert, then reject a conversion error (errno set) or a negative value. */
345 val = str_to_int64(str);
346 if (errno != 0 || val < 0) {
347 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
/* NOTE(review): no upper bound test is visible before this cast, so a value
   that does not fit in 32 bits would be truncated -- confirm. */
351 return (uint32_t)val;
/*
 * scan_pint64: 64-bit counterpart of the positive integer scan; converts
 * str with str_to_uint64.
 *   lf  - lexer packet used for error reporting through scan_err1
 *   str - text to convert
 * The declaration of val, the test guarding the second error report and the
 * return statement are not visible in this listing.
 */
354 static uint64_t scan_pint64(LEX *lf, char *str)
/* Text that is not a number is reported. */
357 if (!is_a_number(str)) {
358 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
362 val = str_to_uint64(str);
/* NOTE(review): presumably reached when the conversion failed; the
   condition line is not visible. */
364 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
373 * Get the next token from the input
377 lex_get_token(LEX *lf, int expect)
381 bool esc_next = false;
382 /* Unicode files, especially on Win32, may begin with a "Byte Order Mark"
383 to indicate which transmission format the file is in. The codepoint for
384 this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8
385 and as FF-FE in UTF-16le(little endian) and FE-FF in UTF-16(big endian).
386 We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen
387 to tell which byte we are expecting. */
388 int bom_bytes_seen = 0;
390 Dmsg0(dbglvl, "enter lex_get_token\n");
391 while (token == T_NONE) {
392 ch = lex_get_char(lf);
395 Dmsg2(dbglvl, "Lex state lex_none ch=%d,%x\n", ch, ch);
399 if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) {
400 lf->state = lex_string;
402 lf->state = lex_identifier;
408 if (lf->options & LOPT_STRING) {
409 lf->state = lex_string;
411 lf->state = lex_number;
416 Dmsg0(dbglvl, "Enter lex_none switch\n");
420 Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n");
423 lf->state = lex_comment;
434 lf->state = lex_quoted_string;
446 if (expect != T_SKIP_EOL) {
447 token = T_EOL; /* treat ; like EOL */
451 Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n");
452 if (expect != T_SKIP_EOL) {
457 /* In NO_EXTERN mode, @ is part of a string */
458 if (lf->options & LOPT_NO_EXTERN) {
459 lf->state = lex_string;
462 lf->state = lex_include;
466 case 0xEF: /* probably a UTF-8 BOM */
467 case 0xFF: /* probably a UTF-16le BOM */
468 case 0xFE: /* probably a UTF-16be BOM (error)*/
469 if (lf->line_no != 1 || lf->col_no != 1)
471 lf->state = lex_string;
476 lf->state = lex_utf8_bom;
477 } else if (ch == 0xFF) {
478 lf->state = lex_utf16_le_bom;
480 scan_err0(lf, _("This config file appears to be in an "
481 "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n"));
487 lf->state = lex_string;
493 Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch);
495 lf->state = lex_none;
496 if (expect != T_SKIP_EOL) {
499 } else if (ch == L_EOF) {
504 Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch);
509 /* Might want to allow trailing specifications here */
515 /* A valid number can be terminated by the following */
516 if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') {
518 lf->state = lex_none;
520 lf->state = lex_string;
529 Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch);
532 Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch);
537 if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
538 ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) {
540 token = T_UNQUOTED_STRING;
541 lf->state = lex_none;
547 Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch);
551 } else if (B_ISSPACE(ch)) {
553 } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
554 ch == '\r' || ch == ';' || ch == ',' || ch == '"' || ch == '#') {
556 token = T_IDENTIFIER;
557 lf->state = lex_none;
559 } else if (ch == L_EOF) {
561 lf->state = lex_none;
565 /* Some non-alpha character => string */
566 lf->state = lex_string;
569 case lex_quoted_string:
570 Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch);
589 token = T_QUOTED_STRING;
590 lf->state = lex_none;
595 case lex_include_quoted_string:
610 /* Keep the original LEX so we can print an error if the included file can't be opened. */
612 /* Skip the double quote when restarting parsing */
615 lf->state = lex_none;
616 lf = lex_open_file(lf, lf->str, lf->scan_error);
619 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
620 lfori->str, be.bstrerror());
627 case lex_include: /* scanning a filename */
633 lf->state = lex_include_quoted_string;
638 if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' ||
639 ch == ';' || ch == ',' || ch == '"' || ch == '#') {
640 /* Keep the original LEX so we can print an error if the included file can't be opened. */
643 lf->state = lex_none;
644 lf = lex_open_file(lf, lf->str, lf->scan_error);
647 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
648 lfori->str, be.bstrerror());
656 /* we only end up in this state if we have read an 0xEF
657 as the first byte of the file, indicating we are probably
658 reading a UTF-8 file */
659 if (ch == 0xBB && bom_bytes_seen == 1) {
661 } else if (ch == 0xBF && bom_bytes_seen == 2) {
663 lf->state = lex_none;
668 case lex_utf16_le_bom:
669 /* we only end up in this state if we have read an 0xFF
670 as the first byte of the file -- indicating that we are
671 probably dealing with an Intel based (little endian) UTF-16 file*/
674 lf->state = lex_none;
680 Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state),
681 lex_tok_to_str(token), ch);
683 Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token));
687 * Here is where we check to see if the user has set certain
688 * expectations (e.g. 32 bit integer). If so, we do type checking
689 * and possible additional scanning (e.g. for range).
693 lf->pint32_val = scan_pint(lf, lf->str);
694 lf->pint32_val2 = lf->pint32_val;
699 if (token == T_NUMBER) {
700 lf->pint32_val = scan_pint(lf, lf->str);
701 lf->pint32_val2 = lf->pint32_val;
704 char *p = strchr(lf->str, '-');
706 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
707 lex_tok_to_str(token), lf->str);
711 *p++ = 0; /* terminate first half of range */
712 lf->pint32_val = scan_pint(lf, lf->str);
713 lf->pint32_val2 = scan_pint(lf, p);
714 token = T_PINT32_RANGE;
719 if (token != T_NUMBER || !is_a_number(lf->str)) {
720 scan_err2(lf, _("expected an integer number, got %s: %s"),
721 lex_tok_to_str(token), lf->str);
726 lf->int32_val = (int32_t)str_to_int64(lf->str);
728 scan_err2(lf, _("expected an integer number, got %s: %s"),
729 lex_tok_to_str(token), lf->str);
737 Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL));
738 if (token != T_NUMBER || !is_a_number(lf->str)) {
739 scan_err2(lf, _("expected an integer number, got %s: %s"),
740 lex_tok_to_str(token), lf->str);
745 lf->int64_val = str_to_int64(lf->str);
747 scan_err2(lf, _("expected an integer number, got %s: %s"),
748 lex_tok_to_str(token), lf->str);
756 if (token == T_NUMBER) {
757 lf->pint64_val = scan_pint64(lf, lf->str);
758 lf->pint64_val2 = lf->pint64_val;
761 char *p = strchr(lf->str, '-');
763 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
764 lex_tok_to_str(token), lf->str);
768 *p++ = 0; /* terminate first half of range */
769 lf->pint64_val = scan_pint64(lf, lf->str);
770 lf->pint64_val2 = scan_pint64(lf, p);
771 token = T_PINT64_RANGE;
776 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
777 scan_err2(lf, _("expected a name, got %s: %s"),
778 lex_tok_to_str(token), lf->str);
780 } else if (lf->str_len > MAX_RES_NAME_LENGTH) {
781 scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str,
782 lf->str_len, MAX_RES_NAME_LENGTH);
788 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
789 scan_err2(lf, _("expected a string, got %s: %s"),
790 lex_tok_to_str(token), lf->str);
799 break; /* no expectation given */
801 lf->token = token; /* set possible new token */