2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included in the file LICENSE.
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Lexical scanner for Bacula configuration file
/* debug_level is only declared here (extern); its definition lives elsewhere. */
40 extern int debug_level;
42 /* Debug level for this source file */
/* dbglvl is the level passed as first argument to most Dmsg* calls in this file. */
43 static const int dbglvl = 5000;
46 * Scan to "logical" end of line. I.e. end of line,
47 * or semicolon, but stop on T_EOB (same as end of
48 * line except it is not eaten).
/*
 * scan_to_eol: pull tokens from lc with lex_get_token(lc, T_ALL) until a
 * T_EOL token is returned.
 * The debug trace now says "eol": the loop terminates on T_EOL, not on EOF.
 * NOTE(review): the loop body is not visible in this excerpt; the T_EOB
 * handling mentioned in the comment preceding this function presumably
 * lives there -- confirm.
 */
50 void scan_to_eol(LEX *lc)
53 Dmsg0(dbglvl, "start scan to eol\n");
54 while ((token = lex_get_token(lc, T_ALL)) != T_EOL) {
63 * Get next token, but skip EOL
/*
 * scan_to_next_not_eol: fetch tokens with lex_get_token(lc, T_ALL) in a
 * do/while loop for as long as the token is T_EOL.
 * NOTE(review): the return statement is not visible in this excerpt;
 * presumably the first non-EOL token is returned -- confirm.
 */
65 int scan_to_next_not_eol(LEX * lc)
69 token = lex_get_token(lc, T_ALL);
70 } while (token == T_EOL);
75 * Format a scanner error message
/*
 * s_err: scanner error handler (installed by lex_set_default_error_handler).
 *   file, line - forwarded unchanged to e_msg()
 *   lc         - lexer state supplying err_type, line/column numbers,
 *                file name and the current input line
 *   msg, ...   - printf-style message and arguments
 */
77 static void s_err(const char *file, int line, LEX *lc, const char *msg, ...)
/* Expand the caller's format string into buf. */
83 va_start(arg_ptr, msg);
84 bvsnprintf(buf, sizeof(buf), msg, arg_ptr);
/* An err_type of 0 means "not set"; fall back to M_ERROR_TERM. */
87 if (lc->err_type == 0) { /* M_ERROR_TERM by default */
88 lc->err_type = M_ERROR_TERM;
/* When the current line is past begin_line_no, build a hint naming begin_line_no. */
91 if (lc->line_no > lc->begin_line_no) {
92 bsnprintf(more, sizeof(more),
93 _("Problem probably begins at line %d.\n"), lc->begin_line_no);
/* With a positive line number, report position, file name and the input line; */
/* otherwise only the message itself is reported. */
97 if (lc->line_no > 0) {
98 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"
99 " : line %d, col %d of file %s\n%s\n%s"),
100 buf, lc->line_no, lc->col_no, lc->fname, lc->line, more);
102 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf);
/* lex_set_default_error_handler: make s_err the scan_error callback of lf. */
106 void lex_set_default_error_handler(LEX *lf)
108 lf->scan_error = s_err;
112 * Set err_type used in error_handler
113 * return the old value
/*
 * lex_set_error_handler_error_type: store err_type in lf->err_type after
 * saving the previous value in `old`.
 * NOTE(review): the return statement is not visible in this excerpt;
 * per the comment preceding this function, `old` is returned -- confirm.
 */
115 int lex_set_error_handler_error_type(LEX *lf, int err_type)
117 int old = lf->err_type;
118 lf->err_type = err_type;
123 * Free the current file, and retrieve the contents
124 * of the previous packet if any.
/*
 * lex_close_file: close the input currently described by lf and, when a
 * saved packet `of` exists, copy it back over *lf so scanning resumes there.
 * NOTE(review): the conditions guarding each statement, the release of
 * resources and the return value are not visible in this excerpt.
 */
126 LEX *lex_close_file(LEX *lf)
/* Abort message for a NULL lf (the guarding test is not visible here). */
131 Emsg0(M_ABORT, 0, _("Close of NULL file\n"));
133 Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname);
/* Pipe-backed input is closed through close_bpipe(). */
137 close_bpipe(lf->bpipe);
142 Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname);
/* Carry the current options into the saved packet before it overwrites *lf. */
145 of->options = lf->options; /* preserve options */
146 memcpy(lf, of, sizeof(LEX));
147 Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname);
157 * Open a new configuration file. We push the
158 * state of the current file (lf) so that we
159 * can do includes. This is a bit of a hammer.
160 * Instead of passing back the pointer to the
161 * new packet, I simply replace the contents
162 * of the caller's packet with the new packet,
163 and link the contents of the old packet into the new packet's next field.
/*
 * lex_open_file: open `filename` as lexer input and make lf describe it.
 *   lf         - current lexer packet; its old contents are copied into a
 *                freshly malloc'ed packet that is linked through lf->next
 *   filename   - path to open; a leading '|' means the remainder is opened
 *                with open_bpipe() instead of fopen()
 *   scan_error - error callback to install; lex_set_default_error_handler()
 *                is used on the other branch (the test is not visible here)
 * Fix: the final debug trace printed the LEX pointer with %x, which does
 * not match a pointer argument; it now uses %p with a (void *) cast.
 * NOTE(review): the result of malloc() is used without a visible NULL
 * check, and the open trace uses a literal level 400 rather than dbglvl.
 */
167 LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error)
173 char *fname = bstrdup(filename);
176 if (fname[0] == '|') {
177 if ((bpipe = open_bpipe(fname+1, 0, "rb")) == NULL) {
182 } else if ((fd = fopen(fname, "rb")) == NULL) {
186 Dmsg1(400, "Open config file: %s\n", fname);
187 nf = (LEX *)malloc(sizeof(LEX));
/* Save the old packet in nf, then clear lf and chain the saved copy behind it. */
189 memcpy(nf, lf, sizeof(LEX));
190 memset(lf, 0, sizeof(LEX));
191 lf->next = nf; /* if have lf, push it behind new one */
192 lf->options = nf->options; /* preserve user options */
194 * preserve err_type to prevent bacula exiting on 'reload'
195 * if config is invalid. Fixes bug #877
197 lf->err_type = nf->err_type;
/* Other branch: the new packet itself becomes lf, zeroed, with M_ERROR_TERM. */
199 lf = nf; /* start new packet */
200 memset(lf, 0, sizeof(LEX));
201 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
204 lf->scan_error = scan_error;
206 lex_set_default_error_handler(lf);
211 lf->state = lex_none;
213 Dmsg1(dbglvl, "Return lex=%p\n", (void *)lf);
218 * Get the next character from the input.
219 * Returns the character or
220 * L_EOF if end of file
221 * L_EOL if end of line
/*
 * lex_get_char: advance the lexer by one input character.
 * Aborts when called while lf->ch is already L_EOF. When lf->ch is L_EOL a
 * new line is read into lf->line with bfgets(). The current character is
 * taken from lf->line[lf->col_no] as an unsigned byte and stored in lf->ch.
 * NOTE(review): EOF handling, line/column bookkeeping and the return
 * statement are not visible in this excerpt.
 */
223 int lex_get_char(LEX *lf)
225 if (lf->ch == L_EOF) {
226 Emsg0(M_ABORT, 0, _("get_char: called after EOF."
227 " You may have a open double quote without the closing double quote.\n"));
/* The previous character ended a line: fetch the next line of input. */
229 if (lf->ch == L_EOL) {
230 if (bfgets(lf->line, MAXSTRING, lf->fd) == NULL) {
239 Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line);
/* Cast through uint8_t so bytes >= 0x80 are not sign-extended. */
241 lf->ch = (uint8_t)lf->line[lf->col_no];
247 Dmsg2(dbglvl, "lex_get_char: %c %d\n", lf->ch, lf->ch);
/*
 * lex_unget_char: push the last character back.
 * At end of line lf->ch is reset to 0; on the other visible path the column
 * index is decremented so the same character is read again.
 * NOTE(review): the else keyword joining the two paths is not visible here.
 */
251 void lex_unget_char(LEX *lf)
253 if (lf->ch == L_EOL) {
254 lf->ch = 0; /* End of line, force read of next one */
256 lf->col_no--; /* Backup to re-read char */
263 * Add a character to the current string
/*
 * add_str: append ch to the token buffer lf->str and keep it NUL-terminated.
 * Raises M_ERROR_TERM when str_len has reached MAXSTRING-3.
 * Fix: the message was wrapped in _() twice; a single _() is sufficient,
 * the nested call looked the already-translated text up a second time.
 */
265 static void add_str(LEX *lf, int ch)
267 if (lf->str_len >= MAXSTRING-3) {
268 Emsg3(M_ERROR_TERM, 0,
269 _("Config token too long, file: %s, line %d, begins at line %d\n"),
270 lf->fname, lf->line_no, lf->begin_line_no);
272 lf->str[lf->str_len++] = ch;
273 lf->str[lf->str_len] = 0;
/*
 * begin_str: start a new token string; records the current line number in
 * lf->begin_line_no.
 * NOTE(review): how ch and the string buffer are handled is not visible in
 * this excerpt.
 */
279 static void begin_str(LEX *lf, int ch)
286 lf->begin_line_no = lf->line_no; /* save start string line no */
290 static const char *lex_state_to_str(int state)
293 case lex_none: return _("none");
294 case lex_comment: return _("comment");
295 case lex_number: return _("number");
296 case lex_ip_addr: return _("ip_addr");
297 case lex_identifier: return _("identifier");
298 case lex_string: return _("string");
299 case lex_quoted_string: return _("quoted_string");
300 case lex_utf8_bom: return _("UTF-8 Byte Order Mark");
301 case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark");
302 default: return "??????";
308 * Convert a lex token to a string
309 * used for debug/error printing.
/*
 * lex_tok_to_str: map a token or character code to its symbolic name as a
 * string literal. Returns "??????" for any value without a case.
 */
311 const char *lex_tok_to_str(int token)
314 case L_EOF: return "L_EOF";
315 case L_EOL: return "L_EOL";
316 case T_NONE: return "T_NONE";
317 case T_NUMBER: return "T_NUMBER";
318 case T_IPADDR: return "T_IPADDR";
319 case T_IDENTIFIER: return "T_IDENTIFIER";
320 case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING";
321 case T_QUOTED_STRING: return "T_QUOTED_STRING";
322 case T_BOB: return "T_BOB";
323 case T_EOB: return "T_EOB";
324 case T_EQUALS: return "T_EQUALS";
325 case T_ERROR: return "T_ERROR";
326 case T_EOF: return "T_EOF";
327 case T_COMMA: return "T_COMMA";
328 case T_EOL: return "T_EOL";
329 case T_UTF8_BOM: return "T_UTF8_BOM";
330 case T_UTF16_BOM: return "T_UTF16_BOM";
331 default: return "??????";
/*
 * scan_pint: convert str to a non-negative integer that fits in uint32_t.
 * A scan error is reported through scan_err1() when str is not a number,
 * when errno is set after str_to_int64(), when the value is negative, or
 * when it exceeds 0xFFFFFFFF.
 * Fix: the upper-bound test is new; larger values used to be silently
 * truncated by the final cast to uint32_t.
 * NOTE(review): the declaration of val and any reset of errno before the
 * conversion are not visible in this excerpt; val is assumed to be a
 * signed 64-bit integer -- confirm.
 */
335 static uint32_t scan_pint(LEX *lf, char *str)
338 if (!is_a_number(str)) {
339 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
343 val = str_to_int64(str);
344 if (errno != 0 || val < 0 || val > (int64_t)0xFFFFFFFF) {
345 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
349 return (uint32_t)val;
354 * Get the next token from the input
/*
 * lex_get_token: main tokenizer.
 *   lf     - lexer state; characters come from lex_get_char(lf)
 *   expect - expectation code; T_SKIP_EOL suppresses T_EOL tokens, other
 *            values select the type checks in the second half
 * Characters are consumed in a loop until `token` is no longer T_NONE,
 * with behaviour selected by lf->state. The resulting token is then checked
 * against `expect` and stored in lf->token.
 * NOTE(review): this excerpt omits many lines (case labels, braces,
 * returns); the comments below describe only what the visible lines show.
 */
358 lex_get_token(LEX *lf, int expect)
362 bool esc_next = false;
363 /* Unicode files, especially on Win32, may begin with a "Byte Order Mark"
364 to indicate which transmission format the file is in. The codepoint for
365 this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8
366 and as FF-FE in UTF-16le(little endian) and FE-FF in UTF-16(big endian).
367 We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen
368 to tell which byte we are expecting. */
369 int bom_bytes_seen = 0;
371 Dmsg0(dbglvl, "enter lex_get_token\n");
372 while (token == T_NONE) {
373 ch = lex_get_char(lf);
376 Dmsg2(dbglvl, "Lex state lex_none ch=%d,%x\n", ch, ch);
/* LOPT_NO_IDENT or LOPT_STRING selects string scanning instead of identifier scanning. */
380 if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) {
381 lf->state = lex_string;
383 lf->state = lex_identifier;
/* LOPT_STRING likewise selects string scanning instead of number scanning. */
389 if (lf->options & LOPT_STRING) {
390 lf->state = lex_string;
392 lf->state = lex_number;
397 Dmsg0(dbglvl, "Enter lex_none switch\n");
401 Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n");
404 lf->state = lex_comment;
415 lf->state = lex_quoted_string;
427 if (expect != T_SKIP_EOL) {
428 token = T_EOL; /* treat ; like EOL */
432 Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n");
433 if (expect != T_SKIP_EOL) {
438 lf->state = lex_include;
441 case 0xEF: /* probably a UTF-8 BOM */
442 case 0xFF: /* probably a UTF-16le BOM */
443 case 0xFE: /* probably a UTF-16be BOM (error)*/
/* Anywhere but line 1, column 1 these bytes are not treated as a BOM. */
444 if (lf->line_no != 1 || lf->col_no != 1)
446 lf->state = lex_string;
451 lf->state = lex_utf8_bom;
452 } else if (ch == 0xFF) {
453 lf->state = lex_utf16_le_bom;
455 scan_err0(lf, _("This config file appears to be in an "
456 "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n"));
462 lf->state = lex_string;
468 Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch);
470 lf->state = lex_none;
471 if (expect != T_SKIP_EOL) {
474 } else if (ch == L_EOF) {
479 Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch);
484 /* Might want to allow trailing specifications here */
490 /* A valid number can be terminated by the following */
491 if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') {
493 lf->state = lex_none;
495 lf->state = lex_string;
504 Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch);
507 Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch);
/* Characters that end an unquoted string. */
512 if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
513 ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) {
515 token = T_UNQUOTED_STRING;
516 lf->state = lex_none;
522 Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch);
526 } else if (B_ISSPACE(ch)) {
528 } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
529 ch == '\r' || ch == ';' || ch == ',' || ch == '"' || ch == '#') {
531 token = T_IDENTIFIER;
532 lf->state = lex_none;
534 } else if (ch == L_EOF) {
536 lf->state = lex_none;
540 /* Some non-alpha character => string */
541 lf->state = lex_string;
544 case lex_quoted_string:
545 Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch);
564 token = T_QUOTED_STRING;
565 lf->state = lex_none;
570 case lex_include: /* scanning a filename */
575 if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' ||
576 ch == ';' || ch == ',' || ch == '"' || ch == '#') {
577 /* Keep the original LEX so we can print an error if the included file can't be opened. */
580 lf->state = lex_none;
/* lf is replaced by the result of lex_open_file(); the collected string is the file name. */
581 lf = lex_open_file(lf, lf->str, lf->scan_error);
584 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
585 lfori->str, be.bstrerror());
593 /* we only end up in this state if we have read an 0xEF
594 as the first byte of the file, indicating we are probably
595 reading a UTF-8 file */
596 if (ch == 0xBB && bom_bytes_seen == 1) {
598 } else if (ch == 0xBF && bom_bytes_seen == 2) {
600 lf->state = lex_none;
605 case lex_utf16_le_bom:
606 /* we only end up in this state if we have read an 0xFF
607 as the first byte of the file -- indicating that we are
608 probably dealing with an Intel based (little endian) UTF-16 file*/
611 lf->state = lex_none;
617 Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state),
618 lex_tok_to_str(token), ch);
620 Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token));
624 * Here is where we check to see if the user has set certain
625 * expectations (e.g. 32 bit integer). If so, we do type checking
626 * and possible additional scanning (e.g. for range).
/* Single positive integer: both pint32 fields receive the same value. */
630 lf->pint32_val = scan_pint(lf, lf->str);
631 lf->pint32_val2 = lf->pint32_val;
636 if (token == T_NUMBER) {
637 lf->pint32_val = scan_pint(lf, lf->str);
638 lf->pint32_val2 = lf->pint32_val;
/* Otherwise look for a '-' separator; the string is split in place at that position. */
641 char *p = strchr(lf->str, '-');
643 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
644 lex_tok_to_str(token), lf->str);
648 *p++ = 0; /* terminate first half of range */
649 lf->pint32_val = scan_pint(lf, lf->str);
650 lf->pint32_val2 = scan_pint(lf, p);
651 token = T_PINT32_RANGE;
656 if (token != T_NUMBER || !is_a_number(lf->str)) {
657 scan_err2(lf, _("expected an integer number, got %s: %s"),
658 lex_tok_to_str(token), lf->str);
/* NOTE(review): the 64-bit result is narrowed to int32_t; no range check is visible here -- confirm. */
663 lf->int32_val = (int32_t)str_to_int64(lf->str);
665 scan_err2(lf, _("expected an integer number, got %s: %s"),
666 lex_tok_to_str(token), lf->str);
674 Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL));
675 if (token != T_NUMBER || !is_a_number(lf->str)) {
676 scan_err2(lf, _("expected an integer number, got %s: %s"),
677 lex_tok_to_str(token), lf->str);
682 lf->int64_val = str_to_int64(lf->str);
684 scan_err2(lf, _("expected an integer number, got %s: %s"),
685 lex_tok_to_str(token), lf->str);
/* A name must be an identifier or string token no longer than MAX_RES_NAME_LENGTH. */
693 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
694 scan_err2(lf, _("expected a name, got %s: %s"),
695 lex_tok_to_str(token), lf->str);
697 } else if (lf->str_len > MAX_RES_NAME_LENGTH) {
698 scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str,
699 lf->str_len, MAX_RES_NAME_LENGTH);
705 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
706 scan_err2(lf, _("expected a string, got %s: %s"),
707 lex_tok_to_str(token), lf->str);
716 break; /* no expectation given */
718 lf->token = token; /* set possible new token */