2 Bacula® - The Network Backup Solution
4 Copyright (C) 2000-2007 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Lexical scanner for Bacula configuration file
/* Declared here and defined in another translation unit. It is not
 * referenced in the visible part of this file. */
40 extern int debug_level;
42 /* Debug level for this source file: passed as the level argument to
 * most of the Dmsg calls below. */
43 static const int dbglvl = 5000;
46 * Scan to "logical" end of line. I.e. end of line,
47 * or semicolon, but stop on T_EOB (same as end of
48 * line except it is not eaten).
/*
 * Pull tokens from lc with lex_get_token(lc, T_ALL) until a T_EOL token
 * is returned.
 *
 * NOTE(review): the loop body and the end of the function are not visible
 * in this extract, so the T_EOB handling described in the header comment
 * cannot be confirmed from here.
 */
50 void scan_to_eol(LEX *lc)
/* NOTE(review): the debug text says eof although this routine scans to
 * end of line -- confirm whether the wording is intentional. */
53 Dmsg0(dbglvl, "start scan to eof\n");
54 while ((token = lex_get_token(lc, T_ALL)) != T_EOL) {
63 * Get next token, but skip EOL
/*
 * Fetch tokens from lc with lex_get_token(lc, T_ALL), repeating for as
 * long as the token read is T_EOL.
 *
 * NOTE(review): the return statement is not visible in this extract;
 * presumably the first token that is not T_EOL is returned -- confirm.
 */
65 int scan_to_next_not_eol(LEX * lc)
69 token = lex_get_token(lc, T_ALL);
70 } while (token == T_EOL);
75 * Format a scanner error message
/*
 * Scanner error reporter; installed as the scan_error callback by
 * lex_set_default_error_handler().
 *
 *   file, line  forwarded unchanged to e_msg()
 *   lc          scanner packet; err_type, line_no, begin_line_no, col_no,
 *               fname and line are read from it
 *   msg, ...    printf-style format and arguments, expanded into buf
 *
 * NOTE(review): the declarations of buf and more, the va_end call and the
 * else branches are not visible in this extract.
 */
77 static void s_err(const char *file, int line, LEX *lc, const char *msg, ...)
83 va_start(arg_ptr, msg);
/* Expand the caller-supplied format into buf, bounded by sizeof(buf). */
84 bvsnprintf(buf, sizeof(buf), msg, arg_ptr);
87 if (lc->err_type == 0) { /* M_ERROR_TERM by default */
88 lc->err_type = M_ERROR_TERM;
/* When the current line is past the line recorded in begin_line_no, build
 * a hint in more that points back to that starting line. */
91 if (lc->line_no > lc->begin_line_no) {
92 bsnprintf(more, sizeof(more),
93 _("Problem probably begins at line %d.\n"), lc->begin_line_no);
/* With a positive line number the report includes line, column, file name,
 * the text of the current line and the hint built above. */
97 if (lc->line_no > 0) {
98 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"
99 " : line %d, col %d of file %s\n%s\n%s"),
100 buf, lc->line_no, lc->col_no, lc->fname, lc->line, more);
/* Short form carrying only the formatted message; presumably the else
 * branch of the line_no test above -- confirm against the full source. */
102 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf);
/*
 * Install s_err as the scan_error callback of the scanner packet lf.
 */
106 void lex_set_default_error_handler(LEX *lf)
108 lf->scan_error = s_err;
112 * Set err_type used in error_handler
113 * return the old value
/*
 * Store err_type in lf->err_type after saving the previous value in old.
 *
 * NOTE(review): the return statement is not visible in this extract; the
 * header comment states that the old value is returned.
 */
115 int lex_set_error_handler_error_type(LEX *lf, int err_type)
/* Remember the current setting before it is overwritten. */
117 int old = lf->err_type;
118 lf->err_type = err_type;
123 * Free the current file, and retrieve the contents
124 * of the previous packet if any.
/*
 * Close the input attached to lf and, when a saved packet exists, copy it
 * back over *lf so that scanning of the earlier file resumes.
 *
 * NOTE(review): the guarding conditions, the definition of of, the release
 * of other resources and the return statement are not visible in this
 * extract; of is presumably the packet saved by lex_open_file() -- confirm.
 */
126 LEX *lex_close_file(LEX *lf)
/* Abort with M_ABORT; presumably guarded by a NULL test on lf -- confirm. */
131 Emsg0(M_ABORT, 0, _("Close of NULL file\n"));
133 Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname);
/* Pipe-backed input is closed through close_bpipe(). */
137 close_bpipe(lf->bpipe);
142 Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname);
145 of->options = lf->options; /* preserve options */
/* Overwrite the caller packet in place with the saved one. */
146 memcpy(lf, of, sizeof(LEX));
147 Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname);
157 * Open a new configuration file. We push the
158 * state of the current file (lf) so that we
159 * can do includes. This is a bit of a hammer.
160 * Instead of passing back the pointer to the
161 * new packet, I simply replace the contents
162 * of the caller's packet with the new packet,
163 * and link the contents of the old packet into
/*
 * Open filename as a new scanner input.
 *
 *   lf          current packet; its contents are copied into a freshly
 *               allocated packet and linked behind the new state
 *   filename    path to open; a name starting with a pipe character is
 *               opened through open_bpipe() instead of fopen()
 *   scan_error  error callback stored in the packet; the default handler
 *               is installed on another path
 *
 * NOTE(review): the failure paths, several conditions and the return
 * statement are not visible in this extract.
 */
167 LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error)
/* Work on a private copy of the name. */
173 char *fname = bstrdup(filename);
176 if (fname[0] == '|') {
/* NOTE(review): fname is passed with its leading pipe character still in
 * place -- confirm that open_bpipe() expects that. */
177 if ((bpipe = open_bpipe(fname, 0, "rb")) == NULL) {
182 } else if ((fd = fopen(fname, "rb")) == NULL) {
/* NOTE(review): uses the literal level 400 rather than dbglvl. */
186 Dmsg1(400, "Open config file: %s\n", fname);
/* NOTE(review): no check of the malloc result is visible in this extract. */
187 nf = (LEX *)malloc(sizeof(LEX));
/* Save the current state in nf, then clear the caller packet for reuse. */
189 memcpy(nf, lf, sizeof(LEX));
190 memset(lf, 0, sizeof(LEX));
191 lf->next = nf; /* if have lf, push it behind new one */
192 lf->options = nf->options; /* preserve user options */
194 lf = nf; /* start new packet */
195 memset(lf, 0, sizeof(LEX));
196 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
/* Either the caller-supplied handler or the default one is installed; the
 * condition selecting between them is not visible in this extract. */
199 lf->scan_error = scan_error;
201 lex_set_default_error_handler(lf);
206 lf->state = lex_none;
/* NOTE(review): lf is a pointer but is printed with the x conversion;
 * the p conversion is the matching one -- confirm against Dmsg. */
208 Dmsg1(dbglvl, "Return lex=%x\n", lf);
213 * Get the next character from the input.
214 * Returns the character or
215 * L_EOF if end of file
216 * L_EOL if end of line
/*
 * Deliver the next input character of lf, refilling lf->line from lf->fd
 * when the previous character was L_EOL.
 *
 * NOTE(review): the handling of a failed read, the updates of line_no and
 * col_no and the return statement are not visible in this extract.
 */
218 int lex_get_char(LEX *lf)
/* Reading again after end of file has been reported is treated as fatal. */
220 if (lf->ch == L_EOF) {
221 Emsg0(M_ABORT, 0, _("get_char: called after EOF\n"));
/* The previous character ended a line: fetch the next line, at most
 * MAXSTRING bytes, into lf->line. */
223 if (lf->ch == L_EOL) {
224 if (bfgets(lf->line, MAXSTRING, lf->fd) == NULL) {
/* NOTE(review): uses the literal level 1000 rather than dbglvl. */
233 Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line);
/* The cast keeps bytes of 0x80 and above from turning negative where
 * plain char is signed. */
235 lf->ch = (uint8_t)lf->line[lf->col_no];
241 Dmsg2(dbglvl, "lex_get_char: %c %d\n", lf->ch, lf->ch);
/*
 * Push the last character back so that it is delivered again.
 *
 * NOTE(review): the line joining the two assignments below is not visible
 * in this extract; presumably the col_no decrement is the else branch of
 * the L_EOL test -- confirm.
 */
245 void lex_unget_char(LEX *lf)
247 if (lf->ch == L_EOL) {
248 lf->ch = 0; /* End of line, force read of next one */
250 lf->col_no--; /* Backup to re-read char */
257 * Add a character to the current string
/*
 * Append ch to the token text in lf->str and keep it NUL terminated.
 * Once str_len has reached MAXSTRING-3 an M_ERROR_TERM message naming the
 * file, the current line and the line where the token began is issued.
 */
259 static void add_str(LEX *lf, int ch)
261 if (lf->str_len >= MAXSTRING-3) {
/* NOTE(review): the format string is wrapped in the translation macro
 * twice; a single wrapper looks intended -- confirm and drop one. */
262 Emsg3(M_ERROR_TERM, 0, _(
263 _("Config token too long, file: %s, line %d, begins at line %d\n")),
264 lf->fname, lf->line_no, lf->begin_line_no);
/* Store the character, then terminate the string after it. */
266 lf->str[lf->str_len++] = ch;
267 lf->str[lf->str_len] = 0;
/*
 * Start a new token string for lf.
 *
 * NOTE(review): only one body line is visible in this extract; how ch is
 * used and how str and str_len are reset cannot be seen from here.
 */
273 static void begin_str(LEX *lf, int ch)
280 lf->begin_line_no = lf->line_no; /* save start string line no */
/*
 * Map a scanner state code to a printable name. Known states go through
 * the translation macro; anything else yields a run of question marks.
 *
 * NOTE(review): no case for lex_include, a state assigned elsewhere in
 * this file, is visible here; it would fall through to the default.
 */
284 static const char *lex_state_to_str(int state)
287 case lex_none: return _("none");
288 case lex_comment: return _("comment");
289 case lex_number: return _("number");
290 case lex_ip_addr: return _("ip_addr");
291 case lex_identifier: return _("identifier");
292 case lex_string: return _("string");
293 case lex_quoted_string: return _("quoted_string");
294 case lex_utf8_bom: return _("UTF-8 Byte Order Mark");
295 case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark");
296 default: return "??????";
302 * Convert a lex token to a string
303 * used for debug/error printing.
/*
 * Map a token or character code to the spelling of its constant name.
 * The names are returned untranslated; an unknown code yields a run of
 * question marks.
 *
 * NOTE(review): no case for T_PINT32_RANGE, a token value assigned
 * elsewhere in this file, is visible here; it would fall through to the
 * default.
 */
305 const char *lex_tok_to_str(int token)
308 case L_EOF: return "L_EOF";
309 case L_EOL: return "L_EOL";
310 case T_NONE: return "T_NONE";
311 case T_NUMBER: return "T_NUMBER";
312 case T_IPADDR: return "T_IPADDR";
313 case T_IDENTIFIER: return "T_IDENTIFIER";
314 case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING";
315 case T_QUOTED_STRING: return "T_QUOTED_STRING";
316 case T_BOB: return "T_BOB";
317 case T_EOB: return "T_EOB";
318 case T_EQUALS: return "T_EQUALS";
319 case T_ERROR: return "T_ERROR";
320 case T_EOF: return "T_EOF";
321 case T_COMMA: return "T_COMMA";
322 case T_EOL: return "T_EOL";
323 case T_UTF8_BOM: return "T_UTF8_BOM";
324 case T_UTF16_BOM: return "T_UTF16_BOM";
325 default: return "??????";
/*
 * Convert str to a non-negative integer for the scanner lf.
 * Text rejected by is_a_number(), a conversion that leaves errno set, or
 * a negative result is reported through scan_err1().
 *
 * NOTE(review): the declaration of val and any reset of errno before the
 * conversion are not visible in this extract; the errno test is only
 * meaningful if errno is cleared first -- confirm.
 */
329 static uint32_t scan_pint(LEX *lf, char *str)
332 if (!is_a_number(str)) {
333 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
337 val = str_to_int64(str);
338 if (errno != 0 || val < 0) {
339 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
/* NOTE(review): the result of a 64 bit conversion is narrowed to 32 bits
 * here; no upper bound check is visible in this extract. */
343 return (uint32_t)val;
348 * Get the next token from the input
352 lex_get_token(LEX *lf, int expect)
356 bool esc_next = false;
357 /* Unicode files, especially on Win32, may begin with a "Byte Order Mark"
358 to indicate which transmission format the file is in. The codepoint for
359 this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8
360 and as FF-FE in UTF-16le(little endian) and FE-FF in UTF-16(big endian).
361 We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen
362 to tell which byte we are expecting. */
363 int bom_bytes_seen = 0;
365 Dmsg0(dbglvl, "enter lex_get_token\n");
366 while (token == T_NONE) {
367 ch = lex_get_char(lf);
370 Dmsg2(dbglvl, "Lex state lex_none ch=%d,%x\n", ch, ch);
374 if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) {
375 lf->state = lex_string;
377 lf->state = lex_identifier;
383 if (lf->options & LOPT_STRING) {
384 lf->state = lex_string;
386 lf->state = lex_number;
391 Dmsg0(dbglvl, "Enter lex_none switch\n");
395 Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n");
398 lf->state = lex_comment;
409 lf->state = lex_quoted_string;
421 if (expect != T_SKIP_EOL) {
422 token = T_EOL; /* treat ; like EOL */
426 Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n");
427 if (expect != T_SKIP_EOL) {
432 lf->state = lex_include;
435 case 0xEF: /* probably a UTF-8 BOM */
436 case 0xFF: /* probably a UTF-16le BOM */
437 case 0xFE: /* probably a UTF-16be BOM (error)*/
438 if (lf->line_no != 1 || lf->col_no != 1)
440 lf->state = lex_string;
445 lf->state = lex_utf8_bom;
446 } else if (ch == 0xFF) {
447 lf->state = lex_utf16_le_bom;
449 scan_err0(lf, _("This config file appears to be in an "
450 "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n"));
456 lf->state = lex_string;
462 Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch);
464 lf->state = lex_none;
465 if (expect != T_SKIP_EOL) {
468 } else if (ch == L_EOF) {
473 Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch);
478 /* Might want to allow trailing specifications here */
484 /* A valid number can be terminated by the following */
485 if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') {
487 lf->state = lex_none;
489 lf->state = lex_string;
498 Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch);
501 Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch);
506 if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
507 ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) {
509 token = T_UNQUOTED_STRING;
510 lf->state = lex_none;
516 Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch);
520 } else if (B_ISSPACE(ch)) {
522 } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
523 ch == '\r' || ch == ';' || ch == ',' || ch == '"' || ch == '#') {
525 token = T_IDENTIFIER;
526 lf->state = lex_none;
528 } else if (ch == L_EOF) {
530 lf->state = lex_none;
534 /* Some non-alpha character => string */
535 lf->state = lex_string;
538 case lex_quoted_string:
539 Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch);
558 token = T_QUOTED_STRING;
559 lf->state = lex_none;
564 case lex_include: /* scanning a filename */
569 if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' ||
570 ch == ';' || ch == ',' || ch == '"' || ch == '#') {
571 /* Keep the original LEX so we can print an error if the included file can't be opened. */
574 lf->state = lex_none;
575 lf = lex_open_file(lf, lf->str, lf->scan_error);
578 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
579 lfori->str, be.bstrerror());
587 /* we only end up in this state if we have read an 0xEF
588 as the first byte of the file, indicating we are probably
589 reading a UTF-8 file */
590 if (ch == 0xBB && bom_bytes_seen == 1) {
592 } else if (ch == 0xBF && bom_bytes_seen == 2) {
594 lf->state = lex_none;
599 case lex_utf16_le_bom:
600 /* we only end up in this state if we have read an 0xFF
601 as the first byte of the file -- indicating that we are
602 probably dealing with an Intel based (little endian) UTF-16 file*/
605 lf->state = lex_none;
611 Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state),
612 lex_tok_to_str(token), ch);
614 Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token));
618 * Here is where we check to see if the user has set certain
619 * expectations (e.g. 32 bit integer). If so, we do type checking
620 * and possible additional scanning (e.g. for range).
624 lf->pint32_val = scan_pint(lf, lf->str);
625 lf->pint32_val2 = lf->pint32_val;
630 if (token == T_NUMBER) {
631 lf->pint32_val = scan_pint(lf, lf->str);
632 lf->pint32_val2 = lf->pint32_val;
635 char *p = strchr(lf->str, '-');
637 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
638 lex_tok_to_str(token), lf->str);
642 *p++ = 0; /* terminate first half of range */
643 lf->pint32_val = scan_pint(lf, lf->str);
644 lf->pint32_val2 = scan_pint(lf, p);
645 token = T_PINT32_RANGE;
650 if (token != T_NUMBER || !is_a_number(lf->str)) {
651 scan_err2(lf, _("expected an integer number, got %s: %s"),
652 lex_tok_to_str(token), lf->str);
657 lf->int32_val = (int32_t)str_to_int64(lf->str);
659 scan_err2(lf, _("expected an integer number, got %s: %s"),
660 lex_tok_to_str(token), lf->str);
668 Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL));
669 if (token != T_NUMBER || !is_a_number(lf->str)) {
670 scan_err2(lf, _("expected an integer number, got %s: %s"),
671 lex_tok_to_str(token), lf->str);
676 lf->int64_val = str_to_int64(lf->str);
678 scan_err2(lf, _("expected an integer number, got %s: %s"),
679 lex_tok_to_str(token), lf->str);
687 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
688 scan_err2(lf, _("expected a name, got %s: %s"),
689 lex_tok_to_str(token), lf->str);
691 } else if (lf->str_len > MAX_RES_NAME_LENGTH) {
692 scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str,
693 lf->str_len, MAX_RES_NAME_LENGTH);
699 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
700 scan_err2(lf, _("expected a string, got %s: %s"),
701 lex_tok_to_str(token), lf->str);
710 break; /* no expectation given */
712 lf->token = token; /* set possible new token */