2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2016 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Lexical scanner for Bacula configuration file
29 /* Debug level for this source file */
30 static const int dbglvl = 5000;
33 * Return false if the end of the line contains anything other
34 * than spaces, or a semicolon or a comment.
36 bool lex_check_eol(LEX *lf)
38 char *ch = lf->line+lf->col_no;
39 while (*ch != '\0' && *ch != '#' && B_ISSPACE(*ch) && *ch != ';') {
42 return *ch == '\0' || *ch == '#' || *ch == ';';
46 * Scan to "logical" end of line. I.e. end of line,
47 * or semicolon, but stop on T_EOB (same as end of
48 * line except it is not eaten).
50 void scan_to_eol(LEX *lc)
53 Dmsg0(dbglvl, "start scan to eol\n");
54 while ((token = lex_get_token(lc, T_ALL)) != T_EOL) {
66 * Get next token, but skip EOL
68 int scan_to_next_not_eol(LEX * lc)
72 token = lex_get_token(lc, T_ALL);
73 } while (token == T_EOL);
78 * Format a scanner error message
80 static void s_err(const char *file, int line, LEX *lc, const char *msg, ...)
86 va_start(arg_ptr, msg);
87 bvsnprintf(buf, sizeof(buf), msg, arg_ptr);
90 if (lc->err_type == 0) { /* M_ERROR_TERM by default */
91 lc->err_type = M_ERROR_TERM;
94 if (lc->line_no > lc->begin_line_no) {
95 bsnprintf(more, sizeof(more),
96 _("Problem probably begins at line %d.\n"), lc->begin_line_no);
100 if (lc->line_no > 0) {
101 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"
102 " : line %d, col %d of file %s\n%s\n%s"),
103 buf, lc->line_no, lc->col_no, lc->fname, lc->line, more);
105 e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf);
109 void lex_set_default_error_handler(LEX *lf)
111 lf->scan_error = s_err;
115 * Set err_type used in error_handler
116 * return the old value
118 int lex_set_error_handler_error_type(LEX *lf, int err_type)
120 int old = lf->err_type;
121 lf->err_type = err_type;
125 /* Store passwords in clear text or with MD5 encoding */
126 void lex_store_clear_passwords(LEX *lf)
128 lf->options |= LOPT_NO_MD5;
132 * Free the current file, and retrieve the contents
133 * of the previous packet if any.
135 LEX *lex_close_file(LEX *lf)
140 Emsg0(M_ABORT, 0, _("Close of NULL file\n"));
142 Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname);
146 close_bpipe(lf->bpipe);
151 Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname);
155 free_memory(lf->line);
157 free_memory(lf->str);
160 of->options = lf->options; /* preserve options */
161 memcpy(lf, of, sizeof(LEX));
162 Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname);
174 * Open a configuration in memory buffer. We push the
175 * state of the current file (lf) so that we
176 * can do includes. This is a bit of a hammer.
177 * Instead of passing back the pointer to the
178 * new packet, I simply replace the contents
179 * of the caller's packet with the new packet,
180 * and link the contents of the old packet into the new packet's next field.
184 LEX *lex_open_buf(LEX *lf, const char *buffer, LEX_ERROR_HANDLER *scan_error)
189 Dmsg0(400, "Open config buffer\n");
190 nf = (LEX *)malloc(sizeof(LEX));
192 memcpy(nf, lf, sizeof(LEX));
193 memset(lf, 0, sizeof(LEX));
194 lf->next = nf; /* if have lf, push it behind new one */
195 lf->options = nf->options; /* preserve user options */
197 * preserve err_type to prevent bacula exiting on 'reload'
198 * if config is invalid. Fixes bug #877
200 lf->err_type = nf->err_type;
202 lf = nf; /* start new packet */
203 memset(lf, 0, sizeof(LEX));
204 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
207 lf->scan_error = scan_error;
209 lex_set_default_error_handler(lf);
214 lf->line = get_memory(5000);
215 pm_strcpy(lf->line, buffer);
216 pm_strcat(lf->line, "");
217 lf->state = lex_none;
219 lf->str = get_memory(5000);
224 * Open a new configuration file. We push the
225 * state of the current file (lf) so that we
226 * can do includes. This is a bit of a hammer.
227 * Instead of passing back the pointer to the
228 * new packet, I simply replace the contents
229 * of the caller's packet with the new packet,
230 * and link the contents of the old packet into the new packet's next field.
234 LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error)
240 char *fname = bstrdup(filename);
242 if (fname[0] == '|') {
243 if ((bpipe = open_bpipe(fname+1, 0, "rb")) == NULL) {
248 } else if ((fd = fopen(fname, "rb")) == NULL) {
252 Dmsg1(400, "Open config file: %s\n", fname);
253 nf = (LEX *)malloc(sizeof(LEX));
255 memcpy(nf, lf, sizeof(LEX));
256 memset(lf, 0, sizeof(LEX));
257 lf->next = nf; /* if have lf, push it behind new one */
258 lf->options = nf->options; /* preserve user options */
260 * preserve err_type to prevent bacula exiting on 'reload'
261 * if config is invalid. Fixes bug #877
263 lf->err_type = nf->err_type;
265 lf = nf; /* start new packet */
266 memset(lf, 0, sizeof(LEX));
267 lex_set_error_handler_error_type(lf, M_ERROR_TERM);
270 lf->scan_error = scan_error;
272 lex_set_default_error_handler(lf);
277 lf->line = get_memory(5000);
278 lf->state = lex_none;
280 lf->str = get_memory(5000);
281 Dmsg1(dbglvl, "Return lex=%x\n", lf);
286 * Get the next character from the input.
287 * Returns the character or
288 * L_EOF if end of file
289 * L_EOL if end of line
291 int lex_get_char(LEX *lf)
293 if (lf->ch == L_EOF) {
294 Emsg0(M_ABORT, 0, _("get_char: called after EOF."
295 " You may have a open double quote without the closing double quote.\n"));
297 if (lf->fd && lf->ch == L_EOL) {
298 if (bfgets(lf->line, lf->fd) == NULL) {
307 Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line);
308 } else if (lf->ch == L_EOL) {
312 lf->ch = (uint8_t)lf->line[lf->col_no];
315 lf->ch = L_EOL; /* reached end of line, force bfgets */
320 if (lf->ch == 0) { /* End of buffer, stop scan */
326 } else if (lf->ch == '\n') { /* End of line */
327 Dmsg0(dbglvl, "Found newline return L_EOL\n");
333 Dmsg3(dbglvl, "lex_get_char: %c %d col=%d\n", lf->ch, lf->ch, lf->col_no);
337 void lex_unget_char(LEX *lf)
339 if (lf->ch == L_EOL) {
340 lf->ch = 0; /* End of line, force read of next one */
342 lf->col_no--; /* Backup to re-read char */
348 * Add a character to the current string
350 static void add_str(LEX *lf, int ch)
352 if (lf->str_len >= sizeof_pool_memory(lf->str)) {
353 Emsg3(M_ERROR_TERM, 0, _(
354 _("Config token too long, file: %s, line %d, begins at line %d\n")),
355 lf->fname, lf->line_no, lf->begin_line_no);
357 lf->str[lf->str_len++] = ch;
358 lf->str[lf->str_len] = 0;
364 static void begin_str(LEX *lf, int ch)
371 lf->begin_line_no = lf->line_no; /* save start string line no */
375 static const char *lex_state_to_str(int state)
378 case lex_none: return _("none");
379 case lex_comment: return _("comment");
380 case lex_number: return _("number");
381 case lex_ip_addr: return _("ip_addr");
382 case lex_identifier: return _("identifier");
383 case lex_string: return _("string");
384 case lex_quoted_string: return _("quoted_string");
385 case lex_include: return _("include");
386 case lex_include_quoted_string: return _("include_quoted_string");
387 case lex_utf8_bom: return _("UTF-8 Byte Order Mark");
388 case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark");
389 default: return "??????";
395 * Convert a lex token to a string
396 * used for debug/error printing.
398 const char *lex_tok_to_str(int token)
401 case L_EOF: return "L_EOF";
402 case L_EOL: return "L_EOL";
403 case T_NONE: return "T_NONE";
404 case T_NUMBER: return "T_NUMBER";
405 case T_IPADDR: return "T_IPADDR";
406 case T_IDENTIFIER: return "T_IDENTIFIER";
407 case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING";
408 case T_QUOTED_STRING: return "T_QUOTED_STRING";
409 case T_BOB: return "T_BOB";
410 case T_EOB: return "T_EOB";
411 case T_EQUALS: return "T_EQUALS";
412 case T_ERROR: return "T_ERROR";
413 case T_EOF: return "T_EOF";
414 case T_COMMA: return "T_COMMA";
415 case T_EOL: return "T_EOL";
416 case T_UTF8_BOM: return "T_UTF8_BOM";
417 case T_UTF16_BOM: return "T_UTF16_BOM";
418 default: return "??????";
422 static uint32_t scan_pint(LEX *lf, char *str)
425 if (!is_a_number(str)) {
426 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
430 val = str_to_int64(str);
431 if (errno != 0 || val < 0) {
432 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
436 return (uint32_t)val;
439 static uint64_t scan_pint64(LEX *lf, char *str)
442 if (!is_a_number(str)) {
443 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
447 val = str_to_uint64(str);
449 scan_err1(lf, _("expected a positive integer number, got: %s"), str);
458 * Get the next token from the input
462 lex_get_token(LEX *lf, int expect)
466 bool esc_next = false;
467 /* Unicode files, especially on Win32, may begin with a "Byte Order Mark"
468 to indicate which transmission format the file is in. The codepoint for
469 this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8
470 and as FF-FE in UTF-16le(little endian) and FE-FF in UTF-16(big endian).
471 We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen
472 to tell which byte we are expecting. */
473 int bom_bytes_seen = 0;
475 Dmsg1(dbglvl, "enter lex_get_token state=%s\n", lex_state_to_str(lf->state));
476 while (token == T_NONE) {
477 ch = lex_get_char(lf);
480 Dmsg2(dbglvl, "Lex state lex_none ch=%c,%d\n", ch, ch);
484 if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) {
485 lf->state = lex_string;
487 lf->state = lex_identifier;
493 if (lf->options & LOPT_STRING) {
494 lf->state = lex_string;
496 lf->state = lex_number;
501 Dmsg0(dbglvl, "Enter lex_none switch\n");
505 Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n");
508 nch = lex_get_char(lf);
509 if (nch == ' ' || nch == '\n' || nch == '\r' || nch == L_EOL) {
510 lf->ch = L_EOL; /* force end of line */
514 lf->state = lex_comment;
525 lf->state = lex_quoted_string;
537 if (expect != T_SKIP_EOL) {
538 token = T_EOL; /* treat ; like EOL */
542 Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n");
543 if (expect != T_SKIP_EOL) {
548 /* In NO_EXTERN mode, @ is part of a string */
549 if (lf->options & LOPT_NO_EXTERN) {
550 lf->state = lex_string;
553 lf->state = lex_include;
557 case 0xEF: /* probably a UTF-8 BOM */
558 case 0xFF: /* probably a UTF-16le BOM */
559 case 0xFE: /* probably a UTF-16be BOM (error)*/
560 if (lf->line_no != 1 || lf->col_no != 1)
562 lf->state = lex_string;
567 lf->state = lex_utf8_bom;
568 } else if (ch == 0xFF) {
569 lf->state = lex_utf16_le_bom;
571 scan_err0(lf, _("This config file appears to be in an "
572 "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n"));
578 lf->state = lex_string;
584 Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch);
586 lf->state = lex_none;
587 if (expect != T_SKIP_EOL) {
590 } else if (ch == L_EOF) {
595 Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch);
600 /* Might want to allow trailing specifications here */
606 /* A valid number can be terminated by the following */
607 if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') {
609 lf->state = lex_none;
611 lf->state = lex_string;
620 Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch);
623 Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch);
628 if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
629 ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) {
631 token = T_UNQUOTED_STRING;
632 lf->state = lex_none;
638 Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch);
642 } else if (B_ISSPACE(ch)) {
644 } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' ||
645 ch == '\r' || ch == ';' || ch == ',' || ch == '"' || ch == '#') {
647 token = T_IDENTIFIER;
648 lf->state = lex_none;
650 } else if (ch == L_EOF) {
652 lf->state = lex_none;
656 /* Some non-alpha character => string */
657 lf->state = lex_string;
660 case lex_quoted_string:
661 Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch);
680 token = T_QUOTED_STRING;
682 * Since we may be scanning a quoted list of names,
683 * we get the next character (a comma indicates another
684 * one), then we put it back for rescanning.
688 lf->state = lex_none;
693 case lex_include_quoted_string:
708 /* Keep the original LEX so we can print an error if the included file can't be opened. */
710 /* Skip the double quote when restarting parsing */
713 lf->state = lex_none;
714 lf = lex_open_file(lf, lf->str, lf->scan_error);
717 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
718 lfori->str, be.bstrerror());
725 case lex_include: /* scanning a filename */
731 lf->state = lex_include_quoted_string;
736 if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' ||
737 ch == ';' || ch == ',' || ch == '"' || ch == '#') {
738 /* Keep the original LEX so we can print an error if the included file can't be opened. */
741 lf->state = lex_none;
742 lf = lex_open_file(lf, lf->str, lf->scan_error);
745 scan_err2(lfori, _("Cannot open included config file %s: %s\n"),
746 lfori->str, be.bstrerror());
754 /* we only end up in this state if we have read an 0xEF
755 as the first byte of the file, indicating we are probably
756 reading a UTF-8 file */
757 if (ch == 0xBB && bom_bytes_seen == 1) {
759 } else if (ch == 0xBF && bom_bytes_seen == 2) {
761 lf->state = lex_none;
766 case lex_utf16_le_bom:
767 /* we only end up in this state if we have read an 0xFF
768 as the first byte of the file -- indicating that we are
769 probably dealing with an Intel based (little endian) UTF-16 file*/
772 lf->state = lex_none;
778 Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state),
779 lex_tok_to_str(token), ch);
781 Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token));
785 * Here is where we check to see if the user has set certain
786 * expectations (e.g. 32 bit integer). If so, we do type checking
787 * and possible additional scanning (e.g. for range).
791 lf->pint32_val = scan_pint(lf, lf->str);
792 lf->pint32_val2 = lf->pint32_val;
797 if (token == T_NUMBER) {
798 lf->pint32_val = scan_pint(lf, lf->str);
799 lf->pint32_val2 = lf->pint32_val;
802 char *p = strchr(lf->str, '-');
804 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
805 lex_tok_to_str(token), lf->str);
809 *p++ = 0; /* terminate first half of range */
810 lf->pint32_val = scan_pint(lf, lf->str);
811 lf->pint32_val2 = scan_pint(lf, p);
812 token = T_PINT32_RANGE;
817 if (token != T_NUMBER || !is_a_number(lf->str)) {
818 scan_err2(lf, _("expected an integer number, got %s: %s"),
819 lex_tok_to_str(token), lf->str);
824 lf->int32_val = (int32_t)str_to_int64(lf->str);
826 scan_err2(lf, _("expected an integer number, got %s: %s"),
827 lex_tok_to_str(token), lf->str);
835 Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL));
836 if (token != T_NUMBER || !is_a_number(lf->str)) {
837 scan_err2(lf, _("expected an integer number, got %s: %s"),
838 lex_tok_to_str(token), lf->str);
843 lf->int64_val = str_to_int64(lf->str);
845 scan_err2(lf, _("expected an integer number, got %s: %s"),
846 lex_tok_to_str(token), lf->str);
854 if (token == T_NUMBER) {
855 lf->pint64_val = scan_pint64(lf, lf->str);
856 lf->pint64_val2 = lf->pint64_val;
859 char *p = strchr(lf->str, '-');
861 scan_err2(lf, _("expected an integer or a range, got %s: %s"),
862 lex_tok_to_str(token), lf->str);
866 *p++ = 0; /* terminate first half of range */
867 lf->pint64_val = scan_pint64(lf, lf->str);
868 lf->pint64_val2 = scan_pint64(lf, p);
869 token = T_PINT64_RANGE;
874 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
875 scan_err2(lf, _("expected a name, got %s: %s"),
876 lex_tok_to_str(token), lf->str);
878 } else if (lf->str_len > MAX_RES_NAME_LENGTH) {
879 scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str,
880 lf->str_len, MAX_RES_NAME_LENGTH);
886 if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) {
887 scan_err2(lf, _("expected a string, got %s: %s"),
888 lex_tok_to_str(token), lf->str);
897 break; /* no expectation given */
899 lf->token = token; /* set possible new token */