X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Flib%2Flex.c;h=03b64ac0d23498426259807bbc5a9c9bd7756be5;hb=2df589363f0c60f94ca9d5e856dfaf7cc5fcb178;hp=c73016e138854b5052698daa83a16d9f0a84e7f3;hpb=f4fadeaf83a912e0a19c19eacd0c7113b21f0a67;p=bacula%2Fbacula diff --git a/bacula/src/lib/lex.c b/bacula/src/lib/lex.c index c73016e138..03b64ac0d2 100644 --- a/bacula/src/lib/lex.c +++ b/bacula/src/lib/lex.c @@ -1,30 +1,37 @@ /* - * Lexical scanner for Bacula configuration file - * - * Kern Sibbald, 2000 - * - * Version $Id$ - * - */ + Bacula® - The Network Backup Solution -/* - Copyright (C) 2000-2004 Kern Sibbald + Copyright (C) 2000-2008 Free Software Foundation Europe e.V. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. + The main author of Bacula is Kern Sibbald, with contributions from + many others, a complete list can be found in the file AUTHORS. + This program is Free Software; you can redistribute it and/or + modify it under the terms of version two of the GNU General Public + License as published by the Free Software Foundation and included + in the file LICENSE. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - MA 02111-1307, USA. + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + Bacula® is a registered trademark of Kern Sibbald. + The licensor of Bacula is the Free Software Foundation Europe + (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, + Switzerland, email:ftf@fsfeurope.org. +*/ +/* + * Lexical scanner for Bacula configuration file + * + * Kern Sibbald, 2000 + * + * Version $Id$ + * */ #include "bacula.h" @@ -32,6 +39,9 @@ extern int debug_level; +/* Debug level for this source file */ +static const int dbglvl = 5000; + /* * Scan to "logical" end of line. I.e. end of line, * or semicolon, but stop on T_EOB (same as end of @@ -40,7 +50,7 @@ extern int debug_level; void scan_to_eol(LEX *lc) { int token; - Dmsg0(2000, "start scan to eof\n"); + Dmsg0(dbglvl, "start scan to eof\n"); while ((token = lex_get_token(lc, T_ALL)) != T_EOL) { if (token == T_EOB) { lex_unget_char(lc); @@ -61,7 +71,6 @@ int scan_to_next_not_eol(LEX * lc) return token; } - /* * Format a scanner error message */ @@ -75,17 +84,40 @@ static void s_err(const char *file, int line, LEX *lc, const char *msg, ...) bvsnprintf(buf, sizeof(buf), msg, arg_ptr); va_end(arg_ptr); + if (lc->err_type == 0) { /* M_ERROR_TERM by default */ + lc->err_type = M_ERROR_TERM; + } + if (lc->line_no > lc->begin_line_no) { bsnprintf(more, sizeof(more), _("Problem probably begins at line %d.\n"), lc->begin_line_no); } else { more[0] = 0; - } - e_msg(file, line, M_ERROR_TERM, 0, _("Config error: %s\n" + } + if (lc->line_no > 0) { + e_msg(file, line, lc->err_type, 0, _("Config error: %s\n" " : line %d, col %d of file %s\n%s\n%s"), - buf, lc->line_no, lc->col_no, lc->fname, lc->line, more); + buf, lc->line_no, lc->col_no, lc->fname, lc->line, more); + } else { + e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf); + } } +void lex_set_default_error_handler(LEX *lf) +{ + lf->scan_error = s_err; +} + +/* + * Set err_type used in error_handler + * return the old value + */ +int lex_set_error_handler_error_type(LEX *lf, int err_type) +{ + int old = lf->err_type; + lf->err_type = err_type; + return old; +} /* * Free the current file, and retrieve the contents @@ -95,18 +127,24 @@ LEX *lex_close_file(LEX *lf) { LEX *of; - Dmsg1(2000, "Close lex file: %s\n", lf->fname); if (lf == NULL) { - Emsg0(M_ABORT, 0, "Close of NULL file\n"); + Emsg0(M_ABORT, 0, _("Close of NULL file\n")); } + Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname); + of = lf->next; - fclose(lf->fd); - Dmsg1(2000, "Close cfg file %s\n", lf->fname); + if (lf->bpipe) { + close_bpipe(lf->bpipe); + lf->bpipe = NULL; + } else { + fclose(lf->fd); + } + Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname); free(lf->fname); if (of) { of->options = lf->options; /* preserve options */ memcpy(lf, of, sizeof(LEX)); - Dmsg1(2000, "Restart scan of cfg file %s\n", of->fname); + Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname); } else { of = lf; lf = NULL; @@ -131,36 +169,48 @@ LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error) { LEX *nf; FILE *fd; + BPIPE *bpipe = NULL; char *fname = bstrdup(filename); - if ((fd = fopen(fname, "r")) == NULL) { - berrno be; - Emsg2(M_ERROR_TERM, 0, _("Cannot open config file %s: %s\n"), - fname, be.strerror()); - return NULL; /* Never reached if exit_on_error == 1 */ + if (fname[0] == '|') { + if ((bpipe = open_bpipe(fname+1, 0, "rb")) == NULL) { + free(fname); + return NULL; + } + fd = bpipe->rfd; + } else if ((fd = fopen(fname, "rb")) == NULL) { + free(fname); + return NULL; } - Dmsg1(2000, "Open config file: %s\n", fname); + Dmsg1(400, "Open config file: %s\n", fname); nf = (LEX *)malloc(sizeof(LEX)); if (lf) { memcpy(nf, lf, sizeof(LEX)); memset(lf, 0, sizeof(LEX)); lf->next = nf; /* if have lf, push it behind new one */ lf->options = nf->options; /* preserve user options */ + /* + * preserve err_type to prevent bacula exiting on 'reload' + * if config is invalid. Fixes bug #877 + */ + lf->err_type = nf->err_type; } else { lf = nf; /* start new packet */ memset(lf, 0, sizeof(LEX)); + lex_set_error_handler_error_type(lf, M_ERROR_TERM); } - lf->fd = fd; - lf->fname = fname; - lf->state = lex_none; - lf->ch = L_EOL; if (scan_error) { lf->scan_error = scan_error; } else { - lf->scan_error = s_err; + lex_set_default_error_handler(lf); } - Dmsg1(2000, "Return lex=%x\n", lf); + lf->fd = fd; + lf->bpipe = bpipe; + lf->fname = fname; + lf->state = lex_none; + lf->ch = L_EOL; + Dmsg1(dbglvl, "Return lex=%x\n", lf); return lf; } @@ -173,7 +223,8 @@ LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error) int lex_get_char(LEX *lf) { if (lf->ch == L_EOF) { - Emsg0(M_ABORT, 0, "get_char: called after EOF\n"); + Emsg0(M_ABORT, 0, _("get_char: called after EOF." + " You may have a open double quote without the closing double quote.\n")); } if (lf->ch == L_EOL) { if (bfgets(lf->line, MAXSTRING, lf->fd) == NULL) { @@ -185,22 +236,26 @@ int lex_get_char(LEX *lf) } lf->line_no++; lf->col_no = 0; + Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line); } - lf->ch = lf->line[lf->col_no]; + lf->ch = (uint8_t)lf->line[lf->col_no]; if (lf->ch == 0) { lf->ch = L_EOL; } else { lf->col_no++; } - Dmsg2(2000, "lex_get_char: %c %d\n", lf->ch, lf->ch); + Dmsg2(dbglvl, "lex_get_char: %c %d\n", lf->ch, lf->ch); return lf->ch; } void lex_unget_char(LEX *lf) { - lf->col_no--; - if (lf->ch == L_EOL) - lf->ch = 0; + if (lf->ch == L_EOL) { + lf->ch = 0; /* End of line, force read of next one */ + } else { + lf->col_no--; /* Backup to re-read char */ + } + } @@ -235,13 +290,17 @@ static void begin_str(LEX *lf, int ch) static const char *lex_state_to_str(int state) { switch (state) { - case lex_none: return "none"; - case lex_comment: return "comment"; - case lex_number: return "number"; - case lex_ip_addr: return "ip_addr"; - case lex_identifier: return "identifier"; - case lex_string: return "string"; - case lex_quoted_string: return "quoted_string"; + case lex_none: return _("none"); + case lex_comment: return _("comment"); + case lex_number: return _("number"); + case lex_ip_addr: return _("ip_addr"); + case lex_identifier: return _("identifier"); + case lex_string: return _("string"); + case lex_quoted_string: return _("quoted_string"); + case lex_include: return _("include"); + case lex_include_quoted_string: return _("include_quoted_string"); + case lex_utf8_bom: return _("UTF-8 Byte Order Mark"); + case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark"); default: return "??????"; } } @@ -269,6 +328,8 @@ const char *lex_tok_to_str(int token) case T_EOF: return "T_EOF"; case T_COMMA: return "T_COMMA"; case T_EOL: return "T_EOL"; + case T_UTF8_BOM: return "T_UTF8_BOM"; + case T_UTF16_BOM: return "T_UTF16_BOM"; default: return "??????"; } } @@ -283,13 +344,30 @@ static uint32_t scan_pint(LEX *lf, char *str) errno = 0; val = str_to_int64(str); if (errno != 0 || val < 0) { - scan_err1(lf, _("expected a postive integer number, got: %s"), str); + scan_err1(lf, _("expected a positive integer number, got: %s"), str); /* NOT REACHED */ } } return (uint32_t)val; } +static uint64_t scan_pint64(LEX *lf, char *str) +{ + uint64_t val = 0; + if (!is_a_number(str)) { + scan_err1(lf, _("expected a positive integer number, got: %s"), str); + /* NOT REACHED */ + } else { + errno = 0; + val = str_to_uint64(str); + if (errno != 0) { + scan_err1(lf, _("expected a positive integer number, got: %s"), str); + /* NOT REACHED */ + } + } + return val; +} + /* * * Get the next token from the input @@ -301,13 +379,20 @@ lex_get_token(LEX *lf, int expect) int ch; int token = T_NONE; bool esc_next = false; - - Dmsg0(2000, "enter lex_get_token\n"); + /* Unicode files, especially on Win32, may begin with a "Byte Order Mark" + to indicate which transmission format the file is in. The codepoint for + this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8 + and as FF-FE in UTF-16le(little endian) and FE-FF in UTF-16(big endian). + We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen + to tell which byte we are expecting. */ + int bom_bytes_seen = 0; + + Dmsg0(dbglvl, "enter lex_get_token\n"); while (token == T_NONE) { ch = lex_get_char(lf); switch (lf->state) { case lex_none: - Dmsg2(2000, "Lex state lex_none ch=%d,%x\n", ch, ch); + Dmsg2(dbglvl, "Lex state lex_none ch=%d,%x\n", ch, ch); if (B_ISSPACE(ch)) break; if (B_ISALPHA(ch)) { @@ -328,11 +413,11 @@ lex_get_token(LEX *lf, int expect) begin_str(lf, ch); break; } - Dmsg0(2000, "Enter lex_none switch\n"); + Dmsg0(dbglvl, "Enter lex_none switch\n"); switch (ch) { case L_EOF: token = T_EOF; - Dmsg0(2000, "got L_EOF set token=T_EOF\n"); + Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n"); break; case '#': lf->state = lex_comment; @@ -363,7 +448,7 @@ lex_get_token(LEX *lf, int expect) } break; case L_EOL: - Dmsg0(2000, "got L_EOL set token=T_EOL\n"); + Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n"); if (expect != T_SKIP_EOL) { token = T_EOL; } @@ -372,6 +457,26 @@ lex_get_token(LEX *lf, int expect) lf->state = lex_include; begin_str(lf, 0); break; + case 0xEF: /* probably a UTF-8 BOM */ + case 0xFF: /* probably a UTF-16le BOM */ + case 0xFE: /* probably a UTF-16be BOM (error)*/ + if (lf->line_no != 1 || lf->col_no != 1) + { + lf->state = lex_string; + begin_str(lf, ch); + } else { + bom_bytes_seen = 1; + if (ch == 0xEF) { + lf->state = lex_utf8_bom; + } else if (ch == 0xFF) { + lf->state = lex_utf16_le_bom; + } else { + scan_err0(lf, _("This config file appears to be in an " + "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n")); + return T_ERROR; + } + } + break; default: lf->state = lex_string; begin_str(lf, ch); @@ -379,7 +484,7 @@ lex_get_token(LEX *lf, int expect) } break; case lex_comment: - Dmsg1(2000, "Lex state lex_comment ch=%x\n", ch); + Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch); if (ch == L_EOL) { lf->state = lex_none; if (expect != T_SKIP_EOL) { @@ -390,7 +495,7 @@ lex_get_token(LEX *lf, int expect) } break; case lex_number: - Dmsg2(2000, "Lex state lex_number ch=%x %c\n", ch, ch); + Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch); if (ch == L_EOF) { token = T_ERROR; break; @@ -415,10 +520,10 @@ lex_get_token(LEX *lf, int expect) token = T_ERROR; break; } - Dmsg1(2000, "Lex state lex_ip_addr ch=%x\n", ch); + Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch); break; case lex_string: - Dmsg1(2000, "Lex state lex_string ch=%x\n", ch); + Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch); if (ch == L_EOF) { token = T_ERROR; break; @@ -433,7 +538,7 @@ lex_get_token(LEX *lf, int expect) add_str(lf, ch); break; case lex_identifier: - Dmsg2(2000, "Lex state lex_identifier ch=%x %c\n", ch, ch); + Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch); if (B_ISALPHA(ch)) { add_str(lf, ch); break; @@ -456,7 +561,7 @@ lex_get_token(LEX *lf, int expect) add_str(lf, ch); break; case lex_quoted_string: - Dmsg2(2000, "Lex state lex_quoted_string ch=%x %c\n", ch, ch); + Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch); if (ch == L_EOF) { token = T_ERROR; break; @@ -481,27 +586,95 @@ lex_get_token(LEX *lf, int expect) } add_str(lf, ch); break; + case lex_include_quoted_string: + if (ch == L_EOF) { + token = T_ERROR; + break; + } + if (esc_next) { + add_str(lf, ch); + esc_next = false; + break; + } + if (ch == '\\') { + esc_next = true; + break; + } + if (ch == '"') { + /* Keep the original LEX so we can print an error if the included file can't be opened. */ + LEX* lfori = lf; + /* Skip the double quote when restarting parsing */ + lex_get_char(lf); + + lf->state = lex_none; + lf = lex_open_file(lf, lf->str, lf->scan_error); + if (lf == NULL) { + berrno be; + scan_err2(lfori, _("Cannot open included config file %s: %s\n"), + lfori->str, be.bstrerror()); + return T_ERROR; + } + break; + } + add_str(lf, ch); + break; case lex_include: /* scanning a filename */ if (ch == L_EOF) { token = T_ERROR; break; } + if (ch == '"') { + lf->state = lex_include_quoted_string; + break; + } + + if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' || ch == ';' || ch == ',' || ch == '"' || ch == '#') { + /* Keep the original LEX so we can print an error if the included file can't be opened. */ + LEX* lfori = lf; + lf->state = lex_none; - lf = lex_open_file(lf, lf->str, NULL); - if (lf == NULL) { - return T_ERROR; - } + lf = lex_open_file(lf, lf->str, lf->scan_error); + if (lf == NULL) { + berrno be; + scan_err2(lfori, _("Cannot open included config file %s: %s\n"), + lfori->str, be.bstrerror()); + return T_ERROR; + } break; } add_str(lf, ch); break; + case lex_utf8_bom: + /* we only end up in this state if we have read an 0xEF + as the first byte of the file, indicating we are probably + reading a UTF-8 file */ + if (ch == 0xBB && bom_bytes_seen == 1) { + bom_bytes_seen++; + } else if (ch == 0xBF && bom_bytes_seen == 2) { + token = T_UTF8_BOM; + lf->state = lex_none; + } else { + token = T_ERROR; + } + break; + case lex_utf16_le_bom: + /* we only end up in this state if we have read an 0xFF + as the first byte of the file -- indicating that we are + probably dealing with an Intel based (little endian) UTF-16 file*/ + if (ch == 0xFE) { + token = T_UTF16_BOM; + lf->state = lex_none; + } else { + token = T_ERROR; + } + break; } - Dmsg4(2000, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state), + Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state), lex_tok_to_str(token), ch); } - Dmsg2(2000, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token)); + Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token)); lf->token = token; /* @@ -555,7 +728,7 @@ lex_get_token(LEX *lf, int expect) break; case T_INT64: - Dmsg2(2000, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL)); + Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL)); if (token != T_NUMBER || !is_a_number(lf->str)) { scan_err2(lf, _("expected an integer number, got %s: %s"), lex_tok_to_str(token), lf->str); @@ -573,6 +746,26 @@ lex_get_token(LEX *lf, int expect) } break; + case T_PINT64_RANGE: + if (token == T_NUMBER) { + lf->pint64_val = scan_pint64(lf, lf->str); + lf->pint64_val2 = lf->pint64_val; + token = T_PINT64; + } else { + char *p = strchr(lf->str, '-'); + if (!p) { + scan_err2(lf, _("expected an integer or a range, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; + break; + } + *p++ = 0; /* terminate first half of range */ + lf->pint64_val = scan_pint64(lf, lf->str); + lf->pint64_val2 = scan_pint64(lf, p); + token = T_PINT64_RANGE; + } + break; + case T_NAME: if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) { scan_err2(lf, _("expected a name, got %s: %s"),