X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Flib%2Flex.c;h=03b64ac0d23498426259807bbc5a9c9bd7756be5;hb=2df589363f0c60f94ca9d5e856dfaf7cc5fcb178;hp=9474ab64443c215b3b687eb7bf56f4e8c58d28de;hpb=0a62e8eacaa64193bb3f901762d706487d302764;p=bacula%2Fbacula diff --git a/bacula/src/lib/lex.c b/bacula/src/lib/lex.c index 9474ab6444..03b64ac0d2 100644 --- a/bacula/src/lib/lex.c +++ b/bacula/src/lib/lex.c @@ -1,28 +1,37 @@ /* - * Lexical scanner for Bacula configuration file - * - * Version $Id$ - * - */ + Bacula® - The Network Backup Solution -/* - Copyright (C) 2000, 2001, 2002 Kern Sibbald and John Walker + Copyright (C) 2000-2008 Free Software Foundation Europe e.V. - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. + The main author of Bacula is Kern Sibbald, with contributions from + many others, a complete list can be found in the file AUTHORS. + This program is Free Software; you can redistribute it and/or + modify it under the terms of version two of the GNU General Public + License as published by the Free Software Foundation and included + in the file LICENSE. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - You should have received a copy of the GNU General Public - License along with this program; if not, write to the Free - Software Foundation, Inc., 59 Temple Place - Suite 330, Boston, - MA 02111-1307, USA. + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + 02110-1301, USA. + Bacula® is a registered trademark of Kern Sibbald. + The licensor of Bacula is the Free Software Foundation Europe + (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich, + Switzerland, email:ftf@fsfeurope.org. +*/ +/* + * Lexical scanner for Bacula configuration file + * + * Kern Sibbald, 2000 + * + * Version $Id$ + * */ #include "bacula.h" @@ -30,23 +39,42 @@ extern int debug_level; +/* Debug level for this source file */ +static const int dbglvl = 5000; + /* * Scan to "logical" end of line. I.e. end of line, - * or semicolon. + * or semicolon, but stop on T_EOB (same as end of + * line except it is not eaten). */ void scan_to_eol(LEX *lc) { - Dmsg0(150, "start scan to eof\n"); - while (lex_get_token(lc, T_ALL) != T_EOL) - { } - Dmsg0(150, "done scan to eof\n"); + int token; + Dmsg0(dbglvl, "start scan to eof\n"); + while ((token = lex_get_token(lc, T_ALL)) != T_EOL) { + if (token == T_EOB) { + lex_unget_char(lc); + return; + } + } } - /* - * Format a scanner error message + * Get next token, but skip EOL */ -static void s_err(char *file, int line, LEX *lc, char *msg, ...) +int scan_to_next_not_eol(LEX * lc) +{ + int token; + do { + token = lex_get_token(lc, T_ALL); + } while (token == T_EOL); + return token; +} + +/* + * Format a scanner error message + */ +static void s_err(const char *file, int line, LEX *lc, const char *msg, ...) { va_list arg_ptr; char buf[MAXSTRING]; @@ -55,39 +83,68 @@ static void s_err(char *file, int line, LEX *lc, char *msg, ...) va_start(arg_ptr, msg); bvsnprintf(buf, sizeof(buf), msg, arg_ptr); va_end(arg_ptr); - + + if (lc->err_type == 0) { /* M_ERROR_TERM by default */ + lc->err_type = M_ERROR_TERM; + } + if (lc->line_no > lc->begin_line_no) { - sprintf(more, _("Problem probably begins at Line %d.\n"), lc->begin_line_no); + bsnprintf(more, sizeof(more), + _("Problem probably begins at line %d.\n"), lc->begin_line_no); } else { more[0] = 0; + } + if (lc->line_no > 0) { + e_msg(file, line, lc->err_type, 0, _("Config error: %s\n" +" : line %d, col %d of file %s\n%s\n%s"), + buf, lc->line_no, lc->col_no, lc->fname, lc->line, more); + } else { + e_msg(file, line, lc->err_type, 0, _("Config error: %s\n"), buf); } - e_msg(file, line, M_ERROR_TERM, 0, _("Config error: %s\n\ - : Line %d, col %d of file %s\n%s\n%s"), - buf, lc->line_no, lc->col_no, lc->fname, lc->line, more); } +void lex_set_default_error_handler(LEX *lf) +{ + lf->scan_error = s_err; +} + +/* + * Set err_type used in error_handler + * return the old value + */ +int lex_set_error_handler_error_type(LEX *lf, int err_type) +{ + int old = lf->err_type; + lf->err_type = err_type; + return old; +} /* * Free the current file, and retrieve the contents * of the previous packet if any. */ -LEX * -lex_close_file(LEX *lf) +LEX *lex_close_file(LEX *lf) { LEX *of; - Dmsg1(40, "Close lex file: %s\n", lf->fname); if (lf == NULL) { - Emsg0(M_ABORT, 0, "Close of NULL file\n"); + Emsg0(M_ABORT, 0, _("Close of NULL file\n")); } + Dmsg1(dbglvl, "Close lex file: %s\n", lf->fname); + of = lf->next; - fclose(lf->fd); - Dmsg1(49, "Close cfg file %s\n", lf->fname); + if (lf->bpipe) { + close_bpipe(lf->bpipe); + lf->bpipe = NULL; + } else { + fclose(lf->fd); + } + Dmsg1(dbglvl, "Close cfg file %s\n", lf->fname); free(lf->fname); if (of) { of->options = lf->options; /* preserve options */ memcpy(lf, of, sizeof(LEX)); - Dmsg1(49, "Restart scan of cfg file %s\n", of->fname); + Dmsg1(dbglvl, "Restart scan of cfg file %s\n", of->fname); } else { of = lf; lf = NULL; @@ -96,7 +153,7 @@ lex_close_file(LEX *lf) return lf; } -/* +/* * Open a new configuration file. We push the * state of the current file (lf) so that we * can do includes. This is a bit of a hammer. @@ -107,82 +164,98 @@ lex_close_file(LEX *lf) * the next field. * */ -LEX * -lex_open_file(LEX *lf, char *filename, LEX_ERROR_HANDLER *scan_error) - +LEX *lex_open_file(LEX *lf, const char *filename, LEX_ERROR_HANDLER *scan_error) + { LEX *nf; FILE *fd; + BPIPE *bpipe = NULL; char *fname = bstrdup(filename); - - if ((fd = fopen(fname, "r")) == NULL) { - Emsg2(M_ERROR_TERM, 0, _("Cannot open config file %s: %s\n"), - fname, strerror(errno)); + + if (fname[0] == '|') { + if ((bpipe = open_bpipe(fname+1, 0, "rb")) == NULL) { + free(fname); + return NULL; + } + fd = bpipe->rfd; + } else if ((fd = fopen(fname, "rb")) == NULL) { + free(fname); + return NULL; } - Dmsg1(49, "Open config file: %s\n", fname); + Dmsg1(400, "Open config file: %s\n", fname); nf = (LEX *)malloc(sizeof(LEX)); - if (lf) { + if (lf) { memcpy(nf, lf, sizeof(LEX)); memset(lf, 0, sizeof(LEX)); - lf->next = nf; /* if have lf, push it behind new one */ + lf->next = nf; /* if have lf, push it behind new one */ lf->options = nf->options; /* preserve user options */ + /* + * preserve err_type to prevent bacula exiting on 'reload' + * if config is invalid. Fixes bug #877 + */ + lf->err_type = nf->err_type; } else { - lf = nf; /* start new packet */ + lf = nf; /* start new packet */ memset(lf, 0, sizeof(LEX)); + lex_set_error_handler_error_type(lf, M_ERROR_TERM); } - lf->fd = fd; - lf->fname = fname; - lf->state = lex_none; - lf->ch = L_EOL; if (scan_error) { lf->scan_error = scan_error; } else { - lf->scan_error = s_err; + lex_set_default_error_handler(lf); } - Dmsg1(49, "Return lex=%x\n", lf); + lf->fd = fd; + lf->bpipe = bpipe; + lf->fname = fname; + lf->state = lex_none; + lf->ch = L_EOL; + Dmsg1(dbglvl, "Return lex=%x\n", lf); return lf; } -/* +/* * Get the next character from the input. * Returns the character or * L_EOF if end of file * L_EOL if end of line */ -int -lex_get_char(LEX *lf) +int lex_get_char(LEX *lf) { if (lf->ch == L_EOF) { - Emsg0(M_ABORT, 0, "get_char: called after EOF\n"); + Emsg0(M_ABORT, 0, _("get_char: called after EOF." + " You may have a open double quote without the closing double quote.\n")); } if (lf->ch == L_EOL) { - if (fgets(lf->line, MAXSTRING, lf->fd) == NULL) { - lf->ch = L_EOF; - if (lf->next) { - lex_close_file(lf); - } - return lf->ch; + if (bfgets(lf->line, MAXSTRING, lf->fd) == NULL) { + lf->ch = L_EOF; + if (lf->next) { + lex_close_file(lf); + } + return lf->ch; } lf->line_no++; lf->col_no = 0; + Dmsg2(1000, "fget line=%d %s", lf->line_no, lf->line); } - lf->ch = lf->line[lf->col_no]; + lf->ch = (uint8_t)lf->line[lf->col_no]; if (lf->ch == 0) { lf->ch = L_EOL; } else { lf->col_no++; } - Dmsg2(900, "lex_get_char: %c %d\n", lf->ch, lf->ch); + Dmsg2(dbglvl, "lex_get_char: %c %d\n", lf->ch, lf->ch); return lf->ch; } -void -lex_unget_char(LEX *lf) +void lex_unget_char(LEX *lf) { - lf->col_no--; - if (lf->ch == L_EOL) - lf->ch = 0; + if (lf->ch == L_EOL) { + lf->ch = 0; /* End of line, force read of next one */ + } else { + lf->col_no--; /* Backup to re-read char */ + } + } @@ -193,8 +266,8 @@ static void add_str(LEX *lf, int ch) { if (lf->str_len >= MAXSTRING-3) { Emsg3(M_ERROR_TERM, 0, _( - "Token too long, file: %s, line %d, begins at line %d\n"), - lf->fname, lf->line_no, lf->begin_line_no); + _("Config token too long, file: %s, line %d, begins at line %d\n")), + lf->fname, lf->line_no, lf->begin_line_no); } lf->str[lf->str_len++] = ch; lf->str[lf->str_len] = 0; @@ -203,7 +276,7 @@ static void add_str(LEX *lf, int ch) /* * Begin the string */ -static void begin_str(LEX *lf, int ch) +static void begin_str(LEX *lf, int ch) { lf->str_len = 0; lf->str[0] = 0; @@ -214,18 +287,21 @@ static void begin_str(LEX *lf, int ch) } #ifdef DEBUG -static char * -lex_state_to_str(int state) +static const char *lex_state_to_str(int state) { switch (state) { - case lex_none: return "none"; - case lex_comment: return "comment"; - case lex_number: return "number"; - case lex_ip_addr: return "ip_addr"; - case lex_identifier: return "identifier"; - case lex_string: return "string"; - case lex_quoted_string: return "quoted_string"; - default: return "??????"; + case lex_none: return _("none"); + case lex_comment: return _("comment"); + case lex_number: return _("number"); + case lex_ip_addr: return _("ip_addr"); + case lex_identifier: return _("identifier"); + case lex_string: return _("string"); + case lex_quoted_string: return _("quoted_string"); + case lex_include: return _("include"); + case lex_include_quoted_string: return _("include_quoted_string"); + case lex_utf8_bom: return _("UTF-8 Byte Order Mark"); + case lex_utf16_le_bom: return _("UTF-16le Byte Order Mark"); + default: return "??????"; } } #endif @@ -234,48 +310,66 @@ lex_state_to_str(int state) * Convert a lex token to a string * used for debug/error printing. */ -char * -lex_tok_to_str(int token) +const char *lex_tok_to_str(int token) { switch(token) { - case L_EOF: return "L_EOF"; - case L_EOL: return "L_EOL"; - case T_NONE: return "T_NONE"; - case T_NUMBER: return "T_NUMBER"; - case T_IPADDR: return "T_IPADDR"; - case T_IDENTIFIER: return "T_IDENTIFIER"; - case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING"; - case T_QUOTED_STRING: return "T_QUOTED_STRING"; - case T_BOB: return "T_BOB"; - case T_EOB: return "T_EOB"; - case T_EQUALS: return "T_EQUALS"; - case T_ERROR: return "T_ERROR"; - case T_EOF: return "T_EOF"; - case T_COMMA: return "T_COMMA"; - case T_EOL: return "T_EOL"; - default: return "??????"; + case L_EOF: return "L_EOF"; + case L_EOL: return "L_EOL"; + case T_NONE: return "T_NONE"; + case T_NUMBER: return "T_NUMBER"; + case T_IPADDR: return "T_IPADDR"; + case T_IDENTIFIER: return "T_IDENTIFIER"; + case T_UNQUOTED_STRING: return "T_UNQUOTED_STRING"; + case T_QUOTED_STRING: return "T_QUOTED_STRING"; + case T_BOB: return "T_BOB"; + case T_EOB: return "T_EOB"; + case T_EQUALS: return "T_EQUALS"; + case T_ERROR: return "T_ERROR"; + case T_EOF: return "T_EOF"; + case T_COMMA: return "T_COMMA"; + case T_EOL: return "T_EOL"; + case T_UTF8_BOM: return "T_UTF8_BOM"; + case T_UTF16_BOM: return "T_UTF16_BOM"; + default: return "??????"; } } static uint32_t scan_pint(LEX *lf, char *str) { - double dval = 0; + int64_t val = 0; + if (!is_a_number(str)) { + scan_err1(lf, _("expected a positive integer number, got: %s"), str); + /* NOT REACHED */ + } else { + errno = 0; + val = str_to_int64(str); + if (errno != 0 || val < 0) { + scan_err1(lf, _("expected a positive integer number, got: %s"), str); + /* NOT REACHED */ + } + } + return (uint32_t)val; +} + +static uint64_t scan_pint64(LEX *lf, char *str) +{ + uint64_t val = 0; if (!is_a_number(str)) { - scan_err1(lf, "expected a positive integer number, got: %s", str); + scan_err1(lf, _("expected a positive integer number, got: %s"), str); /* NOT REACHED */ } else { errno = 0; - dval = strtod(str, NULL); - if (errno != 0 || dval < 0) { - scan_err1(lf, "expected a postive integer number, got: %s", str); - /* NOT REACHED */ + val = str_to_uint64(str); + if (errno != 0) { + scan_err1(lf, _("expected a positive integer number, got: %s"), str); + /* NOT REACHED */ } } - return (uint32_t)dval; + return val; } -/* - * +/* + * * Get the next token from the input * */ @@ -284,198 +378,307 @@ lex_get_token(LEX *lf, int expect) { int ch; int token = T_NONE; - int esc_next = FALSE; - - Dmsg0(290, "enter lex_get_token\n"); + bool esc_next = false; + /* Unicode files, especially on Win32, may begin with a "Byte Order Mark" + to indicate which transmission format the file is in. The codepoint for + this mark is U+FEFF and is represented as the octets EF-BB-BF in UTF-8 + and as FF-FE in UTF-16le(little endian) and FE-FF in UTF-16(big endian). + We use a distinct state for UTF-8 and UTF-16le, and use bom_bytes_seen + to tell which byte we are expecting. */ + int bom_bytes_seen = 0; + + Dmsg0(dbglvl, "enter lex_get_token\n"); while (token == T_NONE) { ch = lex_get_char(lf); switch (lf->state) { - case lex_none: - Dmsg2(290, "Lex state lex_none ch=%d,%x\n", ch, ch); - if (B_ISSPACE(ch)) - break; - if (B_ISALPHA(ch)) { - if (lf->options & LOPT_NO_IDENT) - lf->state = lex_string; - else - lf->state = lex_identifier; - begin_str(lf, ch); - break; - } - if (B_ISDIGIT(ch)) { - lf->state = lex_number; - begin_str(lf, ch); - break; - } - Dmsg0(290, "Enter lex_none switch\n"); - switch (ch) { - case L_EOF: - token = T_EOF; - Dmsg0(290, "got L_EOF set token=T_EOF\n"); - break; - case '#': - lf->state = lex_comment; - break; - case '{': - token = T_BOB; - begin_str(lf, ch); - break; - case '}': - token = T_EOB; - begin_str(lf, ch); - break; - case '"': - lf->state = lex_quoted_string; - begin_str(lf, 0); - break; - case '=': - token = T_EQUALS; - begin_str(lf, ch); - break; - case ',': - token = T_COMMA; - begin_str(lf, ch); - break; - case ';': - token = T_EOL; /* treat ; like EOL */ - break; - case L_EOL: - Dmsg0(290, "got L_EOL set token=T_EOL\n"); - token = T_EOL; - break; - case '@': - lf->state = lex_include; - begin_str(lf, 0); - break; - default: - lf->state = lex_string; - begin_str(lf, ch); - break; - } - break; - case lex_comment: - Dmsg1(290, "Lex state lex_comment ch=%x\n", ch); - if (ch == L_EOL) { - lf->state = lex_none; - token = T_EOL; - } else if (ch == L_EOF) { - token = T_ERROR; - } - break; - case lex_number: - Dmsg2(290, "Lex state lex_number ch=%x %c\n", ch, ch); - if (ch == L_EOF) { - token = T_ERROR; - break; - } - /* Might want to allow trailing specifications here */ - if (B_ISDIGIT(ch)) { - add_str(lf, ch); - break; - } - - /* A valid number can be terminated by the following */ - if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') { - token = T_NUMBER; - lf->state = lex_none; - } else { - lf->state = lex_string; - } - lex_unget_char(lf); - break; - case lex_ip_addr: - if (ch == L_EOF) { - token = T_ERROR; - break; - } - Dmsg1(290, "Lex state lex_ip_addr ch=%x\n", ch); - break; - case lex_string: - Dmsg1(290, "Lex state lex_string ch=%x\n", ch); - if (ch == L_EOF) { - token = T_ERROR; - break; - } - if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' || - ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) { - lex_unget_char(lf); - token = T_UNQUOTED_STRING; - lf->state = lex_none; - break; - } - add_str(lf, ch); - break; - case lex_identifier: - Dmsg2(290, "Lex state lex_identifier ch=%x %c\n", ch, ch); - if (B_ISALPHA(ch)) { - add_str(lf, ch); - break; - } else if (B_ISSPACE(ch)) { - break; - } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' || - ch == ';' || ch == ',' || ch == '"' || ch == '#') { - lex_unget_char(lf); - token = T_IDENTIFIER; - lf->state = lex_none; - break; - } else if (ch == L_EOF) { - token = T_ERROR; - lf->state = lex_none; - begin_str(lf, ch); - break; - } - /* Some non-alpha character => string */ - lf->state = lex_string; - add_str(lf, ch); - break; - case lex_quoted_string: - Dmsg2(290, "Lex state lex_quoted_string ch=%x %c\n", ch, ch); - if (ch == L_EOF) { - token = T_ERROR; - break; - } - if (ch == L_EOL) { - esc_next = FALSE; - break; - } - if (esc_next) { - add_str(lf, ch); - esc_next = FALSE; - break; - } - if (ch == '\\') { - esc_next = TRUE; - break; - } - if (ch == '"') { - token = T_QUOTED_STRING; - lf->state = lex_none; - break; - } - add_str(lf, ch); - break; - case lex_include: /* scanning a filename */ - if (ch == L_EOF) { - token = T_ERROR; - break; - } - if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' || - ch == ';' || ch == ',' || ch == '"' || ch == '#') { - lf->state = lex_none; - lf = lex_open_file(lf, lf->str, NULL); - break; - } - add_str(lf, ch); - break; + case lex_none: + Dmsg2(dbglvl, "Lex state lex_none ch=%d,%x\n", ch, ch); + if (B_ISSPACE(ch)) + break; + if (B_ISALPHA(ch)) { + if (lf->options & LOPT_NO_IDENT || lf->options & LOPT_STRING) { + lf->state = lex_string; + } else { + lf->state = lex_identifier; + } + begin_str(lf, ch); + break; + } + if (B_ISDIGIT(ch)) { + if (lf->options & LOPT_STRING) { + lf->state = lex_string; + } else { + lf->state = lex_number; + } + begin_str(lf, ch); + break; + } + Dmsg0(dbglvl, "Enter lex_none switch\n"); + switch (ch) { + case L_EOF: + token = T_EOF; + Dmsg0(dbglvl, "got L_EOF set token=T_EOF\n"); + break; + case '#': + lf->state = lex_comment; + break; + case '{': + token = T_BOB; + begin_str(lf, ch); + break; + case '}': + token = T_EOB; + begin_str(lf, ch); + break; + case '"': + lf->state = lex_quoted_string; + begin_str(lf, 0); + break; + case '=': + token = T_EQUALS; + begin_str(lf, ch); + break; + case ',': + token = T_COMMA; + begin_str(lf, ch); + break; + case ';': + if (expect != T_SKIP_EOL) { + token = T_EOL; /* treat ; like EOL */ + } + break; + case L_EOL: + Dmsg0(dbglvl, "got L_EOL set token=T_EOL\n"); + if (expect != T_SKIP_EOL) { + token = T_EOL; + } + break; + case '@': + lf->state = lex_include; + begin_str(lf, 0); + break; + case 0xEF: /* probably a UTF-8 BOM */ + case 0xFF: /* probably a UTF-16le BOM */ + case 0xFE: /* probably a UTF-16be BOM (error)*/ + if (lf->line_no != 1 || lf->col_no != 1) + { + lf->state = lex_string; + begin_str(lf, ch); + } else { + bom_bytes_seen = 1; + if (ch == 0xEF) { + lf->state = lex_utf8_bom; + } else if (ch == 0xFF) { + lf->state = lex_utf16_le_bom; + } else { + scan_err0(lf, _("This config file appears to be in an " + "unsupported Unicode format (UTF-16be). Please resave as UTF-8\n")); + return T_ERROR; + } + } + break; + default: + lf->state = lex_string; + begin_str(lf, ch); + break; + } + break; + case lex_comment: + Dmsg1(dbglvl, "Lex state lex_comment ch=%x\n", ch); + if (ch == L_EOL) { + lf->state = lex_none; + if (expect != T_SKIP_EOL) { + token = T_EOL; + } + } else if (ch == L_EOF) { + token = T_ERROR; + } + break; + case lex_number: + Dmsg2(dbglvl, "Lex state lex_number ch=%x %c\n", ch, ch); + if (ch == L_EOF) { + token = T_ERROR; + break; + } + /* Might want to allow trailing specifications here */ + if (B_ISDIGIT(ch)) { + add_str(lf, ch); + break; + } + + /* A valid number can be terminated by the following */ + if (B_ISSPACE(ch) || ch == L_EOL || ch == ',' || ch == ';') { + token = T_NUMBER; + lf->state = lex_none; + } else { + lf->state = lex_string; + } + lex_unget_char(lf); + break; + case lex_ip_addr: + if (ch == L_EOF) { + token = T_ERROR; + break; + } + Dmsg1(dbglvl, "Lex state lex_ip_addr ch=%x\n", ch); + break; + case lex_string: + Dmsg1(dbglvl, "Lex state lex_string ch=%x\n", ch); + if (ch == L_EOF) { + token = T_ERROR; + break; + } + if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' || + ch == '\r' || ch == ';' || ch == ',' || ch == '#' || (B_ISSPACE(ch)) ) { + lex_unget_char(lf); + token = T_UNQUOTED_STRING; + lf->state = lex_none; + break; + } + add_str(lf, ch); + break; + case lex_identifier: + Dmsg2(dbglvl, "Lex state lex_identifier ch=%x %c\n", ch, ch); + if (B_ISALPHA(ch)) { + add_str(lf, ch); + break; + } else if (B_ISSPACE(ch)) { + break; + } else if (ch == '\n' || ch == L_EOL || ch == '=' || ch == '}' || ch == '{' || + ch == '\r' || ch == ';' || ch == ',' || ch == '"' || ch == '#') { + lex_unget_char(lf); + token = T_IDENTIFIER; + lf->state = lex_none; + break; + } else if (ch == L_EOF) { + token = T_ERROR; + lf->state = lex_none; + begin_str(lf, ch); + break; + } + /* Some non-alpha character => string */ + lf->state = lex_string; + add_str(lf, ch); + break; + case lex_quoted_string: + Dmsg2(dbglvl, "Lex state lex_quoted_string ch=%x %c\n", ch, ch); + if (ch == L_EOF) { + token = T_ERROR; + break; + } + if (ch == L_EOL) { + esc_next = false; + break; + } + if (esc_next) { + add_str(lf, ch); + esc_next = false; + break; + } + if (ch == '\\') { + esc_next = true; + break; + } + if (ch == '"') { + token = T_QUOTED_STRING; + lf->state = lex_none; + break; + } + add_str(lf, ch); + break; + case lex_include_quoted_string: + if (ch == L_EOF) { + token = T_ERROR; + break; + } + if (esc_next) { + add_str(lf, ch); + esc_next = false; + break; + } + if (ch == '\\') { + esc_next = true; + break; + } + if (ch == '"') { + /* Keep the original LEX so we can print an error if the included file can't be opened. */ + LEX* lfori = lf; + /* Skip the double quote when restarting parsing */ + lex_get_char(lf); + + lf->state = lex_none; + lf = lex_open_file(lf, lf->str, lf->scan_error); + if (lf == NULL) { + berrno be; + scan_err2(lfori, _("Cannot open included config file %s: %s\n"), + lfori->str, be.bstrerror()); + return T_ERROR; + } + break; + } + add_str(lf, ch); + break; + case lex_include: /* scanning a filename */ + if (ch == L_EOF) { + token = T_ERROR; + break; + } + if (ch == '"') { + lf->state = lex_include_quoted_string; + break; + } + + + if (B_ISSPACE(ch) || ch == '\n' || ch == L_EOL || ch == '}' || ch == '{' || + ch == ';' || ch == ',' || ch == '"' || ch == '#') { + /* Keep the original LEX so we can print an error if the included file can't be opened. */ + LEX* lfori = lf; + + lf->state = lex_none; + lf = lex_open_file(lf, lf->str, lf->scan_error); + if (lf == NULL) { + berrno be; + scan_err2(lfori, _("Cannot open included config file %s: %s\n"), + lfori->str, be.bstrerror()); + return T_ERROR; + } + break; + } + add_str(lf, ch); + break; + case lex_utf8_bom: + /* we only end up in this state if we have read an 0xEF + as the first byte of the file, indicating we are probably + reading a UTF-8 file */ + if (ch == 0xBB && bom_bytes_seen == 1) { + bom_bytes_seen++; + } else if (ch == 0xBF && bom_bytes_seen == 2) { + token = T_UTF8_BOM; + lf->state = lex_none; + } else { + token = T_ERROR; + } + break; + case lex_utf16_le_bom: + /* we only end up in this state if we have read an 0xFF + as the first byte of the file -- indicating that we are + probably dealing with an Intel based (little endian) UTF-16 file*/ + if (ch == 0xFE) { + token = T_UTF16_BOM; + lf->state = lex_none; + } else { + token = T_ERROR; + } + break; } - Dmsg4(290, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state), - lex_tok_to_str(token), ch); + Dmsg4(dbglvl, "ch=%d state=%s token=%s %c\n", ch, lex_state_to_str(lf->state), + lex_tok_to_str(token), ch); } - Dmsg2(290, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token)); + Dmsg2(dbglvl, "lex returning: line %d token: %s\n", lf->line_no, lex_tok_to_str(token)); lf->token = token; - /* - * Here is where we check to see if the user has set certain + /* + * Here is where we check to see if the user has set certain * expectations (e.g. 32 bit integer). If so, we do type checking * and possible additional scanning (e.g. for range). */ @@ -488,89 +691,107 @@ lex_get_token(LEX *lf, int expect) case T_PINT32_RANGE: if (token == T_NUMBER) { - lf->pint32_val = scan_pint(lf, lf->str); - lf->pint32_val2 = lf->pint32_val; - token = T_PINT32; + lf->pint32_val = scan_pint(lf, lf->str); + lf->pint32_val2 = lf->pint32_val; + token = T_PINT32; } else { char *p = strchr(lf->str, '-'); - if (!p) { - scan_err2(lf, "expected an integer or a range, got %s: %s", - lex_tok_to_str(token), lf->str); - token = T_ERROR; - break; - } - *p++ = 0; /* terminate first half of range */ - lf->pint32_val = scan_pint(lf, lf->str); - lf->pint32_val2 = scan_pint(lf, p); - token = T_PINT32_RANGE; + if (!p) { + scan_err2(lf, _("expected an integer or a range, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; + break; + } + *p++ = 0; /* terminate first half of range */ + lf->pint32_val = scan_pint(lf, lf->str); + lf->pint32_val2 = scan_pint(lf, p); + token = T_PINT32_RANGE; } break; case T_INT32: if (token != T_NUMBER || !is_a_number(lf->str)) { - scan_err2(lf, "expected an integer number, got %s: %s", - lex_tok_to_str(token), lf->str); - token = T_ERROR; - break; + scan_err2(lf, _("expected an integer number, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; + break; } errno = 0; - lf->int32_val = (int32_t)strtod(lf->str, NULL); + lf->int32_val = (int32_t)str_to_int64(lf->str); if (errno != 0) { - scan_err2(lf, "expected an integer number, got %s: %s", - lex_tok_to_str(token), lf->str); - token = T_ERROR; + scan_err2(lf, _("expected an integer number, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; } else { - token = T_INT32; + token = T_INT32; } break; case T_INT64: - Dmsg2(400, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL)); + Dmsg2(dbglvl, "int64=:%s: %f\n", lf->str, strtod(lf->str, NULL)); if (token != T_NUMBER || !is_a_number(lf->str)) { - scan_err2(lf, "expected an integer number, got %s: %s", - lex_tok_to_str(token), lf->str); - token = T_ERROR; - break; + scan_err2(lf, _("expected an integer number, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; + break; } errno = 0; - lf->int64_val = (int64_t)strtod(lf->str, NULL); + lf->int64_val = str_to_int64(lf->str); if (errno != 0) { - scan_err2(lf, "expected an integer number, got %s: %s", - lex_tok_to_str(token), lf->str); - token = T_ERROR; + scan_err2(lf, _("expected an integer number, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; } else { - token = T_INT64; + token = T_INT64; + } + break; + + case T_PINT64_RANGE: + if (token == T_NUMBER) { + lf->pint64_val = scan_pint64(lf, lf->str); + lf->pint64_val2 = lf->pint64_val; + token = T_PINT64; + } else { + char *p = strchr(lf->str, '-'); + if (!p) { + scan_err2(lf, _("expected an integer or a range, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; + break; + } + *p++ = 0; /* terminate first half of range */ + lf->pint64_val = scan_pint64(lf, lf->str); + lf->pint64_val2 = scan_pint64(lf, p); + token = T_PINT64_RANGE; } break; case T_NAME: if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) { - scan_err2(lf, "expected a name, got %s: %s", - lex_tok_to_str(token), lf->str); - token = T_ERROR; + scan_err2(lf, _("expected a name, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; } else if (lf->str_len > MAX_RES_NAME_LENGTH) { - scan_err3(lf, "name %s length %d too long, max is %d\n", lf->str, - lf->str_len, MAX_RES_NAME_LENGTH); - token = T_ERROR; - } else { - token = T_NAME; + scan_err3(lf, _("name %s length %d too long, max is %d\n"), lf->str, + lf->str_len, MAX_RES_NAME_LENGTH); + token = T_ERROR; } break; case T_STRING: if (token != T_IDENTIFIER && token != T_UNQUOTED_STRING && token != T_QUOTED_STRING) { - scan_err2(lf, "expected a string, got %s: %s", - lex_tok_to_str(token), lf->str); - token = T_ERROR; + scan_err2(lf, _("expected a string, got %s: %s"), + lex_tok_to_str(token), lf->str); + token = T_ERROR; } else { - token = T_STRING; + token = T_STRING; } break; default: - break; /* no expectation given */ + break; /* no expectation given */ } - lf->token = token; /* set possible new token */ + lf->token = token; /* set possible new token */ return token; }