X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Fdird%2Fdird.c;h=be64a25307f1c10adee67b3c955b30a76ef0f131;hb=bb827dbd7f8f7421aace966d6c29e42399d01fbd;hp=8afc685041b221cf93077c2fc8eb82c34fa9afa1;hpb=859999bb97d24991e0168cadee991fb9afba4067;p=bacula%2Fbacula diff --git a/bacula/src/dird/dird.c b/bacula/src/dird/dird.c index 8afc685041..be64a25307 100644 --- a/bacula/src/dird/dird.c +++ b/bacula/src/dird/dird.c @@ -7,7 +7,7 @@ * Version $Id$ */ /* - Copyright (C) 2000-2003 Kern Sibbald and John Walker + Copyright (C) 2000-2004 Kern Sibbald and John Walker This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as @@ -32,10 +32,11 @@ /* Forward referenced subroutines */ static void terminate_dird(int sig); static int check_resources(); -static void reload_config(int sig); /* Exported subroutines */ +extern "C" void reload_config(int sig); + /* Imported subroutines */ JCR *wait_for_next_job(char *runjob); @@ -43,15 +44,17 @@ void term_scheduler(); void term_ua_server(); int do_backup(JCR *jcr); void backup_cleanup(void); -void start_UA_server(char *addr, int port); +void start_UA_server(dlist *addrs); void init_job_server(int max_workers); -void store_jobtype(LEX *lc, struct res_items *item, int index, int pass); -void store_level(LEX *lc, struct res_items *item, int index, int pass); -void store_replace(LEX *lc, struct res_items *item, int index, int pass); +void term_job_server(); +void store_jobtype(LEX *lc, RES_ITEM *item, int index, int pass); +void store_level(LEX *lc, RES_ITEM *item, int index, int pass); +void store_replace(LEX *lc, RES_ITEM *item, int index, int pass); static char *configfile = NULL; static char *runjob = NULL; static int background = 1; +static void init_reload(void); /* Globals Exported */ DIRRES *director; /* Director resource */ @@ -60,7 +63,9 @@ int SDConnectTimeout; /* Globals Imported */ extern int r_first, r_last; /* first and last resources */ -extern struct res_items job_items[]; +extern RES_TABLE resources[]; +extern RES **res_head; +extern RES_ITEM job_items[]; extern URES res_all; @@ -69,6 +74,7 @@ extern URES res_all; static void usage() { fprintf(stderr, _( +"Copyright (C) 2000-2004 Kern Sibbald and John Walker\n" "\nVersion: " VERSION " (" BDATE ")\n\n" "Usage: dird [-f -s] [-c config_file] [-d debug_level] [config_file]\n" " -c set configuration file to file\n" @@ -103,8 +109,9 @@ int main (int argc, char *argv[]) init_stack_dump(); my_name_is(argc, argv, "bacula-dir"); - textdomain("bacula-dir"); + textdomain("bacula"); init_msg(NULL, NULL); /* initialize message handler */ + init_reload(); daemon_start_time = time(NULL); while ((ch = getopt(argc, argv, "c:d:fg:r:stu:v?")) != -1) { @@ -207,11 +214,12 @@ int main (int argc, char *argv[]) } /* Create pid must come after we are a daemon -- so we have our final pid */ - create_pid_file(director->pid_directory, "bacula-dir", director->DIRport); + create_pid_file(director->pid_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); + read_state_file(director->working_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); drop(uid, gid); /* reduce priveleges if requested */ -/* signal(SIGHUP, reload_config); */ + signal(SIGHUP, reload_config); init_console_msg(working_directory); @@ -219,7 +227,7 @@ int main (int argc, char *argv[]) 4 /* UA */ + 4 /* sched+watchdog+jobsvr+misc */); Dmsg0(200, "Start UA server\n"); - start_UA_server(director->DIRaddr, director->DIRport); + start_UA_server(director->DIRaddrs); start_watchdog(); /* start network watchdog thread */ @@ -249,10 +257,11 @@ static void terminate_dird(int sig) exit(1); } already_here = TRUE; - delete_pid_file(director->pid_directory, "bacula-dir", - director->DIRport); + write_state_file(director->working_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); + delete_pid_file(director->pid_directory, "bacula-dir", get_first_port_host_order(director->DIRaddrs)); // signal(SIGCHLD, SIG_IGN); /* don't worry about children now */ term_scheduler(); + term_job_server(); if (runjob) { free(runjob); } @@ -267,47 +276,176 @@ static void terminate_dird(int sig) term_msg(); /* terminate message handler */ stop_watchdog(); close_memory_pool(); /* release free memory in pool */ - sm_dump(False); - exit(sig != 0); + sm_dump(false); + exit(sig); +} + +struct RELOAD_TABLE { + int job_count; + RES **res_table; +}; + +static const int max_reloads = 10; +static RELOAD_TABLE reload_table[max_reloads]; + +static void init_reload(void) +{ + for (int i=0; i < max_reloads; i++) { + reload_table[i].job_count = 0; + reload_table[i].res_table = NULL; + } +} + +static void free_saved_resources(int table) +{ + int num = r_last - r_first + 1; + RES **res_tab = reload_table[table].res_table; + if (!res_tab) { + Dmsg1(100, "res_tab for table %d already released.\n", table); + return; + } + Dmsg1(100, "Freeing resources for table %d\n", table); + for (int j=0; jJobId, + reload_id, reload_table[reload_id].job_count); + lock_jcr_chain(); + LockRes(); + if (--reload_table[reload_id].job_count <= 0) { + free_saved_resources(reload_id); + } + UnlockRes(); + unlock_jcr_chain(); +} + +static int find_free_reload_table_entry() +{ + int table = -1; + for (int i=0; i < max_reloads; i++) { + if (reload_table[i].res_table == NULL) { + table = i; + break; + } + } + return table; } /* * If we get here, we have received a SIGHUP, which means to - * reread our configuration file. + * reread our configuration file. * - * ***FIXME*** Check that there are no jobs running before - * doing this. + * The algorithm used is as follows: we count how many jobs are + * running and mark the running jobs to make a callback on + * exiting. The old config is saved with the reload table + * id in a reload table. The new config file is read. Now, as + * each job exits, it calls back to the reload_job_end_cb(), which + * decrements the count of open jobs for the given reload table. + * When the count goes to zero, we release those resources. + * This allows us to have pointers into the resource table (from + * jobs), and once they exit and all the pointers are released, we + * release the old table. Note, if no new jobs are running since the + * last reload, then the old resources will be immediately release. + * A console is considered a job because it may have pointers to + * resources, but a SYSTEM job is not since it *should* not have any + * permanent pointers to jobs. */ -static void reload_config(int sig) +extern "C" +void reload_config(int sig) { - static int already_here = FALSE; + static bool already_here = false; sigset_t set; + JCR *jcr; + int njobs = 0; /* number of running jobs */ + int table, rtable; if (already_here) { abort(); /* Oops, recursion -> die */ } - already_here = TRUE; - sigfillset(&set); + already_here = true; + sigemptyset(&set); + sigaddset(&set, SIGHUP); sigprocmask(SIG_BLOCK, &set, NULL); - free_config_resources(); +// Jmsg(NULL, M_INFO, 0, "Entering experimental reload config code. Bug reports will not be accepted.\n"); + + lock_jcr_chain(); + LockRes(); + + table = find_free_reload_table_entry(); + if (table < 0) { + Jmsg(NULL, M_ERROR, 0, _("Too many open reload requests. Request ignored.\n")); + goto bail_out; + } + + Dmsg1(100, "Reload_config njobs=%d\n", njobs); + reload_table[table].res_table = save_config_resources(); + Dmsg1(100, "Saved old config in table %d\n", table); parse_config(configfile); - Dmsg0(200, "check_resources()\n"); + Dmsg0(100, "Reloaded config file\n"); if (!check_resources()) { - Jmsg(NULL, M_ERROR_TERM, 0, _("Please correct configuration file: %s\n"), configfile); + rtable = find_free_reload_table_entry(); /* save new, bad table */ + if (rtable < 0) { + Jmsg(NULL, M_ERROR, 0, _("Please correct configuration file: %s\n"), configfile); + Jmsg(NULL, M_ERROR_TERM, 0, _("Out of reload table entries. Giving up.\n")); + } else { + Jmsg(NULL, M_ERROR, 0, _("Please correct configuration file: %s\n"), configfile); + } + reload_table[rtable].res_table = save_config_resources(); + /* Now restore old resoure values */ + int num = r_last - r_first + 1; + RES **res_tab = reload_table[table].res_table; + for (int i=0; iJobType != JT_SYSTEM) { + reload_table[table].job_count++; + job_end_push(jcr, reload_job_end_cb, (void *)((long int)table)); + njobs++; + } + free_locked_jcr(jcr); + } } /* Reset globals */ set_working_directory(director->working_directory); FDConnectTimeout = director->FDConnectTimeout; SDConnectTimeout = director->SDConnectTimeout; - + Dmsg0(0, "Director's configuration file reread.\n"); + + /* Now release saved resources, if no jobs using the resources */ + if (njobs == 0) { + free_saved_resources(table); + } + +bail_out: + UnlockRes(); + unlock_jcr_chain(); sigprocmask(SIG_UNBLOCK, &set, NULL); signal(SIGHUP, reload_config); - already_here = FALSE; - Dmsg0(0, "Director's configuration file reread.\n"); + already_here = false; } /* @@ -381,7 +519,7 @@ Without that I don't know who I am :-(\n"), configfile); job->hdr.name, job_items[i].name, *def_svalue, i, offset); svalue = (char **)((char *)job + offset); if (*svalue) { - Dmsg1(000, "Hey something is wrong. p=0x%u\n", (unsigned)*svalue); + Pmsg1(000, "Hey something is wrong. p=0x%lu\n", *svalue); } *svalue = bstrdup(*def_svalue); set_bit(i, job->hdr.item_present); @@ -391,7 +529,7 @@ Without that I don't know who I am :-(\n"), configfile); job->hdr.name, job_items[i].name, i, offset); svalue = (char **)((char *)job + offset); if (*svalue) { - Dmsg1(000, "Hey something is wrong. p=0x%u\n", (unsigned)*svalue); + Pmsg1(000, "Hey something is wrong. p=0x%lu\n", *svalue); } *svalue = *def_svalue; set_bit(i, job->hdr.item_present); @@ -443,59 +581,62 @@ Without that I don't know who I am :-(\n"), configfile); Emsg0(M_ERROR_TERM, 0, "Too many items in Job resource\n"); } } - if (job->client && job->client->catalog) { - CAT *catalog = job->client->catalog; - B_DB *db; - - /* - * Make sure we can open catalog, otherwise print a warning - * message because the server is probably not running. - */ - db = db_init_database(NULL, catalog->db_name, catalog->db_user, - catalog->db_password, catalog->db_address, - catalog->db_port, catalog->db_socket); - if (!db || !db_open_database(NULL, db)) { - Jmsg(NULL, M_FATAL, 0, _("Could not open database \"%s\".\n"), - catalog->db_name); - if (db) { - Jmsg(NULL, M_FATAL, 0, _("%s"), db_strerror(db)); - } - OK = false; - } else { - /* If a pool is defined for this job, create the pool DB - * record if it is not already created. - */ - if (job->pool) { - create_pool(NULL, db, job->pool, POOL_OP_UPDATE); /* update request */ + } /* End loop over Job res */ + + /* Loop over databases */ + CAT *catalog; + foreach_res(catalog, R_CATALOG) { + B_DB *db; + /* + * Make sure we can open catalog, otherwise print a warning + * message because the server is probably not running. + */ + db = db_init_database(NULL, catalog->db_name, catalog->db_user, + catalog->db_password, catalog->db_address, + catalog->db_port, catalog->db_socket); + if (!db || !db_open_database(NULL, db)) { + Jmsg(NULL, M_FATAL, 0, _("Could not open database \"%s\".\n"), + catalog->db_name); + if (db) { + Jmsg(NULL, M_FATAL, 0, _("%s"), db_strerror(db)); + } + OK = false; + continue; + } + + /* Loop over all pools, defining/updating them in each database */ + POOL *pool; + foreach_res(pool, R_POOL) { + create_pool(NULL, db, pool, POOL_OP_UPDATE); /* update request */ + } + /* Loop over all counters, defining them in each database */ + + /* Set default value in all counters */ + COUNTER *counter; + foreach_res(counter, R_COUNTER) { + /* Write to catalog? */ + if (!counter->created && counter->Catalog == catalog) { + COUNTER_DBR cr; + bstrncpy(cr.Counter, counter->hdr.name, sizeof(cr.Counter)); + cr.MinValue = counter->MinValue; + cr.MaxValue = counter->MaxValue; + cr.CurrentValue = counter->MinValue; + if (counter->WrapCounter) { + bstrncpy(cr.WrapCounter, counter->WrapCounter->hdr.name, sizeof(cr.WrapCounter)); + } else { + cr.WrapCounter[0] = 0; /* empty string */ } - /* Set default value in all counters */ - COUNTER *counter; - foreach_res(counter, R_COUNTER) { - /* Write to catalog? */ - if (!counter->created && counter->Catalog == catalog) { - COUNTER_DBR cr; - bstrncpy(cr.Counter, counter->hdr.name, sizeof(cr.Counter)); - cr.MinValue = counter->MinValue; - cr.MaxValue = counter->MaxValue; - cr.CurrentValue = counter->MinValue; - if (counter->WrapCounter) { - bstrncpy(cr.WrapCounter, counter->WrapCounter->hdr.name, sizeof(cr.WrapCounter)); - } else { - cr.WrapCounter[0] = 0; /* empty string */ - } - if (db_create_counter_record(NULL, db, &cr)) { - counter->CurrentValue = cr.CurrentValue; - counter->created = true; - Dmsg2(100, "Create counter %s val=%d\n", counter->hdr.name, counter->CurrentValue); - } - } - if (!counter->created) { - counter->CurrentValue = counter->MinValue; /* default value */ - } + if (db_create_counter_record(NULL, db, &cr)) { + counter->CurrentValue = cr.CurrentValue; + counter->created = true; + Dmsg2(100, "Create counter %s val=%d\n", counter->hdr.name, counter->CurrentValue); } + } + if (!counter->created) { + counter->CurrentValue = counter->MinValue; /* default value */ } - db_close_database(NULL, db); } + db_close_database(NULL, db); } UnlockRes();