X-Git-Url: https://git.sur5r.net/?a=blobdiff_plain;f=bacula%2Fsrc%2Flib%2Fjcr.c;h=7bf2ac08e59d5983999fb7b700ccf647209d2f2f;hb=a94fde1ec3ad3d24fa562364fa692315797f200d;hp=c31ff6f11f62d6768e9c96b7aff392cfc4ba612d;hpb=4f10b6cdd0641f4c7047e34fac3030be16f9bf6c;p=bacula%2Fbacula diff --git a/bacula/src/lib/jcr.c b/bacula/src/lib/jcr.c index c31ff6f11f..7bf2ac08e5 100644 --- a/bacula/src/lib/jcr.c +++ b/bacula/src/lib/jcr.c @@ -1,7 +1,7 @@ /* Bacula® - The Network Backup Solution - Copyright (C) 2000-2008 Free Software Foundation Europe e.V. + Copyright (C) 2000-2009 Free Software Foundation Europe e.V. The main author of Bacula is Kern Sibbald, with contributions from many others, a complete list can be found in the file AUTHORS. @@ -55,16 +55,7 @@ const int dbglvl = 3400; -/* - * Setting a NULL in tsd doesn't clear the tsd but instead tells - * pthreads not to call the tsd destructor. Consequently, we - * define this *invalid* jcr address and stuff it in the tsd - * when the jcr is no longer valid. 
- */ -#define INVALID_JCR ((JCR *)(-1)) - /* External variables we reference */ -extern time_t watchdog_time; /* External referenced functions */ void free_bregexps(alist *bregexps); @@ -124,6 +115,7 @@ void init_last_jobs_list() void term_last_jobs_list() { if (last_jobs) { + lock_last_jobs_list(); while (!last_jobs->empty()) { void *je = last_jobs->first(); last_jobs->remove(je); @@ -131,6 +123,7 @@ void term_last_jobs_list() } delete last_jobs; last_jobs = NULL; + unlock_last_jobs_list(); } if (jcrs) { delete jcrs; @@ -142,9 +135,10 @@ bool read_last_jobs_list(int fd, uint64_t addr) { struct s_last_job *je, job; uint32_t num; + bool ok = true; Dmsg1(100, "read_last_jobs seek to %d\n", (int)addr); - if (addr == 0 || lseek(fd, (off_t)addr, SEEK_SET) < 0) { + if (addr == 0 || lseek(fd, (boffset_t)addr, SEEK_SET) < 0) { return false; } if (read(fd, &num, sizeof(num)) != sizeof(num)) { @@ -154,11 +148,13 @@ bool read_last_jobs_list(int fd, uint64_t addr) if (num > 4 * max_last_jobs) { /* sanity check */ return false; } + lock_last_jobs_list(); for ( ; num; num--) { if (read(fd, &job, sizeof(job)) != sizeof(job)) { berrno be; Pmsg1(000, "Read job entry. 
ERR=%s\n", be.bstrerror()); - return false; + ok = false; + break; } if (job.JobId > 0) { je = (struct s_last_job *)malloc(sizeof(struct s_last_job)); @@ -174,41 +170,48 @@ bool read_last_jobs_list(int fd, uint64_t addr) } } } - return true; + unlock_last_jobs_list(); + return ok; } uint64_t write_last_jobs_list(int fd, uint64_t addr) { struct s_last_job *je; uint32_t num; + ssize_t stat; Dmsg1(100, "write_last_jobs seek to %d\n", (int)addr); - if (lseek(fd, (off_t)addr, SEEK_SET) < 0) { + if (lseek(fd, (boffset_t)addr, SEEK_SET) < 0) { return 0; } if (last_jobs) { + lock_last_jobs_list(); /* First record is number of entires */ num = last_jobs->size(); if (write(fd, &num, sizeof(num)) != sizeof(num)) { berrno be; Pmsg1(000, "Error writing num_items: ERR=%s\n", be.bstrerror()); - return 0; + goto bail_out; } foreach_dlist(je, last_jobs) { if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) { berrno be; Pmsg1(000, "Error writing job: ERR=%s\n", be.bstrerror()); - return 0; + goto bail_out; } } + unlock_last_jobs_list(); } /* Return current address */ - ssize_t stat = lseek(fd, 0, SEEK_CUR); + stat = lseek(fd, 0, SEEK_CUR); if (stat < 0) { stat = 0; } return stat; +bail_out: + unlock_last_jobs_list(); + return 0; } void lock_last_jobs_list() @@ -221,16 +224,50 @@ void unlock_last_jobs_list() V(last_jobs_mutex); } -/* Set Job type in JCR and also set appropriate read flag */ -void JCR::set_JobType(int32_t JobType) +/* Get an ASCII representation of the Operation being performed as an english Noun */ +const char *JCR::get_OperationName() { - m_JobType = JobType; + switch(m_JobType) { + case JT_BACKUP: + return _("Backup"); + case JT_VERIFY: + return _("Verifying"); + case JT_RESTORE: + return _("Restoring"); + case JT_ARCHIVE: + return _("Archiving"); + case JT_COPY: + return _("Copying"); + case JT_MIGRATE: + return _("Migration"); + case JT_SCAN: + return _("Scanning"); + default: + return _("Unknown operation"); + } } -/* Set Job level in JCR 
and also set appropriate read flag */ -void JCR::set_JobLevel(int32_t JobLevel) +/* Get an ASCII representation of the Action being performed either an english Verb or Adjective */ +const char *JCR::get_ActionName(bool past) { - m_JobLevel = JobLevel; + switch(m_JobType) { + case JT_BACKUP: + return _("backup"); + case JT_VERIFY: + return (past == true) ? _("verified") : _("verify"); + case JT_RESTORE: + return (past == true) ? _("restored") : _("restore"); + case JT_ARCHIVE: + return (past == true) ? _("archived") : _("archive"); + case JT_COPY: + return (past == true) ? _("copied") : _("copy"); + case JT_MIGRATE: + return (past == true) ? _("migrated") : _("migrate"); + case JT_SCAN: + return (past == true) ? _("scanned") : _("scan"); + default: + return _("unknown action"); + } } bool JCR::JobReads() @@ -273,6 +310,9 @@ static void job_end_pop(JCR *jcr) } } +/* + * Create thread key for thread specific data + */ void create_jcr_key() { int status = pthread_key_create(&jcr_key, NULL); @@ -306,6 +346,11 @@ JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr) memset(jcr, 0, size); jcr->my_thread_id = pthread_self(); jcr->msg_queue = New(dlist(item, &item->link)); + if ((status = pthread_mutex_init(&jcr->msg_queue_mutex, NULL)) != 0) { + berrno be; + Jmsg(NULL, M_ABORT, 0, _("Could not init msg_queue mutex. 
ERR=%s\n"), + be.bstrerror(status)); + } jcr->job_end_push.init(1, false); jcr->sched_time = time(NULL); jcr->daemon_free_jcr = daemon_free_jcr; /* plug daemon free routine */ @@ -318,9 +363,9 @@ JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr) /* Setup some dummy values */ bstrncpy(jcr->Job, "*System*", sizeof(jcr->Job)); jcr->JobId = 0; - jcr->set_JobType(JT_SYSTEM); /* internal job until defined */ - jcr->set_JobLevel(L_NONE); - set_jcr_job_status(jcr, JS_Created); /* ready to run */ + jcr->setJobType(JT_SYSTEM); /* internal job until defined */ + jcr->setJobLevel(L_NONE); + jcr->setJobStatus(JS_Created); /* ready to run */ set_jcr_in_tsd(jcr); sigtimer.sa_flags = 0; sigtimer.sa_handler = timeout_handler; @@ -372,6 +417,7 @@ static void free_common_jcr(JCR *jcr) if (jcr->msg_queue) { delete jcr->msg_queue; jcr->msg_queue = NULL; + pthread_mutex_destroy(&jcr->msg_queue_mutex); } close_msg(jcr); /* close messages for this job */ @@ -425,8 +471,7 @@ static void free_common_jcr(JCR *jcr) free_guid_list(jcr->id_list); jcr->id_list = NULL; } - /* Invalidate the tsd jcr data */ - set_jcr_in_tsd(INVALID_JCR); + remove_jcr_from_tsd(jcr); free(jcr); } @@ -451,7 +496,6 @@ void free_jcr(JCR *jcr) #endif - dequeue_messages(jcr); lock_jcr_chain(); jcr->dec_use_count(); /* decrement use count */ if (jcr->use_count() < 0) { @@ -471,13 +515,15 @@ void free_jcr(JCR *jcr) jcr->JobId, jcr->use_count(), jcr->Job); } remove_jcr(jcr); /* remove Jcr from chain */ + unlock_jcr_chain(); + dequeue_messages(jcr); job_end_pop(jcr); /* pop and call hooked routines */ Dmsg1(dbglvl, "End job=%d\n", jcr->JobId); /* Keep some statistics */ - switch (jcr->get_JobType()) { + switch (jcr->getJobType()) { case JT_BACKUP: case JT_VERIFY: case JT_RESTORE: @@ -486,11 +532,12 @@ void free_jcr(JCR *jcr) case JT_ADMIN: /* Keep list of last jobs, but not Console where JobId==0 */ if (jcr->JobId > 0) { + lock_last_jobs_list(); num_jobs_run++; je = (struct s_last_job *)malloc(sizeof(struct 
s_last_job)); memset(je, 0, sizeof(struct s_last_job)); /* zero in case unset fields */ - je->Errors = jcr->Errors; - je->JobType = jcr->get_JobType(); + je->Errors = jcr->JobErrors; + je->JobType = jcr->getJobType(); je->JobId = jcr->JobId; je->VolSessionId = jcr->VolSessionId; je->VolSessionTime = jcr->VolSessionTime; @@ -498,7 +545,7 @@ void free_jcr(JCR *jcr) je->JobFiles = jcr->JobFiles; je->JobBytes = jcr->JobBytes; je->JobStatus = jcr->JobStatus; - je->JobLevel = jcr->get_JobLevel(); + je->JobLevel = jcr->getJobLevel(); je->start_time = jcr->start_time; je->end_time = time(NULL); @@ -511,6 +558,7 @@ void free_jcr(JCR *jcr) last_jobs->remove(je); free(je); } + unlock_last_jobs_list(); } break; default: @@ -521,13 +569,27 @@ void free_jcr(JCR *jcr) jcr->daemon_free_jcr(jcr); /* call daemon free routine */ } - unlock_jcr_chain(); free_common_jcr(jcr); close_msg(NULL); /* flush any daemon messages */ garbage_collect_memory_pool(); Dmsg0(dbglvl, "Exit free_jcr\n"); } +/* + * Remove jcr from thread specific data, but + * make sure it is us who are attached. 
+ */ +void remove_jcr_from_tsd(JCR *jcr) +{ + JCR *tjcr = get_jcr_from_tsd(); + if (tjcr == jcr) { + set_jcr_in_tsd(INVALID_JCR); + } +} + +/* + * Put this jcr in the thread specific data + */ void set_jcr_in_tsd(JCR *jcr) { int status = pthread_setspecific(jcr_key, (void *)jcr); @@ -537,6 +599,9 @@ void set_jcr_in_tsd(JCR *jcr) } } +/* + * Give me the jcr that is attached to this thread + */ JCR *get_jcr_from_tsd() { JCR *jcr = (JCR *)pthread_getspecific(jcr_key); @@ -585,6 +650,30 @@ JCR *get_jcr_by_id(uint32_t JobId) return jcr; } +/* + * Given a thread id, find the JobId + * Returns: JobId on success + * 0 on failure + */ +uint32_t get_jobid_from_tid(pthread_t tid) +{ + JCR *jcr = NULL; + bool found = false; + + foreach_jcr(jcr) { + if (pthread_equal(jcr->my_thread_id, tid)) { + found = true; + break; + } + } + endeach_jcr(jcr); + if (found) { + return jcr->JobId; + } + return 0; +} + + /* * Given a SessionId and SessionTime, find the JCR * Returns: jcr on success @@ -662,52 +751,34 @@ JCR *get_jcr_by_full_name(char *Job) return jcr; } -void set_jcr_job_status(JCR *jcr, int JobStatus) +static void update_wait_time(JCR *jcr, int newJobStatus) { - bool set_waittime=false; - Dmsg2(800, "set_jcr_job_status(%s, %c)\n", jcr->Job, JobStatus); - /* if wait state is new, we keep current time for watchdog MaxWaitTime */ - switch (JobStatus) { - case JS_WaitFD: - case JS_WaitSD: - case JS_WaitMedia: - case JS_WaitMount: - case JS_WaitStoreRes: - case JS_WaitJobRes: - case JS_WaitClientRes: - case JS_WaitMaxJobs: - case JS_WaitPriority: - set_waittime = true; - default: - break; - } - - /* - * For a set of errors, ... keep the current status - * so it isn't lost. For all others, set it. 
- */ - Dmsg3(300, "jid=%u OnEntry JobStatus=%c set=%c\n", (uint32_t)jcr->JobId, - jcr->JobStatus, JobStatus); - switch (jcr->JobStatus) { - case JS_ErrorTerminated: - case JS_FatalError: - case JS_Canceled: + bool enter_in_waittime; + int oldJobStatus = jcr->JobStatus; + + switch (newJobStatus) { + case JS_WaitFD: + case JS_WaitSD: + case JS_WaitMedia: + case JS_WaitMount: + case JS_WaitStoreRes: + case JS_WaitJobRes: + case JS_WaitClientRes: + case JS_WaitMaxJobs: + case JS_WaitPriority: + enter_in_waittime = true; break; - case JS_Error: - case JS_Differences: - switch (JobStatus) { - case JS_ErrorTerminated: - case JS_FatalError: - case JS_Canceled: - /* Override more minor status */ - jcr->JobStatus = JobStatus; - break; - default: - break; - } + default: + enter_in_waittime = false; /* not a Wait situation */ + break; + } + /* - * For a set of Wait situation, keep old time. + * If we were previously waiting and are not any more + * we want to update the wait_time variable, which is + * the start of waiting. 
*/ + switch (oldJobStatus) { case JS_WaitFD: case JS_WaitSD: case JS_WaitMedia: @@ -717,17 +788,86 @@ void set_jcr_job_status(JCR *jcr, int JobStatus) case JS_WaitClientRes: case JS_WaitMaxJobs: case JS_WaitPriority: - set_waittime = false; /* keep old time */ + if (!enter_in_waittime) { /* we get out the wait time */ + jcr->wait_time_sum += (time(NULL) - jcr->wait_time); + jcr->wait_time = 0; + } + break; + + /* if wait state is new, we keep current time for watchdog MaxWaitTime */ default: - jcr->JobStatus = JobStatus; - if (set_waittime) { - /* set it before JobStatus */ - Dmsg0(800, "Setting wait_time\n"); + if (enter_in_waittime) { jcr->wait_time = time(NULL); } + break; + } +} + +/* + * Priority runs from 0 (lowest) to 10 (highest) + */ +static int get_status_priority(int JobStatus) +{ + int priority = 0; + switch (JobStatus) { + case JS_ErrorTerminated: + case JS_FatalError: + case JS_Canceled: + case JS_Incomplete: + priority = 10; + break; + case JS_Error: + priority = 8; + break; + case JS_Differences: + priority = 7; + break; + } + return priority; +} + + +void set_jcr_job_status(JCR *jcr, int JobStatus) +{ + jcr->setJobStatus(JobStatus); +} + +void JCR::setJobStatus(int newJobStatus) +{ + JCR *jcr = this; + int priority, old_priority; + int oldJobStatus = jcr->JobStatus; + priority = get_status_priority(newJobStatus); + old_priority = get_status_priority(oldJobStatus); + + Dmsg2(800, "set_jcr_job_status(%s, %c)\n", Job, newJobStatus); + + /* Update wait_time depending on newJobStatus and oldJobStatus */ + update_wait_time(jcr, newJobStatus); + + /* + * For a set of errors, ... keep the current status + * so it isn't lost. For all others, set it. + */ + Dmsg2(800, "OnEntry JobStatus=%c newJobstatus=%c\n", oldJobStatus, newJobStatus); + /* + * If the proposed new status has a higher priority than the current status, change it. + * If status priority == new priority and both are zero, take + * the new status. 
+ * If it is not zero, then we keep the first non-zero "error" that + * occurred. + */ + if (priority > old_priority || ( + priority == 0 && old_priority == 0)) { + Dmsg4(800, "Set new stat. old: %c,%d new: %c,%d\n", + jcr->JobStatus, old_priority, newJobStatus, priority); + jcr->JobStatus = newJobStatus; /* replace with new status */ + } + + if (oldJobStatus != jcr->JobStatus) { + Dmsg2(800, "leave set_job_status old=%c new=%c\n", oldJobStatus, newJobStatus); +// generate_plugin_event(jcr, bEventStatusChange, NULL); } - Dmsg3(200, "jid=%u leave set_jcr_job_status=%c set=%c\n", (uint32_t)jcr->JobId, - jcr->JobStatus, JobStatus); } #ifdef TRACE_JCR_CHAIN @@ -764,7 +904,6 @@ static void unlock_jcr_chain() V(jcr_lock); } - /* * Start walk of jcr chain * The proper way to walk the jcr chain is: @@ -911,6 +1050,41 @@ static void jcr_timeout_check(watchdog_t *self) Dmsg0(dbglvl, "Finished JCR timeout checks\n"); } +/* + * Return next JobId from comma separated list + * + * Returns: + * 1 if next JobId returned + * 0 if no more JobIds are in list + * -1 there is an error + */ +int get_next_jobid_from_list(char **p, uint32_t *JobId) +{ + const int maxlen = 30; + char jobid[maxlen+1]; + char *q = *p; + + jobid[0] = 0; + for (int i=0; ifirst(); jcr ; jcr = (JCR *)jcrs->next(jcr)) { + fprintf(fp, "JCR=%p JobId=%d name=%s JobStatus=%c\n", + jcr, (int)jcr->JobId, jcr->Job, jcr->JobStatus); + fprintf(fp, "\tuse_count=%i\n", jcr->use_count()); + fprintf(fp, "\tJobType=%c JobLevel=%c\n", + jcr->getJobType(), jcr->getJobLevel()); + bstrftime(buf1, sizeof(buf1), jcr->sched_time); + bstrftime(buf2, sizeof(buf2), jcr->start_time); + bstrftime(buf3, sizeof(buf3), jcr->end_time); + bstrftime(buf4, sizeof(buf4), jcr->wait_time); + fprintf(fp, "\tsched_time=%s start_time=%s\n\tend_time=%s wait_time=%s\n", + buf1, buf2, buf3, buf4); + fprintf(fp, "\tdb=%p db_batch=%p batch_started=%i\n", + jcr->db, jcr->db_batch, jcr->batch_started); + + /* + * Call all the jcr debug hooks + */ + for(int 
i=0; i < dbg_jcr_handler_count; i++) { + dbg_jcr_hook_t *hook = dbg_jcr_hooks[i]; + hook(jcr, fp); + } + } +}