git.sur5r.net Git - bacula/bacula/blobdiff - bacula/src/lib/jcr.c
State file debug + full functionality for alist
[bacula/bacula] / bacula / src / lib / jcr.c
index f303f0d4febd343e29f6b8aae23eb26663135a5d..00cd0307d9fe4964b581099d183dd5d3a201f619 100755 (executable)
@@ -38,46 +38,117 @@ extern time_t watchdog_time;
 static void timeout_handler(int sig);
 static void jcr_timeout_check(watchdog_t *self);
 
-struct s_last_job last_job;    /* last job run by this daemon */
+int num_jobs_run;
 dlist *last_jobs = NULL;
-#define MAX_LAST_JOBS 10
+#define MAX_LAST_JOBS 15
 
 static JCR *jobs = NULL;             /* pointer to JCR chain */
-
-/* Mutex for locking various jcr chains while updating */
-static pthread_mutex_t jcr_chain_mutex = PTHREAD_MUTEX_INITIALIZER;
+static brwlock_t lock;               /* lock for last jobs and JCR chain */
 
 void init_last_jobs_list()
 {
-   struct s_last_job *job_entry;
+   int errstat;
+   struct s_last_job *job_entry = NULL;
    if (!last_jobs) {
       last_jobs = new dlist(job_entry, &job_entry->link);
-      memset(&last_job, 0, sizeof(last_job));
+      if ((errstat=rwl_init(&lock)) != 0) {
+         Emsg1(M_ABORT, 0, _("Unable to initialize jcr_chain lock. ERR=%s\n"), 
+              strerror(errstat));
+      }
    }
+
 }
 
 void term_last_jobs_list()
 {
-   char *je;
+   struct s_last_job *je;
    if (last_jobs) {
       foreach_dlist(je, last_jobs) {
         free(je);                     
       }
       delete last_jobs;
       last_jobs = NULL;
+      rwl_destroy(&lock);
+   }
+}
+
+void read_last_jobs_list(int fd, uint64_t addr)
+{
+   struct s_last_job *je, job;
+   uint32_t num;
+
+   Dmsg1(010, "read_last_jobs seek to %d\n", (int)addr);
+   if (addr == 0 || lseek(fd, addr, SEEK_SET) < 0) {
+      return;
+   }
+   if (read(fd, &num, sizeof(num)) != sizeof(num)) {
+      return;
+   }
+   Dmsg1(010, "Read num_items=%d\n", num);
+   if (num > 4 * MAX_LAST_JOBS) {  /* sanity check */
+      return;
+   }
+   for ( ; num; num--) {
+      if (read(fd, &job, sizeof(job)) != sizeof(job)) {
+         Dmsg1(000, "Read job entry. ERR=%s\n", strerror(errno));
+        return;
+      }
+      if (job.JobId > 0) {
+        je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
+        memcpy((char *)je, (char *)&job, sizeof(job));
+        if (!last_jobs) {
+           init_last_jobs_list();
+        }
+        last_jobs->append(je);
+        if (last_jobs->size() > MAX_LAST_JOBS) {
+           last_jobs->remove(last_jobs->first());
+        }
+      }
+   }
+}
+
+uint64_t write_last_jobs_list(int fd, uint64_t addr)
+{
+   struct s_last_job *je;
+   uint32_t num;
+
+   Dmsg1(010, "write_last_jobs seek to %d\n", (int)addr);
+   if (lseek(fd, addr, SEEK_SET) < 0) {
+      return 0;
+   }
+   if (last_jobs) {
+      /* First record is number of entries */
+      num = last_jobs->size();
+      if (write(fd, &num, sizeof(num)) != sizeof(num)) {
+         Dmsg1(000, "Error writing num_items: ERR=%s\n", strerror(errno));
+        return 0;
+      }
+      foreach_dlist(je, last_jobs) {
+        if (write(fd, je, sizeof(struct s_last_job)) != sizeof(struct s_last_job)) {
+            Dmsg1(000, "Error writing job: ERR=%s\n", strerror(errno));
+           return 0;
+        }
+      }
    }
+   /* Return current address */
+   ssize_t stat = lseek(fd, 0, SEEK_CUR);
+   if (stat < 0) {
+      stat = 0;
+   }
+   return stat;
+      
 }
 
 void lock_last_jobs_list() 
 {
    /* Use jcr chain mutex */
-   P(jcr_chain_mutex);
+   lock_jcr_chain();
 }
 
 void unlock_last_jobs_list() 
 {
    /* Use jcr chain mutex */
-   V(jcr_chain_mutex);
+   unlock_jcr_chain();
 }
 
 /*
@@ -89,11 +160,13 @@ void unlock_last_jobs_list()
 JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
 {
    JCR *jcr;
+   MQUEUE_ITEM *item = NULL;
    struct sigaction sigtimer;
 
    Dmsg0(200, "Enter new_jcr\n");
    jcr = (JCR *)malloc(size);
    memset(jcr, 0, size);
+   jcr->msg_queue = new dlist(item, &item->link);
    jcr->my_thread_id = pthread_self();
    jcr->sched_time = time(NULL);
    jcr->daemon_free_jcr = daemon_free_jcr;    /* plug daemon free routine */
@@ -111,14 +184,14 @@ JCR *new_jcr(int size, JCR_free_HANDLER *daemon_free_jcr)
    sigfillset(&sigtimer.sa_mask);
    sigaction(TIMEOUT_SIGNAL, &sigtimer, NULL);
 
-   P(jcr_chain_mutex);
+   lock_jcr_chain();
    jcr->prev = NULL;
    jcr->next = jobs;
    if (jobs) {
       jobs->prev = jcr;
    }
    jobs = jcr;
-   V(jcr_chain_mutex);
+   unlock_jcr_chain();
    return jcr;
 }
 
@@ -151,13 +224,15 @@ static void remove_jcr(JCR *jcr)
  */
 static void free_common_jcr(JCR *jcr)
 {
+   struct s_last_job *je, last_job;
+
    /* Keep some statistics */
    switch (jcr->JobType) {
    case JT_BACKUP:
    case JT_VERIFY:
    case JT_RESTORE:
    case JT_ADMIN:
-      last_job.NumJobs++;
+      num_jobs_run++;
       last_job.JobType = jcr->JobType;
       last_job.JobId = jcr->JobId;
       last_job.VolSessionId = jcr->VolSessionId;
@@ -169,6 +244,18 @@ static void free_common_jcr(JCR *jcr)
       last_job.JobLevel = jcr->JobLevel;
       last_job.start_time = jcr->start_time;
       last_job.end_time = time(NULL);
+      /* Keep list of last jobs, but not Console where JobId==0 */
+      if (last_job.JobId > 0) {
+        je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
+        memcpy((char *)je, (char *)&last_job, sizeof(last_job));
+        if (!last_jobs) {
+           init_last_jobs_list();
+        }
+        last_jobs->append(je);
+        if (last_jobs->size() > MAX_LAST_JOBS) {
+           last_jobs->remove(last_jobs->first());
+        }
+      }
       break;
    default:
       break;
@@ -176,6 +263,7 @@ static void free_common_jcr(JCR *jcr)
    pthread_mutex_destroy(&jcr->mutex);
 
    close_msg(jcr);                   /* close messages for this job */
+   delete jcr->msg_queue;
 
    /* do this after closing messages */
    if (jcr->client_name) {
@@ -218,7 +306,7 @@ static void free_common_jcr(JCR *jcr)
  * Global routine to free a jcr
  */
 #ifdef DEBUG
-void b_free_jcr(char *file, int line, JCR *jcr)
+void b_free_jcr(const char *file, int line, JCR *jcr)
 {
    Dmsg3(200, "Enter free_jcr 0x%x from %s:%d\n", jcr, file, line);
 
@@ -230,14 +318,17 @@ void free_jcr(JCR *jcr)
    Dmsg1(200, "Enter free_jcr 0x%x\n", jcr);
 
 #endif
-   struct s_last_job *je;
 
-   P(jcr_chain_mutex);
+   lock_jcr_chain();
    jcr->use_count--;                 /* decrement use count */
-   Dmsg3(200, "Dec jcr 0x%x use_count=%d jobid=%d\n", jcr, jcr->use_count, jcr->JobId);
+   if (jcr->use_count < 0) {
+      Emsg2(M_ERROR, 0, _("JCR use_count=%d JobId=%d\n"),
+        jcr->use_count, jcr->JobId);
+   }
+   Dmsg3(200, "Dec free_jcr 0x%x use_count=%d jobid=%d\n", jcr, jcr->use_count, jcr->JobId);
    if (jcr->use_count > 0) {         /* if in use */
-      V(jcr_chain_mutex);
-      Dmsg2(200, "jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
+      unlock_jcr_chain();
+      Dmsg2(200, "free_jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
       return;
    }
    remove_jcr(jcr);
@@ -249,21 +340,8 @@ void free_jcr(JCR *jcr)
 
    free_common_jcr(jcr);
 
-   /* Keep list of last jobs, but not Console where JobId==0 */
-   if (last_job.JobId > 0) {
-      je = (struct s_last_job *)malloc(sizeof(struct s_last_job));
-      memcpy((char *)je, (char *)&last_job, sizeof(last_job));
-      if (!last_jobs) {
-        init_last_jobs_list();
-      }
-      last_jobs->append(je);
-      if (last_jobs->size() > MAX_LAST_JOBS) {
-        last_jobs->remove(last_jobs->first());
-      }
-      last_job.JobId = 0;            /* zap last job */
-   }
    close_msg(NULL);                  /* flush any daemon messages */
-   V(jcr_chain_mutex);
+   unlock_jcr_chain();
    Dmsg0(200, "Exit free_jcr\n");
 }
 
@@ -275,7 +353,7 @@ void free_jcr(JCR *jcr)
 void free_locked_jcr(JCR *jcr)
 {
    jcr->use_count--;                 /* decrement use count */
-   Dmsg2(200, "Dec jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
+   Dmsg2(200, "Dec free_locked_jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
    if (jcr->use_count > 0) {         /* if in use */
       return;
    }
@@ -296,17 +374,17 @@ JCR *get_jcr_by_id(uint32_t JobId)
 {
    JCR *jcr;      
 
-   P(jcr_chain_mutex);                 /* lock chain */
+   lock_jcr_chain();                   /* lock chain */
    for (jcr = jobs; jcr; jcr=jcr->next) {
       if (jcr->JobId == JobId) {
         P(jcr->mutex);
         jcr->use_count++;
         V(jcr->mutex);
-         Dmsg2(200, "Inc jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
+         Dmsg2(200, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
         break;
       }
    }
-   V(jcr_chain_mutex);
+   unlock_jcr_chain();
    return jcr; 
 }
 
@@ -319,18 +397,18 @@ JCR *get_jcr_by_session(uint32_t SessionId, uint32_t SessionTime)
 {
    JCR *jcr;      
 
-   P(jcr_chain_mutex);
+   lock_jcr_chain();
    for (jcr = jobs; jcr; jcr=jcr->next) {
       if (jcr->VolSessionId == SessionId && 
          jcr->VolSessionTime == SessionTime) {
         P(jcr->mutex);
         jcr->use_count++;
         V(jcr->mutex);
-         Dmsg2(200, "Inc jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
+         Dmsg2(200, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
         break;
       }
    }
-   V(jcr_chain_mutex);
+   unlock_jcr_chain();
    return jcr; 
 }
 
@@ -350,18 +428,18 @@ JCR *get_jcr_by_partial_name(char *Job)
    if (!Job) {
       return NULL;
    }
-   P(jcr_chain_mutex);
+   lock_jcr_chain();
    len = strlen(Job);
    for (jcr = jobs; jcr; jcr=jcr->next) {
       if (strncmp(Job, jcr->Job, len) == 0) {
         P(jcr->mutex);
         jcr->use_count++;
         V(jcr->mutex);
-         Dmsg2(200, "Inc jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
+         Dmsg2(200, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
         break;
       }
    }
-   V(jcr_chain_mutex);
+   unlock_jcr_chain();
    return jcr; 
 }
 
@@ -379,17 +457,17 @@ JCR *get_jcr_by_full_name(char *Job)
    if (!Job) {
       return NULL;
    }
-   P(jcr_chain_mutex);
+   lock_jcr_chain();
    for (jcr = jobs; jcr; jcr=jcr->next) {
       if (strcmp(jcr->Job, Job) == 0) {
         P(jcr->mutex);
         jcr->use_count++;
         V(jcr->mutex);
-         Dmsg2(200, "Inc jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
+         Dmsg2(200, "Inc get_jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
         break;
       }
    }
-   V(jcr_chain_mutex);
+   unlock_jcr_chain();
    return jcr; 
 }
 
@@ -416,7 +494,11 @@ void set_jcr_job_status(JCR *jcr, int JobStatus)
  */
 void lock_jcr_chain()
 {
-   P(jcr_chain_mutex);
+   int errstat;
+   if ((errstat=rwl_writelock(&lock)) != 0) {
+      Emsg1(M_ABORT, 0, "rwl_writelock failure. ERR=%s\n",
+          strerror(errstat));
+   }
 }
 
 /*
@@ -424,31 +506,35 @@ void lock_jcr_chain()
  */
 void unlock_jcr_chain()
 {
-   V(jcr_chain_mutex);
+   int errstat;
+   if ((errstat=rwl_writeunlock(&lock)) != 0) {
+      Emsg1(M_ABORT, 0, "rwl_writeunlock failure. ERR=%s\n",
+          strerror(errstat));
+   }
 }
 
 
-JCR *get_next_jcr(JCR *jcr)
+JCR *get_next_jcr(JCR *prev_jcr)
 {
-   JCR *rjcr;
+   JCR *jcr;
 
-   if (jcr == NULL) {
-      rjcr = jobs;
+   if (prev_jcr == NULL) {
+      jcr = jobs;
    } else {
-      rjcr = jcr->next;
+      jcr = prev_jcr->next;
    }
-   if (rjcr) {
-      P(rjcr->mutex);
-      rjcr->use_count++;
-      V(rjcr->mutex);
-      Dmsg1(200, "Inc jcr use_count=%d\n", rjcr->use_count);
+   if (jcr) {
+      P(jcr->mutex);
+      jcr->use_count++;
+      V(jcr->mutex);
+      Dmsg2(200, "Inc get_next_jcr 0x%x use_count=%d\n", jcr, jcr->use_count);
    }
-   return rjcr;
+   return jcr;
 }
 
 bool init_jcr_subsystem(void)
 {
-   watchdog_t *wd = watchdog_new();
+   watchdog_t *wd = new_watchdog();
 
    wd->one_shot = false;
    wd->interval = 30;  /* FIXME: should be configurable somewhere, even
@@ -466,13 +552,13 @@ static void jcr_timeout_check(watchdog_t *self)
    BSOCK *fd;
    time_t timer_start;
 
-   Dmsg0(200, "Start JCR timeout checks\n");
+   Dmsg0(400, "Start JCR timeout checks\n");
 
    /* Walk through all JCRs checking if any one is 
     * blocked for more than specified max time.
     */
    lock_jcr_chain();
-   for (jcr=NULL; (jcr=get_next_jcr(jcr)); ) {
+   foreach_jcr(jcr) {
       free_locked_jcr(jcr);          /* OK to free now cuz chain is locked */
       if (jcr->JobId == 0) {
         continue;
@@ -523,7 +609,7 @@ static void jcr_timeout_check(watchdog_t *self)
 /*
  * Timeout signal comes here
  */
-void timeout_handler(int sig)
+static void timeout_handler(int sig)
 {
    return;                           /* thus interrupting the function */
 }