]> git.sur5r.net Git - bacula/bacula/blobdiff - bacula/src/dird/jobq.c
Add heap stats to Dir and SD -- eliminate #ifdefs
[bacula/bacula] / bacula / src / dird / jobq.c
index ebedc4a1f28de469f3a51ff7c2aa35e02878fbea..8040ed77e0bef02a839ee91b431492dc592df1cd 100755 (executable)
@@ -18,7 +18,7 @@
  *
  */
 /*
-   Copyright (C) 2000-2003 Kern Sibbald and John Walker
+   Copyright (C) 2003-2004 Kern Sibbald and John Walker
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -42,7 +42,9 @@
 
 
 /* Forward referenced functions */
-static void *jobq_server(void *arg);
+extern "C" void *jobq_server(void *arg);
+extern "C" void *sched_wait(void *arg);
+
 static int   start_server(jobq_t *jq);
 
 /*   
@@ -152,7 +154,8 @@ struct wait_pkt {
  *  most jobs are put into the job queue only when their
  *  scheduled time arives.
  */
-static void *sched_wait(void *arg)
+extern "C"  
+void *sched_wait(void *arg)
 {
    JCR *jcr = ((wait_pkt *)arg)->jcr;
    jobq_t *jq = ((wait_pkt *)arg)->jq;
@@ -186,7 +189,6 @@ static void *sched_wait(void *arg)
    return NULL;
 }
 
-
 /*
  *  Add a job to the queue
  *    jq is a queue that was created with jobq_init
@@ -352,13 +354,14 @@ static int start_server(jobq_t *jq)
  * When all the resources are acquired for the job, 
  *  it will call the user's engine.
  */
-static void *jobq_server(void *arg)
+extern "C"  
+void *jobq_server(void *arg)
 {
    struct timespec timeout;
    jobq_t *jq = (jobq_t *)arg;
    jobq_item_t *je;                  /* job entry in queue */
    int stat;
-   bool timedout;
+   bool timedout = false;
    bool work = true;
 
    Dmsg0(300, "Start jobq_server\n");
@@ -373,30 +376,31 @@ static void *jobq_server(void *arg)
       struct timezone tz;
 
       Dmsg0(300, "Top of for loop\n");
-      timedout = false;
-      gettimeofday(&tv, &tz);
-      timeout.tv_nsec = 0;
-      timeout.tv_sec = tv.tv_sec + 4;
-
-      while (!work && !jq->quit) {
-        /*
-         * Wait 4 seconds, then if no more work, exit
-         */
-         Dmsg0(300, "pthread_cond_timedwait()\n");
-        stat = pthread_cond_timedwait(&jq->work, &jq->mutex, &timeout);
-        if (stat == ETIMEDOUT) {
-            Dmsg0(300, "timedwait timedout.\n");
-           timedout = true;
+      if (!work && !jq->quit) {
+        gettimeofday(&tv, &tz);
+        timeout.tv_nsec = 0;
+        timeout.tv_sec = tv.tv_sec + 4;
+
+        while (!jq->quit) {
+           /*
+            * Wait 4 seconds, then if no more work, exit
+            */
+            Dmsg0(300, "pthread_cond_timedwait()\n");
+           stat = pthread_cond_timedwait(&jq->work, &jq->mutex, &timeout);
+           if (stat == ETIMEDOUT) {
+               Dmsg0(300, "timedwait timedout.\n");
+              timedout = true;
+              break;
+           } else if (stat != 0) {
+               /* This shouldn't happen */
+               Dmsg0(300, "This shouldn't happen\n");
+              jq->num_workers--;
+              pthread_mutex_unlock(&jq->mutex);
+              return NULL;
+           }
            break;
-        } else if (stat != 0) {
-            /* This shouldn't happen */
-            Dmsg0(300, "This shouldn't happen\n");
-           jq->num_workers--;
-           pthread_mutex_unlock(&jq->mutex);
-           return NULL;
-        }
-        break;
-      } 
+        } 
+      }
       /* 
        * If anything is in the ready queue, run it
        */
@@ -421,12 +425,14 @@ static void *jobq_server(void *arg)
            jq->num_workers--;
            return NULL;
         }
+
          /* Call user's routine here */
          Dmsg1(300, "Calling user engine for jobid=%d\n", jcr->JobId);
         jq->engine(je->jcr);
+
          Dmsg1(300, "Back from user engine jobid=%d.\n", jcr->JobId);
         if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) {
-            Jmsg1(NULL, M_ERROR, 0, "pthread_mutex_unlock: ERR=%s\n", strerror(stat));
+            Jmsg1(NULL, M_ERROR, 0, "pthread_mutex_lock: ERR=%s\n", strerror(stat));
            jq->num_workers--;
            free(je);                 /* release job entry */
            return NULL;
@@ -440,6 +446,9 @@ static void *jobq_server(void *arg)
          */
         if (jcr->acquired_resource_locks) {
            jcr->store->NumConcurrentJobs--;
+           if (jcr->JobType == JT_RESTORE) {
+              jcr->store->MaxConcurrentJobs = jcr->saveMaxConcurrentJobs;  
+           }
            jcr->client->NumConcurrentJobs--;
            jcr->job->NumConcurrentJobs--;
         }
@@ -509,10 +518,10 @@ static void *jobq_server(void *arg)
         jobq_item_t *re = (jobq_item_t *)jq->running_jobs->first();
         if (re) {
            Priority = re->jcr->JobPriority;
-            Dmsg2(300, "JobId %d is running. Set pri=%d\n", re->jcr->JobId, Priority);
+            Dmsg2(300, "JobId %d is running. Look for pri=%d\n", re->jcr->JobId, Priority);
         } else {
            Priority = je->jcr->JobPriority;
-            Dmsg1(300, "No job running. Set Job pri=%d\n", Priority);
+            Dmsg1(300, "No job running. Look for Job pri=%d\n", Priority);
         }
         /*
          * Walk down the list of waiting jobs and attempt
@@ -529,17 +538,33 @@ static void *jobq_server(void *arg)
               set_jcr_job_status(jcr, JS_WaitPriority);
               break;
            }
-           if (jcr->store->NumConcurrentJobs < jcr->store->MaxConcurrentJobs) {
+           if (jcr->JobType == JT_RESTORE) {
+              /* Let only one Restore job run at a time regardless of MaxConcurrentJobs */
+              if (jcr->store->NumConcurrentJobs == 0) {
+                 jcr->store->NumConcurrentJobs++;
+                 jcr->saveMaxConcurrentJobs = jcr->store->MaxConcurrentJobs;
+                 jcr->store->MaxConcurrentJobs = 1;
+              } else {
+                 set_jcr_job_status(jcr, JS_WaitStoreRes);
+                 je = jn;
+                 continue;
+              }
+           } else if (jcr->store->NumConcurrentJobs < jcr->store->MaxConcurrentJobs) {
               jcr->store->NumConcurrentJobs++;
            } else {
               set_jcr_job_status(jcr, JS_WaitStoreRes);
               je = jn;
               continue;
            }
+
            if (jcr->client->NumConcurrentJobs < jcr->client->MaxConcurrentJobs) {
               jcr->client->NumConcurrentJobs++;
            } else {
+              /* Back out previous locks */
               jcr->store->NumConcurrentJobs--;
+              if (jcr->JobType == JT_RESTORE) {
+                 jcr->store->MaxConcurrentJobs = jcr->saveMaxConcurrentJobs;  
+              }
               set_jcr_job_status(jcr, JS_WaitClientRes);
               je = jn;
               continue;
@@ -547,7 +572,11 @@ static void *jobq_server(void *arg)
            if (jcr->job->NumConcurrentJobs < jcr->job->MaxConcurrentJobs) {
               jcr->job->NumConcurrentJobs++;
            } else {
+              /* Back out previous locks */
               jcr->store->NumConcurrentJobs--;
+              if (jcr->JobType == JT_RESTORE) {
+                 jcr->store->MaxConcurrentJobs = jcr->saveMaxConcurrentJobs;  
+              }
               jcr->client->NumConcurrentJobs--;
               set_jcr_job_status(jcr, JS_WaitJobRes);
               je = jn;
@@ -588,12 +617,36 @@ static void *jobq_server(void *arg)
         jq->num_workers--;
         break;
       }
-      work = !jq->ready_jobs->empty() || !jq->waiting_jobs->empty();              
+
+      work = !jq->ready_jobs->empty() || !jq->waiting_jobs->empty();
+      if (work) {
+        /*          
+          * If a job is waiting on a Resource, don't consume all
+         *   the CPU time looping looking for work, and even more
+         *   important, release the lock so that a job that has
+         *   terminated can give us the resource.
+         */
+        if ((stat = pthread_mutex_unlock(&jq->mutex)) != 0) {
+            Jmsg1(NULL, M_ERROR, 0, "pthread_mutex_unlock: ERR=%s\n", strerror(stat));
+           jq->num_workers--;
+           return NULL;
+        }
+        bmicrosleep(2, 0);              /* pause for 2 seconds */
+        if ((stat = pthread_mutex_lock(&jq->mutex)) != 0) {
+            Jmsg1(NULL, M_ERROR, 0, "pthread_mutex_lock: ERR=%s\n", strerror(stat));
+           jq->num_workers--;
+           return NULL;
+        }
+        /* Recompute work as something may have changed in last 2 secs */
+        work = !jq->ready_jobs->empty() || !jq->waiting_jobs->empty();
+      }
       Dmsg1(300, "Loop again. work=%d\n", work);
    } /* end of big for loop */
 
    Dmsg0(200, "unlock mutex\n");
-   pthread_mutex_unlock(&jq->mutex);
+   if ((stat = pthread_mutex_unlock(&jq->mutex)) != 0) {
+      Jmsg1(NULL, M_ERROR, 0, "pthread_mutex_unlock: ERR=%s\n", strerror(stat));
+   }
    Dmsg0(300, "End jobq_server\n");
    return NULL;
 }