3 * Bacula Director -- migrate.c -- responsible for doing
6 * Kern Sibbald, September MMIV
8 * Basic tasks done here:
9 * Open DB and create records for this job.
10 * Open Message Channel with Storage daemon to tell him a job will be starting.
11 * Open connection with Storage daemon and pass him commands
13 * When the Storage daemon finishes the job, update the DB.
18 Bacula® - The Network Backup Solution
20 Copyright (C) 2004-2006 Free Software Foundation Europe e.V.
22 The main author of Bacula is Kern Sibbald, with contributions from
23 many others, a complete list can be found in the file AUTHORS.
24 This program is Free Software; you can redistribute it and/or
25 modify it under the terms of version two of the GNU General Public
26 License as published by the Free Software Foundation plus additions
27 that are listed in the file LICENSE.
29 This program is distributed in the hope that it will be useful, but
30 WITHOUT ANY WARRANTY; without even the implied warranty of
31 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
32 General Public License for more details.
34 You should have received a copy of the GNU General Public License
35 along with this program; if not, write to the Free Software
36 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
39 Bacula® is a registered trademark of John Walker.
40 The licensor of Bacula is the Free Software Foundation Europe
41 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
42 Switzerland, email:ftf@fsfeurope.org.
49 #include "lib/bregex.h"
54 static const int dbglevel = 10;
56 static char OKbootstrap[] = "3000 OK bootstrap\n";
57 static bool get_job_to_migrate(JCR *jcr);
59 static bool regex_find_jobids(JCR *jcr, idpkt *ids, const char *query1,
60 const char *query2, const char *type);
61 static bool find_mediaid_then_jobids(JCR *jcr, idpkt *ids, const char *query1,
63 static bool find_jobids_from_mediaid_list(JCR *jcr, idpkt *ids, const char *type);
64 static void start_migration_job(JCR *jcr);
65 static int get_next_dbid_from_list(char **p, DBId_t *DBId);
68 * Called here before the job is run to do the job
69 * specific setup. Note, one of the important things to
70 * complete in this init code is to make the definitive
71 * choice of input and output storage devices. This is
72 * because immediately after the init, the job is queued
73 * in the jobq.c code, and it checks that all the resources
74 * (storage resources in particular) are available, so these
75 * must all be properly defined.
77 * previous_jr refers to the job DB record of the Job that is
78 * going to be migrated.
79 * prev_job refers to the job resource of the Job that is
80 * going to be migrated.
81 * jcr is the jcr for the current "migration" job. It is a
82 * control job that is put in the DB as a migration job, which
83 * means that this job migrated a previous job to a new job.
84 * No Volume or File data is associated with this control
86 * mig_jcr refers to the newly migrated job that is run by
87 * the current jcr. It is a backup job that moves (migrates) the
88 * data written for the previous_jr into the new pool. This
89 * job (mig_jcr) becomes the new backup job that replaces
90 * the original backup job.
/*
 * First-phase setup for a migration job (runs before the job is queued).
 *  Selects the Job to be migrated, creates/fetches the needed catalog
 *  records (FileSet, Pool), builds the mig_jcr control job, and makes
 *  the definitive choice of read and write storage.
 * Returns: false on fatal error,
 *          true otherwise (including the "nothing to migrate" cases).
 */
bool do_migration_init(JCR *jcr)
   JCR *mig_jcr;                   /* newly migrated job */

   /* If we find a job or jobs to migrate it is previous_jr.JobId */
   if (!get_job_to_migrate(jcr)) {
   Dmsg1(dbglevel, "Back from get_job_to_migrate JobId=%d\n", (int)jcr->JobId);

   /* No previous Job selected => success with no work to do */
   if (jcr->previous_jr.JobId == 0) {
      Dmsg1(dbglevel, "JobId=%d no previous JobId\n", (int)jcr->JobId);
      Jmsg(jcr, M_INFO, 0, _("No previous Job found to migrate.\n"));
      return true;                    /* no work */

   if (!get_or_create_fileset_record(jcr)) {
      Dmsg1(dbglevel, "JobId=%d no FileSet\n", (int)jcr->JobId);
      Jmsg(jcr, M_FATAL, 0, _("Could not get or create the FileSet record.\n"));

   apply_pool_overrides(jcr);

   jcr->jr.PoolId = get_or_create_pool_record(jcr, jcr->pool->hdr.name);
   if (jcr->jr.PoolId == 0) {
      Dmsg1(dbglevel, "JobId=%d no PoolId\n", (int)jcr->JobId);
      Jmsg(jcr, M_FATAL, 0, _("Could not get or create a Pool record.\n"));

   create_restore_bootstrap_file(jcr);

   /* Previous Job missing or empty => terminate normally, nothing to move */
   if (jcr->previous_jr.JobId == 0 || jcr->ExpectedFiles == 0) {
      set_jcr_job_status(jcr, JS_Terminated);
      Dmsg1(dbglevel, "JobId=%d expected files == 0\n", (int)jcr->JobId);
      if (jcr->previous_jr.JobId == 0) {
         Jmsg(jcr, M_INFO, 0, _("No previous Job found to migrate.\n"));
         Jmsg(jcr, M_INFO, 0, _("Previous Job has no data to migrate.\n"));
      return true;                    /* no work */

   Dmsg5(dbglevel, "JobId=%d: Previous: Name=%s JobId=%d Type=%c Level=%c\n",
      jcr->previous_jr.Name, (int)jcr->previous_jr.JobId,
      jcr->previous_jr.JobType, jcr->previous_jr.JobLevel);

   Dmsg5(dbglevel, "JobId=%d: Current: Name=%s JobId=%d Type=%c Level=%c\n",
      jcr->jr.Name, (int)jcr->jr.JobId,
      jcr->jr.JobType, jcr->jr.JobLevel);

   /* Look up the Job resources for the current and the previous job */
   job = (JOB *)GetResWithName(R_JOB, jcr->jr.Name);
   prev_job = (JOB *)GetResWithName(R_JOB, jcr->previous_jr.Name);
      Jmsg(jcr, M_FATAL, 0, _("Job resource not found for \"%s\".\n"), jcr->jr.Name);
      Jmsg(jcr, M_FATAL, 0, _("Previous Job resource not found for \"%s\".\n"),
           jcr->previous_jr.Name);

   /* Create a migration jcr */
   mig_jcr = jcr->mig_jcr = new_jcr(sizeof(JCR), dird_free_jcr);
   memcpy(&mig_jcr->previous_jr, &jcr->previous_jr, sizeof(mig_jcr->previous_jr));

   /*
    * Turn the mig_jcr into a "real" job that takes on the aspects of
    *  the previous backup job "prev_job".
    */
   set_jcr_defaults(mig_jcr, prev_job);
   if (!setup_job(mig_jcr)) {
      Jmsg(jcr, M_FATAL, 0, _("setup job failed.\n"));

   /* Now reset the job record from the previous job */
   memcpy(&mig_jcr->jr, &jcr->previous_jr, sizeof(mig_jcr->jr));
   /* Update the jr to reflect the new values of PoolId, FileSetId, and JobId. */
   mig_jcr->jr.PoolId = jcr->jr.PoolId;
   mig_jcr->jr.FileSetId = jcr->jr.FileSetId;
   mig_jcr->jr.JobId = mig_jcr->JobId;

   Dmsg4(dbglevel, "mig_jcr: Name=%s JobId=%d Type=%c Level=%c\n",
      mig_jcr->jr.Name, (int)mig_jcr->jr.JobId,
      mig_jcr->jr.JobType, mig_jcr->jr.JobLevel);

   /*
    * Get the PoolId used with the original job. Then
    *  find the pool name from the database record.
    */
   memset(&pr, 0, sizeof(pr));
   pr.PoolId = mig_jcr->previous_jr.PoolId;
   if (!db_get_pool_record(jcr, jcr->db, &pr)) {
      Jmsg(jcr, M_FATAL, 0, _("Pool for JobId %s not in database. ERR=%s\n"),
            edit_int64(pr.PoolId, ed1), db_strerror(jcr->db));

   /* Get the pool resource corresponding to the original job */
   pool = (POOL *)GetResWithName(R_POOL, pr.Name);
      Jmsg(jcr, M_FATAL, 0, _("Pool resource \"%s\" not found.\n"), pr.Name);

   /* If pool storage specified, use it for restore */
   copy_rstorage(mig_jcr, pool->storage, _("Pool resource"));
   copy_rstorage(jcr, pool->storage, _("Pool resource"));

   /*
    * If the original backup pool has a NextPool, make sure a
    *  record exists in the database. Note, in this case, we
    *  will be migrating from pool to pool->NextPool.
    */
   if (pool->NextPool) {
      jcr->jr.PoolId = get_or_create_pool_record(jcr, pool->NextPool->hdr.name);
      if (jcr->jr.PoolId == 0) {

      /*
       * put the "NextPool" resource pointer in our jcr so that we
       *  can pull the Storage reference from it.
       */
      mig_jcr->pool = jcr->pool = pool->NextPool;
      mig_jcr->jr.PoolId = jcr->jr.PoolId;
      pm_strcpy(jcr->pool_source, _("NextPool in Pool resource"));
      Jmsg(jcr, M_FATAL, 0, _("No Next Pool specification found in Pool \"%s\".\n"),

   if (!jcr->pool->storage || jcr->pool->storage->size() == 0) {
      Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Next Pool \"%s\".\n"),
         jcr->pool->hdr.name);

   /* If pool storage specified, use it instead of job storage for backup */
   copy_wstorage(jcr, jcr->pool->storage, _("NextPool in Pool resource"));
247 * Do a Migration of a previous job
249 * Returns: false on failure
252 bool do_migration(JCR *jcr)
256 JCR *mig_jcr = jcr->mig_jcr; /* newly migrated job */
259 * If mig_jcr is NULL, there is nothing to do for this job,
260 * so set a normal status, cleanup and return OK.
263 set_jcr_job_status(jcr, JS_Terminated);
264 migration_cleanup(jcr, jcr->JobStatus);
268 /* Print Job Start message */
269 Jmsg(jcr, M_INFO, 0, _("Start Migration JobId %s, Job=%s\n"),
270 edit_uint64(jcr->JobId, ed1), jcr->Job);
272 set_jcr_job_status(jcr, JS_Running);
273 set_jcr_job_status(mig_jcr, JS_Running);
274 Dmsg2(dbglevel, "JobId=%d JobLevel=%c\n", (int)jcr->jr.JobId, jcr->jr.JobLevel);
276 /* Update job start record for this migration control job */
277 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
278 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
282 Dmsg4(dbglevel, "mig_jcr: Name=%s JobId=%d Type=%c Level=%c\n",
283 mig_jcr->jr.Name, (int)mig_jcr->jr.JobId,
284 mig_jcr->jr.JobType, mig_jcr->jr.JobLevel);
286 /* Update job start record for the real migration backup job */
287 if (!db_update_job_start_record(mig_jcr, mig_jcr->db, &mig_jcr->jr)) {
288 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(mig_jcr->db));
294 * Open a message channel connection with the Storage
295 * daemon. This is to let him know that our client
296 * will be contacting him for a backup session.
299 Dmsg0(110, "Open connection with storage daemon\n");
300 set_jcr_job_status(jcr, JS_WaitSD);
301 set_jcr_job_status(mig_jcr, JS_WaitSD);
303 * Start conversation with Storage daemon
305 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
308 sd = jcr->store_bsock;
310 * Now start a job with the Storage daemon
312 Dmsg2(dbglevel, "Read store=%s, write store=%s\n",
313 ((STORE *)jcr->rstorage->first())->name(),
314 ((STORE *)jcr->wstorage->first())->name());
315 if (((STORE *)jcr->rstorage->first())->name() == ((STORE *)jcr->wstorage->first())->name()) {
316 Jmsg(jcr, M_FATAL, 0, _("Read storage \"%s\" same as write storage.\n"),
317 ((STORE *)jcr->rstorage->first())->name());
320 if (!start_storage_daemon_job(jcr, jcr->rstorage, jcr->wstorage)) {
323 Dmsg0(150, "Storage daemon connection OK\n");
325 if (!send_bootstrap_file(jcr, sd) ||
326 !response(jcr, sd, OKbootstrap, "Bootstrap", DISPLAY_ERROR)) {
330 if (!bnet_fsend(sd, "run")) {
335 * Now start a Storage daemon message thread
337 if (!start_storage_daemon_message_thread(jcr)) {
342 set_jcr_job_status(jcr, JS_Running);
343 set_jcr_job_status(mig_jcr, JS_Running);
345 /* Pickup Job termination data */
346 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
347 wait_for_storage_daemon_termination(jcr);
349 set_jcr_job_status(jcr, jcr->SDJobStatus);
350 if (jcr->JobStatus != JS_Terminated) {
354 migration_cleanup(jcr, jcr->JobStatus);
356 UAContext *ua = new_ua_context(jcr);
357 purge_job_records_from_catalog(ua, jcr->previous_jr.JobId);
/* Add an item to the comma-separated id list, but only if it is unique */
static void add_unique_id(idpkt *ids, char *item)
   /* Walk through current list to see if each item is the same as item */
   for (int i=0; i<(int)sizeof(id); i++) {
      } else if (*q == ',') {
   /* Item already present in the list => nothing to do */
   if (strcmp(item, id) == 0) {

   /* Did not find item, so add it to list (comma-separated after first) */
   if (ids->count == 0) {
      pm_strcat(ids->list, ",");
   pm_strcat(ids->list, item);
// Dmsg3(0, "add_uniq count=%d Ids=%p %s\n", ids->count, ids->list, ids->list);
/*
 * SQL callback handler: build a list of unique DB Ids.
 *  row[0] (the Id column) is appended to the idpkt passed as ctx.
 */
static int unique_dbid_handler(void *ctx, int num_fields, char **row)
   idpkt *ids = (idpkt *)ctx;

   add_unique_id(ids, row[0]);
   Dmsg3(dbglevel, "dbid_hdlr count=%d Ids=%p %s\n", ids->count, ids->list, ids->list);
/* dlist comparator: order uitems lexicographically by their item strings */
static int item_compare(void *item1, void *item2)
   uitem *i1 = (uitem *)item1;
   uitem *i2 = (uitem *)item2;
   return strcmp(i1->item, i2->item);
/*
 * SQL callback handler: insert each unique name (row[0]) into the
 *  sorted dlist passed as ctx; a duplicate is freed immediately
 *  because binary_insert() returns the existing element instead.
 */
static int unique_name_handler(void *ctx, int num_fields, char **row)
   dlist *list = (dlist *)ctx;

   uitem *new_item = (uitem *)malloc(sizeof(uitem));
   memset(new_item, 0, sizeof(uitem));
   new_item->item = bstrdup(row[0]);
   Dmsg1(dbglevel, "Unique_name_hdlr Item=%s\n", row[0]);
   item = (uitem *)list->binary_insert((void *)new_item, item_compare);
   if (item != new_item) {            /* already in list */
      free(new_item->item);
      free((char *)new_item);
/* Get Job names in Pool */
const char *sql_job =
   "SELECT DISTINCT Job.Name from Job,Pool"
   " WHERE Pool.Name='%s' AND Job.PoolId=Pool.PoolId";

/* Get JobIds from regex'ed Job names */
const char *sql_jobids_from_job =
   "SELECT DISTINCT Job.JobId,Job.StartTime FROM Job,Pool"
   " WHERE Job.Name='%s' AND Pool.Name='%s' AND Job.PoolId=Pool.PoolId"
   " ORDER by Job.StartTime";

/* Get Client names in Pool */
const char *sql_client =
   "SELECT DISTINCT Client.Name from Client,Pool,Job"
   " WHERE Pool.Name='%s' AND Job.ClientId=Client.ClientId AND"
   " Job.PoolId=Pool.PoolId";

/* Get JobIds from regex'ed Client names */
const char *sql_jobids_from_client =
   "SELECT DISTINCT Job.JobId,Job.StartTime FROM Job,Pool,Client"
   " WHERE Client.Name='%s' AND Pool.Name='%s' AND Job.PoolId=Pool.PoolId"
   " AND Job.ClientId=Client.ClientId "
   " ORDER by Job.StartTime";

/* Get Volume names in Pool (only Full/Used/Error, Enabled Volumes) */
const char *sql_vol =
   "SELECT DISTINCT VolumeName FROM Media,Pool WHERE"
   " VolStatus in ('Full','Used','Error') AND Media.Enabled=1 AND"
   " Media.PoolId=Pool.PoolId AND Pool.Name='%s'";

/* Get JobIds from regex'ed Volume names */
const char *sql_jobids_from_vol =
   "SELECT DISTINCT Job.JobId,Job.StartTime FROM Media,JobMedia,Job"
   " WHERE Media.VolumeName='%s' AND Media.MediaId=JobMedia.MediaId"
   " AND JobMedia.JobId=Job.JobId"
   " ORDER by Job.StartTime";

/* Get the MediaId of the smallest eligible Volume in the Pool */
const char *sql_smallest_vol =
   "SELECT MediaId FROM Media,Pool WHERE"
   " VolStatus in ('Full','Used','Error') AND Media.Enabled=1 AND"
   " Media.PoolId=Pool.PoolId AND Pool.Name='%s'"
   " ORDER BY VolBytes ASC LIMIT 1";

/* Get the MediaId of the least recently written eligible Volume */
const char *sql_oldest_vol =
   "SELECT MediaId FROM Media,Pool WHERE"
   " VolStatus in ('Full','Used','Error') AND Media.Enabled=1 AND"
   " Media.PoolId=Pool.PoolId AND Pool.Name='%s'"
   " ORDER BY LastWritten ASC LIMIT 1";

/* Get JobIds when we have selected MediaId */
const char *sql_jobids_from_mediaid =
   "SELECT DISTINCT Job.JobId,Job.StartTime FROM JobMedia,Job"
   " WHERE JobMedia.JobId=Job.JobId AND JobMedia.MediaId=%s"
   " ORDER by Job.StartTime";

/* Get the number of bytes in the pool */
const char *sql_pool_bytes =
   "SELECT SUM(VolBytes) FROM Media,Pool WHERE"
   " VolStatus in ('Full','Used','Error','Append') AND Media.Enabled=1 AND"
   " Media.PoolId=Pool.PoolId AND Pool.Name='%s'";

/* Get the number of bytes in the Jobs */
const char *sql_job_bytes =
   "SELECT SUM(JobBytes) FROM Job WHERE JobId IN (%s)";

/* Get Media Ids in Pool */
const char *sql_mediaids =
   "SELECT MediaId FROM Media,Pool WHERE"
   " VolStatus in ('Full','Used','Error') AND Media.Enabled=1 AND"
   " Media.PoolId=Pool.PoolId AND Pool.Name='%s' ORDER BY LastWritten ASC";

/* Get JobIds in Pool longer than specified time */
const char *sql_pool_time =
   "SELECT DISTINCT Job.JobId from Pool,Job,Media,JobMedia WHERE"
   " Pool.Name='%s' AND Media.PoolId=Pool.PoolId AND"
   " VolStatus in ('Full','Used','Error') AND Media.Enabled=1 AND"
   " JobMedia.JobId=Job.JobId AND Job.PoolId=Media.PoolId"
   " AND Job.RealEndTime<='%s'";

/* Currently unused alternative query kept for reference:
 * const char *sql_ujobid =
 *   "SELECT DISTINCT Job.Job from Client,Pool,Media,Job,JobMedia "
 *   " WHERE Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND"
 *   " JobMedia.JobId=Job.JobId AND Job.PoolId=Media.PoolId";
 */
539 * This is the central piece of code that finds a job or jobs
540 * actually JobIds to migrate. It first looks to see if one
541 * has been "manually" specified in jcr->MigrateJobId, and if
542 * so, it returns that JobId to be run. Otherwise, it
543 * examines the Selection Type to see what kind of migration
544 * we are doing (Volume, Job, Client, ...) and applies any
545 * Selection Pattern if appropriate to obtain a list of JobIds.
546 * Finally, it will loop over all the JobIds found, except the last
547 * one starting a new job with MigrationJobId set to that JobId, and
548 * finally, it returns the last JobId to the caller.
550 * Returns: false on error
551 * true if OK and jcr->previous_jr filled in
553 static bool get_job_to_migrate(JCR *jcr)
556 POOL_MEM query(PM_MESSAGE);
561 idpkt ids, mid, jids;
567 char dt[MAX_TIME_LENGTH];
569 ids.list = get_pool_memory(PM_MESSAGE);
572 mid.list = get_pool_memory(PM_MESSAGE);
575 jids.list = get_pool_memory(PM_MESSAGE);
581 * If MigrateJobId is set, then we migrate only that Job,
582 * otherwise, we go through the full selection of jobs to
585 if (jcr->MigrateJobId != 0) {
586 Dmsg1(dbglevel, "At Job start previous jobid=%u\n", jcr->MigrateJobId);
587 edit_uint64(jcr->MigrateJobId, ids.list);
590 switch (jcr->job->selection_type) {
592 if (!regex_find_jobids(jcr, &ids, sql_job, sql_jobids_from_job, "Job")) {
597 if (!regex_find_jobids(jcr, &ids, sql_client, sql_jobids_from_client, "Client")) {
602 if (!regex_find_jobids(jcr, &ids, sql_vol, sql_jobids_from_vol, "Volume")) {
607 if (!jcr->job->selection_pattern) {
608 Jmsg(jcr, M_FATAL, 0, _("No Migration SQL selection pattern specified.\n"));
611 Dmsg1(dbglevel, "SQL=%s\n", jcr->job->selection_pattern);
612 if (!db_sql_query(jcr->db, jcr->job->selection_pattern,
613 unique_dbid_handler, (void *)&ids)) {
614 Jmsg(jcr, M_FATAL, 0,
615 _("SQL failed. ERR=%s\n"), db_strerror(jcr->db));
619 case MT_SMALLEST_VOL:
620 if (!find_mediaid_then_jobids(jcr, &ids, sql_smallest_vol, "Smallest Volume")) {
625 if (!find_mediaid_then_jobids(jcr, &ids, sql_oldest_vol, "Oldest Volume")) {
630 case MT_POOL_OCCUPANCY:
632 /* Find count of bytes in pool */
633 Mmsg(query, sql_pool_bytes, jcr->pool->hdr.name);
634 if (!db_sql_query(jcr->db, query.c_str(), db_int64_handler, (void *)&ctx)) {
635 Jmsg(jcr, M_FATAL, 0, _("SQL failed. ERR=%s\n"), db_strerror(jcr->db));
638 if (ctx.count == 0) {
639 Jmsg(jcr, M_INFO, 0, _("No Volumes found to migrate.\n"));
642 pool_bytes = ctx.value;
643 Dmsg2(dbglevel, "highbytes=%d pool=%d\n", (int)jcr->pool->MigrationHighBytes,
645 if (pool_bytes < (int64_t)jcr->pool->MigrationHighBytes) {
646 Jmsg(jcr, M_INFO, 0, _("No Volumes found to migrate.\n"));
649 Dmsg0(dbglevel, "We should do Occupation migration.\n");
652 /* Find a list of MediaIds that could be migrated */
653 Mmsg(query, sql_mediaids, jcr->pool->hdr.name);
654 Dmsg1(dbglevel, "query=%s\n", query.c_str());
655 if (!db_sql_query(jcr->db, query.c_str(), unique_dbid_handler, (void *)&ids)) {
656 Jmsg(jcr, M_FATAL, 0, _("SQL failed. ERR=%s\n"), db_strerror(jcr->db));
659 if (ids.count == 0) {
660 Jmsg(jcr, M_INFO, 0, _("No Volumes found to migrate.\n"));
663 Dmsg2(dbglevel, "Pool Occupancy ids=%d MediaIds=%s\n", ids.count, ids.list);
666 * Now loop over MediaIds getting more JobIds to migrate until
667 * we reduce the pool occupancy below the low water mark.
670 for (int i=0; i < (int)ids.count; i++) {
671 stat = get_next_dbid_from_list(&p, &MediaId);
672 Dmsg2(dbglevel, "get_next_dbid stat=%d MediaId=%u\n", stat, MediaId);
674 Jmsg(jcr, M_FATAL, 0, _("Invalid MediaId found.\n"));
676 } else if (stat == 0) {
680 Mmsg(mid.list, "%s", edit_int64(MediaId, ed1));
681 ok = find_jobids_from_mediaid_list(jcr, &mid, "Volumes");
686 pm_strcat(jids.list, ",");
688 pm_strcat(jids.list, mid.list);
689 jids.count += mid.count;
691 /* Now get the count of bytes added */
693 /* Find count of bytes from Jobs */
694 Mmsg(query, sql_job_bytes, mid.list);
695 if (!db_sql_query(jcr->db, query.c_str(), db_int64_handler, (void *)&ctx)) {
696 Jmsg(jcr, M_FATAL, 0, _("SQL failed. ERR=%s\n"), db_strerror(jcr->db));
699 pool_bytes -= ctx.value;
700 Dmsg1(dbglevel, "Job bytes=%d\n", (int)ctx.value);
701 Dmsg2(dbglevel, "lowbytes=%d pool=%d\n", (int)jcr->pool->MigrationLowBytes,
703 if (pool_bytes <= (int64_t)jcr->pool->MigrationLowBytes) {
704 Dmsg0(dbglevel, "We should be done.\n");
709 Dmsg2(dbglevel, "Pool Occupancy ids=%d JobIds=%s\n", jids.count, jids.list);
714 ttime = time(NULL) - (time_t)jcr->pool->MigrationTime;
715 (void)localtime_r(&ttime, &tm);
716 strftime(dt, sizeof(dt), "%Y-%m-%d %H:%M:%S", &tm);
719 Mmsg(query, sql_pool_time, jcr->pool->hdr.name, dt);
720 Dmsg1(dbglevel, "query=%s\n", query.c_str());
721 if (!db_sql_query(jcr->db, query.c_str(), unique_dbid_handler, (void *)&ids)) {
722 Jmsg(jcr, M_FATAL, 0, _("SQL failed. ERR=%s\n"), db_strerror(jcr->db));
725 if (ids.count == 0) {
726 Jmsg(jcr, M_INFO, 0, _("No Volumes found to migrate.\n"));
729 Dmsg2(dbglevel, "PoolTime ids=%d JobIds=%s\n", ids.count, ids.list);
733 Jmsg(jcr, M_FATAL, 0, _("Unknown Migration Selection Type.\n"));
739 * Loop over all jobids except the last one, sending
740 * them to start_migration_job(), which will start a job
741 * for each of them. For the last JobId, we handle it below.
744 if (ids.count == 0) {
745 Jmsg(jcr, M_INFO, 0, _("No JobIds found to migrate.\n"));
748 Jmsg(jcr, M_INFO, 0, _("The following %u JobId%s will be migrated: %s\n"),
749 ids.count, ids.count==0?"":"s", ids.list);
750 Dmsg2(dbglevel, "Before loop count=%d ids=%s\n", ids.count, ids.list);
751 for (int i=1; i < (int)ids.count; i++) {
753 stat = get_next_jobid_from_list(&p, &JobId);
754 Dmsg3(dbglevel, "get_jobid_no=%d stat=%d JobId=%u\n", i, stat, JobId);
755 jcr->MigrateJobId = JobId;
756 start_migration_job(jcr);
757 Dmsg0(dbglevel, "Back from start_migration_job\n");
759 Jmsg(jcr, M_FATAL, 0, _("Invalid JobId found.\n"));
761 } else if (stat == 0) {
762 Jmsg(jcr, M_INFO, 0, _("No JobIds found to migrate.\n"));
767 /* Now get the last JobId and handle it in the current job */
769 stat = get_next_jobid_from_list(&p, &JobId);
770 Dmsg2(dbglevel, "Last get_next_jobid stat=%d JobId=%u\n", stat, (int)JobId);
772 Jmsg(jcr, M_FATAL, 0, _("Invalid JobId found.\n"));
774 } else if (stat == 0) {
775 Jmsg(jcr, M_INFO, 0, _("No JobIds found to migrate.\n"));
779 jcr->previous_jr.JobId = JobId;
780 Dmsg1(dbglevel, "Previous jobid=%d\n", (int)jcr->previous_jr.JobId);
782 if (!db_get_job_record(jcr, jcr->db, &jcr->previous_jr)) {
783 Jmsg(jcr, M_FATAL, 0, _("Could not get job record for JobId %s to migrate. ERR=%s"),
784 edit_int64(jcr->previous_jr.JobId, ed1),
785 db_strerror(jcr->db));
788 Jmsg(jcr, M_INFO, 0, _("Migration using JobId=%s Job=%s\n"),
789 edit_int64(jcr->previous_jr.JobId, ed1), jcr->previous_jr.Job);
790 Dmsg3(dbglevel, "Migration JobId=%d using JobId=%s Job=%s\n",
792 edit_int64(jcr->previous_jr.JobId, ed1), jcr->previous_jr.Job);
802 free_pool_memory(ids.list);
803 free_pool_memory(mid.list);
804 free_pool_memory(jids.list);
/*
 * Queue a separate run of this migration Job for one selected JobId
 *  by building and executing a console "run" command.
 *  NOTE(review): the %d in the success message prints the value
 *  returned by run_cmd() -- presumably the started JobId; confirm
 *  against the run_cmd implementation.
 */
static void start_migration_job(JCR *jcr)
   UAContext *ua = new_ua_context(jcr);
   Mmsg(ua->cmd, "run %s jobid=%s", jcr->job->hdr.name,
        edit_uint64(jcr->MigrateJobId, ed1));
   Dmsg1(dbglevel, "=============== Migration cmd=%s\n", ua->cmd);
   parse_ua_args(ua);                 /* parse command */
   int stat = run_cmd(ua, ua->cmd);
      Jmsg(jcr, M_ERROR, 0, _("Could not start migration job.\n"));
      Jmsg(jcr, M_INFO, 0, _("Migration JobId %d started.\n"), stat);
/*
 * Run query1 (with the Pool name substituted) to select exactly one
 *  MediaId, then expand that MediaId into the list of JobIds stored
 *  on it.  More or fewer than one MediaId is treated as an error.
 */
static bool find_mediaid_then_jobids(JCR *jcr, idpkt *ids, const char *query1,
   POOL_MEM query(PM_MESSAGE);

   /* Basic query for MediaId */
   Mmsg(query, query1, jcr->pool->hdr.name);
   if (!db_sql_query(jcr->db, query.c_str(), unique_dbid_handler, (void *)ids)) {
      Jmsg(jcr, M_FATAL, 0, _("SQL failed. ERR=%s\n"), db_strerror(jcr->db));
   if (ids->count == 0) {
      Jmsg(jcr, M_INFO, 0, _("No %ss found to migrate.\n"), type);
   if (ids->count != 1) {
      Jmsg(jcr, M_FATAL, 0, _("SQL logic error. Count should be 1 but is %d\n"),
   Dmsg1(dbglevel, "Smallest Vol Jobids=%s\n", ids->list);

   ok = find_jobids_from_mediaid_list(jcr, ids, type);
/*
 * Replace the MediaId list in ids with the list of JobIds that have
 *  data on those Volumes (via the JobMedia table).
 */
static bool find_jobids_from_mediaid_list(JCR *jcr, idpkt *ids, const char *type)
   POOL_MEM query(PM_MESSAGE);

   Mmsg(query, sql_jobids_from_mediaid, ids->list);
   if (!db_sql_query(jcr->db, query.c_str(), unique_dbid_handler, (void *)ids)) {
      Jmsg(jcr, M_FATAL, 0, _("SQL failed. ERR=%s\n"), db_strerror(jcr->db));
   if (ids->count == 0) {
      Jmsg(jcr, M_INFO, 0, _("No %ss found to migrate.\n"), type);
874 static bool regex_find_jobids(JCR *jcr, idpkt *ids, const char *query1,
875 const char *query2, const char *type)
879 uitem *last_item = NULL;
884 POOL_MEM query(PM_MESSAGE);
886 item_chain = New(dlist(item, &item->link));
887 if (!jcr->job->selection_pattern) {
888 Jmsg(jcr, M_FATAL, 0, _("No Migration %s selection pattern specified.\n"),
892 Dmsg1(dbglevel, "regex=%s\n", jcr->job->selection_pattern);
893 /* Compile regex expression */
894 rc = regcomp(&preg, jcr->job->selection_pattern, REG_EXTENDED);
896 regerror(rc, &preg, prbuf, sizeof(prbuf));
897 Jmsg(jcr, M_FATAL, 0, _("Could not compile regex pattern \"%s\" ERR=%s\n"),
898 jcr->job->selection_pattern, prbuf);
901 /* Basic query for names */
902 Mmsg(query, query1, jcr->pool->hdr.name);
903 Dmsg1(dbglevel, "get name query1=%s\n", query.c_str());
904 if (!db_sql_query(jcr->db, query.c_str(), unique_name_handler,
905 (void *)item_chain)) {
906 Jmsg(jcr, M_FATAL, 0,
907 _("SQL to get %s failed. ERR=%s\n"), type, db_strerror(jcr->db));
910 /* Now apply the regex to the names and remove any item not matched */
911 foreach_dlist(item, item_chain) {
912 const int nmatch = 30;
913 regmatch_t pmatch[nmatch];
915 Dmsg1(dbglevel, "Remove item %s\n", last_item->item);
916 free(last_item->item);
917 item_chain->remove(last_item);
919 Dmsg1(dbglevel, "get name Item=%s\n", item->item);
920 rc = regexec(&preg, item->item, nmatch, pmatch, 0);
922 last_item = NULL; /* keep this one */
928 free(last_item->item);
929 Dmsg1(dbglevel, "Remove item %s\n", last_item->item);
930 item_chain->remove(last_item);
934 * At this point, we have a list of items in item_chain
935 * that have been matched by the regex, so now we need
936 * to look up their jobids.
939 foreach_dlist(item, item_chain) {
940 Dmsg2(dbglevel, "Got %s: %s\n", type, item->item);
941 Mmsg(query, query2, item->item, jcr->pool->hdr.name);
942 Dmsg1(dbglevel, "get id from name query2=%s\n", query.c_str());
943 if (!db_sql_query(jcr->db, query.c_str(), unique_dbid_handler, (void *)ids)) {
944 Jmsg(jcr, M_FATAL, 0,
945 _("SQL failed. ERR=%s\n"), db_strerror(jcr->db));
949 if (ids->count == 0) {
950 Jmsg(jcr, M_INFO, 0, _("No %ss found to migrate.\n"), type);
954 Dmsg2(dbglevel, "Count=%d Jobids=%s\n", ids->count, ids->list);
956 Dmsg0(dbglevel, "After delete item_chain\n");
962 * Release resources allocated during backup.
964 void migration_cleanup(JCR *jcr, int TermCode)
966 char sdt[MAX_TIME_LENGTH], edt[MAX_TIME_LENGTH];
967 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], elapsed[50];
968 char ec6[50], ec7[50], ec8[50];
969 char term_code[100], sd_term_msg[100];
970 const char *term_msg;
971 int msg_type = M_INFO;
975 JCR *mig_jcr = jcr->mig_jcr;
976 POOL_MEM query(PM_MESSAGE);
978 Dmsg2(100, "Enter migrate_cleanup %d %c\n", TermCode, TermCode);
979 dequeue_messages(jcr); /* display any queued messages */
980 memset(&mr, 0, sizeof(mr));
981 set_jcr_job_status(jcr, TermCode);
982 update_job_end_record(jcr); /* update database */
985 * Check if we actually did something.
986 * mig_jcr is jcr of the newly migrated job.
989 mig_jcr->JobFiles = jcr->JobFiles = jcr->SDJobFiles;
990 mig_jcr->JobBytes = jcr->JobBytes = jcr->SDJobBytes;
991 mig_jcr->VolSessionId = jcr->VolSessionId;
992 mig_jcr->VolSessionTime = jcr->VolSessionTime;
993 mig_jcr->jr.RealEndTime = 0;
994 mig_jcr->jr.PriorJobId = jcr->previous_jr.JobId;
996 set_jcr_job_status(mig_jcr, TermCode);
997 update_job_end_record(mig_jcr);
999 /* Update final items to set them to the previous job's values */
1000 Mmsg(query, "UPDATE Job SET StartTime='%s',EndTime='%s',"
1001 "JobTDate=%s WHERE JobId=%s",
1002 jcr->previous_jr.cStartTime, jcr->previous_jr.cEndTime,
1003 edit_uint64(jcr->previous_jr.JobTDate, ec1),
1004 edit_uint64(mig_jcr->jr.JobId, ec2));
1005 db_sql_query(mig_jcr->db, query.c_str(), NULL, NULL);
1007 /* Now mark the previous job as migrated if it terminated normally */
1008 if (jcr->JobStatus == JS_Terminated) {
1009 Mmsg(query, "UPDATE Job SET Type='%c' WHERE JobId=%s",
1010 (char)JT_MIGRATED_JOB, edit_uint64(jcr->previous_jr.JobId, ec1));
1011 db_sql_query(mig_jcr->db, query.c_str(), NULL, NULL);
1014 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
1015 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
1016 db_strerror(jcr->db));
1017 set_jcr_job_status(jcr, JS_ErrorTerminated);
1020 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
1021 if (!db_get_media_record(jcr, jcr->db, &mr)) {
1022 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
1023 mr.VolumeName, db_strerror(jcr->db));
1024 set_jcr_job_status(jcr, JS_ErrorTerminated);
1027 update_bootstrap_file(mig_jcr);
1029 if (!db_get_job_volume_names(mig_jcr, mig_jcr->db, mig_jcr->jr.JobId, &mig_jcr->VolumeName)) {
1031 * Note, if the job has failed, most likely it did not write any
1032 * tape, so suppress this "error" message since in that case
1033 * it is normal. Or look at it the other way, only for a
1034 * normal exit should we complain about this error.
1036 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
1037 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(mig_jcr->db));
1039 mig_jcr->VolumeName[0] = 0; /* none */
1041 switch (jcr->JobStatus) {
1043 if (jcr->Errors || jcr->SDErrors) {
1044 term_msg = _("%s OK -- with warnings");
1046 term_msg = _("%s OK");
1050 case JS_ErrorTerminated:
1051 term_msg = _("*** %s Error ***");
1052 msg_type = M_ERROR; /* Generate error message */
1053 if (jcr->store_bsock) {
1054 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
1055 if (jcr->SD_msg_chan) {
1056 pthread_cancel(jcr->SD_msg_chan);
1061 term_msg = _("%s Canceled");
1062 if (jcr->store_bsock) {
1063 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
1064 if (jcr->SD_msg_chan) {
1065 pthread_cancel(jcr->SD_msg_chan);
1070 term_msg = _("Inappropriate %s term code");
1074 term_msg = _("%s -- no files to migrate");
1077 bsnprintf(term_code, sizeof(term_code), term_msg, "Migration");
1078 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
1079 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
1080 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
1084 kbps = (double)jcr->SDJobBytes / (1000 * RunTime);
1088 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
1090 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
1091 " Prev Backup JobId: %s\n"
1092 " New Backup JobId: %s\n"
1093 " Migration JobId: %s\n"
1094 " Migration Job: %s\n"
1095 " Backup Level: %s%s\n"
1097 " FileSet: \"%s\" %s\n"
1098 " Pool: \"%s\" (From %s)\n"
1099 " Read Storage: \"%s\" (From %s)\n"
1100 " Write Storage: \"%s\" (From %s)\n"
1103 " Elapsed time: %s\n"
1105 " SD Files Written: %s\n"
1106 " SD Bytes Written: %s (%sB)\n"
1107 " Rate: %.1f KB/s\n"
1108 " Volume name(s): %s\n"
1109 " Volume Session Id: %d\n"
1110 " Volume Session Time: %d\n"
1111 " Last Volume Bytes: %s (%sB)\n"
1113 " SD termination status: %s\n"
1114 " Termination: %s\n\n"),
1118 edit_uint64(jcr->previous_jr.JobId, ec6),
1119 mig_jcr ? edit_uint64(mig_jcr->jr.JobId, ec7) : "0",
1120 edit_uint64(jcr->jr.JobId, ec8),
1122 level_to_str(jcr->JobLevel), jcr->since,
1123 jcr->client->name(),
1124 jcr->fileset->name(), jcr->FSCreateTime,
1125 jcr->pool->name(), jcr->pool_source,
1126 jcr->rstore?jcr->rstore->name():"*None*",
1127 NPRT(jcr->rstore_source),
1128 jcr->wstore?jcr->wstore->name():"*None*",
1129 NPRT(jcr->wstore_source),
1132 edit_utime(RunTime, elapsed, sizeof(elapsed)),
1134 edit_uint64_with_commas(jcr->SDJobFiles, ec1),
1135 edit_uint64_with_commas(jcr->SDJobBytes, ec2),
1136 edit_uint64_with_suffix(jcr->SDJobBytes, ec3),
1138 mig_jcr ? mig_jcr->VolumeName : "",
1140 jcr->VolSessionTime,
1141 edit_uint64_with_commas(mr.VolBytes, ec4),
1142 edit_uint64_with_suffix(mr.VolBytes, ec5),
1147 Dmsg1(100, "migrate_cleanup() mig_jcr=0x%x\n", jcr->mig_jcr);
1149 free_jcr(jcr->mig_jcr);
1150 jcr->mig_jcr = NULL;
1152 Dmsg0(100, "Leave migrate_cleanup()\n");
/*
 * Return next DBId from comma separated list
 *
 * Returns:
 *    1 if next DBId returned
 *    0 if no more DBIds are in list
 *   -1 there is an error
 */
static int get_next_dbid_from_list(char **p, DBId_t *DBId)
   /* Copy characters up to the next comma (or end of list) into id */
   for (int i=0; i<(int)sizeof(id); i++) {
      } else if (*q == ',') {
   } else if (!is_a_number(id)) {
      return -1;                    /* error */
   *DBId = str_to_int64(id);