3 * Bacula Director -- migrate.c -- responsible for doing migration jobs.
6 * Kern Sibbald, September MMIV
8 * Basic tasks done here:
9 * Open DB and create records for this job.
10 * Open Message Channel with Storage daemon to tell him a job will be starting.
11 * Open connection with Storage daemon and pass him commands
13 * When the Storage daemon finishes the job, update the DB.
18 Copyright (C) 2004-2006 Kern Sibbald
20 This program is free software; you can redistribute it and/or
21 modify it under the terms of the GNU General Public License
22 version 2 as amended with additional clauses defined in the
23 file LICENSE in the main source directory.
25 This program is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 file LICENSE for additional details.
/* Expected reply text handed to response() after the bootstrap file has been
 * sent to the Storage daemon (see do_migration). */
36 static char OKbootstrap[] = "3000 OK bootstrap\n";
/* Forward declaration; the definition appears further down in this file. */
37 static bool get_job_to_migrate(JCR *jcr);
40 * Called here before the job is run to do the job
/*
 * do_migration_init -- preparation step for a migration job.
 *
 * Visible sequence:
 *   1. get_job_to_migrate() selects the job to migrate (result is read from
 *      jcr->previous_jr).
 *   2. A previous_jr.JobId of 0 means there is nothing to do; true is returned.
 *   3. get_or_create_fileset_record() is called for the job.
 *   4. jcr->pool is taken from full_pool, inc_pool or dif_pool inside a switch
 *      on the previous job's level.
 *   5. The pool's catalog record is looked up by name and created if missing;
 *      its PoolId is stored in jcr->jr.PoolId.
 *   6. The pool's storage list is copied into the jcr.
 *   7. create_restore_bootstrap_file() is called.
 *
 * NOTE(review): only the "no work" return is visible in this listing; the
 * failure branches presumably return false -- confirm against the full file.
 */
43 bool do_migration_init(JCR *jcr)
/* Choose the job that will be migrated. */
47 if (!get_job_to_migrate(jcr)) {
/* JobId 0 is treated as "nothing to migrate", which is not an error. */
51 if (jcr->previous_jr.JobId == 0) {
52 return true; /* no work */
55 if (!get_or_create_fileset_record(jcr)) {
60 * Get the Pool record -- first apply any level defined pools
62 switch (jcr->previous_jr.JobLevel) {
65 jcr->pool = jcr->full_pool;
70 jcr->pool = jcr->inc_pool;
75 jcr->pool = jcr->dif_pool;
/* Look the selected pool up by name; while the lookup fails, try to create
 * the catalog record and look again. */
79 memset(&pr, 0, sizeof(pr));
80 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
82 while (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
83 /* Try to create the pool */
84 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
85 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
86 db_strerror(jcr->db));
89 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
/* Remember the catalog id of the pool in the job record. */
92 jcr->jr.PoolId = pr.PoolId;
94 /* If pool storage specified, use it instead of job storage */
95 copy_storage(jcr, jcr->pool->storage);
/* NOTE(review): the condition guarding the message below is not visible in
 * this listing. */
98 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
/* Build the bootstrap file for the job. */
102 if (!create_restore_bootstrap_file(jcr)) {
109 * Do a Migration of a previous job
111 * Returns: false on failure
/*
 * do_migration -- run the migration of the job recorded in jcr->previous_jr.
 *
 * Two job control records are involved: jcr, the migration job itself, and
 * tjcr, created here and stored in jcr->previous_jcr, which is set up from the
 * job resource named in jcr->previous_jr.
 *
 * Visible phases: resolve pools and storage, write the job start records,
 * connect to the Storage daemon and start the job there, send the bootstrap,
 * issue "run", wait for the Storage daemon to finish, then call
 * migration_cleanup().
 *
 * Returns true on the visible "no work" path; the comment above this function
 * states that false is returned on failure.
 */
114 bool do_migration(JCR *jcr)
/* Nothing was selected for migration: mark the job terminated, run the
 * cleanup and report success. */
123 if (jcr->previous_jr.JobId == 0) {
124 jcr->JobStatus = JS_Terminated;
125 migration_cleanup(jcr, jcr->JobStatus);
126 return true; /* no work */
128 Dmsg4(100, "Target: Name=%s JobId=%d Type=%c Level=%c\n",
129 jcr->previous_jr.Name, jcr->previous_jr.JobId,
130 jcr->previous_jr.JobType, jcr->previous_jr.JobLevel);
132 Dmsg4(100, "Current: Name=%s JobId=%d Type=%c Level=%c\n",
133 jcr->jr.Name, jcr->jr.JobId,
134 jcr->jr.JobType, jcr->jr.JobLevel);
/* Job resources for the migration job and for the job being migrated. */
137 job = (JOB *)GetResWithName(R_JOB, jcr->jr.Name);
138 tjob = (JOB *)GetResWithName(R_JOB, jcr->previous_jr.Name);
145 * Target jcr is the new Job that corresponds to the original
146 * target job. It "runs" at the same time as the current
147 * migration job and becomes a new backup job that replaces
148 * the original backup job. Most operations on the current
149 * migration jcr are also done on the target jcr.
151 tjcr = jcr->previous_jcr = new_jcr(sizeof(JCR), dird_free_jcr);
/* Give the target jcr its own copy of the original job's catalog record. */
152 memcpy(&tjcr->previous_jr, &jcr->previous_jr, sizeof(tjcr->previous_jr));
154 /* Turn the tjcr into a "real" job */
155 set_jcr_defaults(tjcr, tjob);
156 if (!setup_job(tjcr)) {
159 /* Set output PoolId and FileSetId. */
160 tjcr->jr.PoolId = jcr->jr.PoolId;
161 tjcr->jr.FileSetId = jcr->jr.FileSetId;
164 * Get the PoolId used with the original job. Then
165 * find the pool name from the database record.
167 memset(&pr, 0, sizeof(pr));
168 pr.PoolId = tjcr->previous_jr.PoolId;
169 if (!db_get_pool_record(jcr, jcr->db, &pr)) {
170 Jmsg(jcr, M_FATAL, 0, _("Pool for JobId %s not in database. ERR=%s\n"),
171 edit_int64(pr.PoolId, ed1), db_strerror(jcr->db));
174 /* Get the pool resource corresponding to the original job */
175 pool = (POOL *)GetResWithName(R_POOL, pr.Name);
177 Jmsg(jcr, M_FATAL, 0, _("Pool resource \"%s\" not found.\n"), pr.Name);
181 /* Check Migration time and High/Low water marks */
184 /* If pool storage specified, use it for restore */
185 copy_storage(tjcr, pool->storage);
187 /* If the original backup pool has a NextPool, make sure a
188 * record exists in the database.
190 if (pool->NextPool) {
/* Look the NextPool record up by name; while the lookup fails, try to create
 * it and look again. */
191 memset(&pr, 0, sizeof(pr));
192 bstrncpy(pr.Name, pool->NextPool->hdr.name, sizeof(pr.Name));
194 while (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
195 /* Try to create the pool */
196 if (create_pool(jcr, jcr->db, pool->NextPool, POOL_OP_CREATE) < 0) {
197 Jmsg(jcr, M_FATAL, 0, _("Pool \"%s\" not in database. %s"), pr.Name,
198 db_strerror(jcr->db));
201 Jmsg(jcr, M_INFO, 0, _("Pool \"%s\" created in database.\n"), pr.Name);
205 * put the "NextPool" resource pointer in our jcr so that we
206 * can pull the Storage reference from it.
208 tjcr->pool = jcr->pool = pool->NextPool;
209 tjcr->jr.PoolId = jcr->jr.PoolId = pr.PoolId;
/* The two assignments above switch both jcr and tjcr to NextPool and to its
 * catalog PoolId. */
212 /* If pool storage specified, use it instead of job storage for backup */
213 copy_storage(jcr, jcr->pool->storage);
215 /* Print Job Start message */
216 Jmsg(jcr, M_INFO, 0, _("Start Migration JobId %s, Job=%s\n"),
217 edit_uint64(jcr->JobId, ed1), jcr->Job);
/* NOTE(review): the next two calls are identical and both act on jcr; the
 * other status updates in this function come in jcr/tjcr pairs, so the second
 * call may have been intended for tjcr -- confirm. */
219 set_jcr_job_status(jcr, JS_Running);
220 set_jcr_job_status(jcr, JS_Running);
221 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
/* Write the job start record for the migration job, then for the target job.
 * Both failures are reported through jcr. */
222 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
223 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
227 if (!db_update_job_start_record(tjcr, tjcr->db, &tjcr->jr)) {
228 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(tjcr->db));
234 * Open a message channel connection with the Storage
235 * daemon. This is to let him know that our client
236 * will be contacting him for a backup session.
239 Dmsg0(110, "Open connection with storage daemon\n");
240 set_jcr_job_status(jcr, JS_WaitSD);
241 set_jcr_job_status(tjcr, JS_WaitSD);
243 * Start conversation with Storage daemon
245 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
/* Keep the Storage daemon socket handy for the bootstrap and "run" exchange. */
248 sd = jcr->store_bsock;
250 * Now start a job with the Storage daemon
252 Dmsg2(000, "Read store=%s, write store=%s\n",
253 ((STORE *)tjcr->storage->first())->hdr.name,
254 ((STORE *)jcr->storage->first())->hdr.name);
/* tjcr->storage is passed first and jcr->storage second; the debug line above
 * labels them as the read store and the write store respectively. */
255 if (!start_storage_daemon_job(jcr, tjcr->storage, jcr->storage)) {
258 Dmsg0(150, "Storage daemon connection OK\n");
/* Send the bootstrap and require the OKbootstrap reply. */
260 if (!send_bootstrap_file(jcr, sd) ||
261 !response(jcr, sd, OKbootstrap, "Bootstrap", DISPLAY_ERROR)) {
267 * Now start a Storage daemon message thread
269 if (!start_storage_daemon_message_thread(jcr)) {
/* Tell the Storage daemon to start the job. */
273 if (!bnet_fsend(sd, "run")) {
277 set_jcr_job_status(jcr, JS_Running);
278 set_jcr_job_status(tjcr, JS_Running);
280 /* Pickup Job termination data */
281 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
282 wait_for_storage_daemon_termination(jcr);
/* The job's final status is whatever the Storage daemon reported. In the
 * visible code the cleanup runs when that status is JS_Terminated. */
284 jcr->JobStatus = jcr->SDJobStatus;
285 if (jcr->JobStatus == JS_Terminated) {
286 migration_cleanup(jcr, jcr->JobStatus);
293 * Callback handler that builds a list of JobIds
/*
 * jobid_handler -- row callback passed to db_sql_query().
 *
 * ctx         used as a POOLMEM string that accumulates the result
 * num_fields  not referenced in the visible lines
 * row         result row; only row[0] is used
 *
 * Appends row[0] to the string, preceded by a comma when the string is not
 * empty, producing a comma-separated list.
 *
 * NOTE(review): JobIds is a local copy of the ctx pointer. If pm_strcat() can
 * move the buffer when it grows, the caller's pointer would not follow --
 * confirm against the pm_strcat() declaration.
 */
295 static int jobid_handler(void *ctx, int num_fields, char **row)
297 POOLMEM *JobIds = (POOLMEM *)ctx;
/* A non-empty list needs a separator before the next value. */
299 if (JobIds[0] != 0) {
300 pm_strcat(JobIds, ",");
302 pm_strcat(JobIds, row[0]);
/*
 * Query template: MediaId of the volume with the fewest VolBytes among the
 * 'Full', 'Used' or 'Error' volumes of one pool.
 * %s = pool name. Used for MT_SMALLEST_VOL in get_job_to_migrate().
 */
306 const char *sql_smallest_vol =
307 "SELECT MediaId FROM Media,Pool WHERE"
308 " VolStatus in ('Full','Used','Error') AND"
309 " Media.PoolId=Pool.PoolId AND Pool.Name='%s'"
310 " ORDER BY VolBytes ASC LIMIT 1";
/*
 * Query template: MediaId of the volume with the earliest LastWritten among
 * the 'Full', 'Used' or 'Error' volumes of one pool.
 * %s = pool name. Used in get_job_to_migrate().
 */
312 const char *sql_oldest_vol =
313 "SELECT MediaId FROM Media,Pool WHERE"
314 " VolStatus in ('Full','Used','Error') AND"
315 " Media.PoolId=Pool.PoolId AND Pool.Name='%s'"
316 " ORDER BY LastWritten ASC LIMIT 1";
/*
 * Query template: distinct JobIds that have JobMedia entries on one volume,
 * ordered by the job's StartTime.
 * %s = MediaId, substituted as text. Used in get_job_to_migrate() with the
 * result of a preceding volume query.
 */
318 const char *sql_jobids_from_mediaid =
319 "SELECT DISTINCT Job.JobId FROM JobMedia,Job"
320 " WHERE JobMedia.JobId=Job.JobId AND JobMedia.MediaId=%s"
321 " ORDER by Job.StartTime";
/*
 * Query template: total VolBytes over the 'Full', 'Used', 'Error' and
 * 'Append' volumes of one pool.
 * %s = pool name. Used for MT_POOL_OCCUPANCY in get_job_to_migrate().
 */
323 const char *sql_pool_bytes =
324 "SELECT SUM(VolBytes) FROM Media,Pool WHERE"
325 " VolStatus in ('Full','Used','Error','Append') AND"
326 " Media.PoolId=Pool.PoolId AND Pool.Name='%s'";
/*
 * Query template: MediaId of the least recently written 'Full', 'Used' or
 * 'Error' volume of one pool whose VolBytes is below a limit.
 * First %s = pool name, second %s = byte limit, substituted as text.
 * No use of this template is visible in this part of the file.
 */
328 const char *sql_vol_bytes =
329 "SELECT MediaId FROM Media,Pool WHERE"
330 " VolStatus in ('Full','Used','Error') AND"
331 " Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND"
332 " VolBytes<%s ORDER BY LastWritten ASC LIMIT 1";
/*
 * Query template: distinct client names for jobs whose PoolId matches the
 * named pool. %s = pool name.
 * NOTE(review): JobMedia is tied to Job only; the text contains no condition
 * linking JobMedia to Media -- confirm this is intended.
 * No use of this template is visible in this part of the file.
 */
334 const char *sql_client =
335 "SELECT DISTINCT Client.Name from Client,Pool,Media,Job,JobMedia "
336 " WHERE Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND"
337 " JobMedia.JobId=Job.JobId AND Job.ClientId=Client.ClientId AND"
338 " Job.PoolId=Media.PoolId";
/*
 * Query template: distinct Job.Name values for jobs whose PoolId matches the
 * named pool. %s = pool name.
 * The FROM list and WHERE conditions are the same as in sql_client; only the
 * selected column differs.
 * No use of this template is visible in this part of the file.
 */
340 const char *sql_job =
341 "SELECT DISTINCT Job.Name from Client,Pool,Media,Job,JobMedia "
342 " WHERE Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND"
343 " JobMedia.JobId=Job.JobId AND Job.ClientId=Client.ClientId AND"
344 " Job.PoolId=Media.PoolId";
/*
 * Query template: distinct Job.Job values for jobs whose PoolId matches the
 * named pool. %s = pool name.
 * The FROM list and WHERE conditions are the same as in sql_client and
 * sql_job; only the selected column differs.
 * No use of this template is visible in this part of the file.
 */
346 const char *sql_ujobid =
347 "SELECT DISTINCT Job.Job from Client,Pool,Media,Job,JobMedia "
348 " WHERE Media.PoolId=Pool.PoolId AND Pool.Name='%s' AND"
349 " JobMedia.JobId=Job.JobId AND Job.ClientId=Client.ClientId AND"
350 " Job.PoolId=Media.PoolId";
/*
 * Query template: distinct VolumeName values of the 'Full', 'Used' or 'Error'
 * volumes of one pool. %s = pool name.
 * No use of this template is visible in this part of the file.
 */
352 const char *sql_vol =
353 "SELECT DISTINCT VolumeName FROM Media,Pool WHERE"
354 " VolStatus in ('Full','Used','Error') AND"
355 " Media.PoolId=Pool.PoolId AND Pool.Name='%s'";
360 * Returns: false on error
361 * true if OK and jcr->previous_jr filled in
/*
 * get_job_to_migrate -- choose the job to migrate and load its catalog record
 * into jcr->previous_jr.
 *
 * A non-zero jcr->MigrateJobId is copied straight into previous_jr.JobId.
 * The switch on jcr->job->selection_type builds an SQL query from one of the
 * sql_* templates; db_sql_query() runs it with jobid_handler() collecting the
 * first column of every row into the JobIds string.
 *
 * NOTE(review): several case labels, the jump/return statements and the local
 * declarations are not visible in this listing, so the exact control flow
 * between the pieces below must be checked against the full file.
 */
363 static bool get_job_to_migrate(JCR *jcr)
366 POOL_MEM query(PM_MESSAGE);
367 POOLMEM *JobIds = get_pool_memory(PM_MESSAGE);
/* An explicitly requested JobId is used as is. */
372 if (jcr->MigrateJobId != 0) {
373 jcr->previous_jr.JobId = jcr->MigrateJobId;
375 switch (jcr->job->selection_type) {
376 case MT_SMALLEST_VOL:
377 Mmsg(query, sql_smallest_vol, jcr->pool->hdr.name);
/* NOTE(review): JobIds already holds the buffer obtained at the top of the
 * function; no release of that buffer is visible before this reassignment --
 * confirm whether it is leaked. */
378 JobIds = get_pool_memory(PM_MESSAGE);
/* First query: find the volume (MediaId) to migrate from. */
380 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
381 Jmsg(jcr, M_FATAL, 0,
382 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
385 if (JobIds[0] == 0) {
386 Jmsg(jcr, M_INFO, 0, _("No Volumes found to migrate.\n"));
/* Second query: the MediaId collected above is substituted into the template
 * to list the JobIds on that volume. */
389 Mmsg(query, sql_jobids_from_mediaid, JobIds);
391 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
392 Jmsg(jcr, M_FATAL, 0,
393 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
396 Dmsg1(000, "Jobids=%s\n", JobIds);
/* Same two-step lookup, starting from the oldest volume of the pool. */
399 Mmsg(query, sql_oldest_vol, jcr->pool->hdr.name);
400 JobIds = get_pool_memory(PM_MESSAGE);
402 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
403 Jmsg(jcr, M_FATAL, 0,
404 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
407 if (JobIds[0] == 0) {
408 Jmsg(jcr, M_INFO, 0, _("No jobs found to migrate.\n"));
411 Mmsg(query, sql_jobids_from_mediaid, JobIds);
413 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
414 Jmsg(jcr, M_FATAL, 0,
415 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
418 Dmsg1(000, "Jobids=%s\n", JobIds);
420 case MT_POOL_OCCUPANCY:
/* The pool-bytes query returns a sum; jobid_handler() stores it in JobIds. */
421 Mmsg(query, sql_pool_bytes, jcr->pool->hdr.name);
422 JobIds = get_pool_memory(PM_MESSAGE);
424 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
425 Jmsg(jcr, M_FATAL, 0,
426 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
429 if (JobIds[0] == 0) {
430 Jmsg(jcr, M_INFO, 0, _("No jobs found to migrate.\n"));
/* A missing selection pattern is reported as a fatal error. */
444 if (!jcr->job->selection_pattern) {
445 Jmsg(jcr, M_FATAL, 0, _("No selection pattern specified.\n"));
448 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
449 Jmsg(jcr, M_FATAL, 0,
450 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
453 if (JobIds[0] == 0) {
454 Jmsg(jcr, M_INFO, 0, _("No jobs found to migrate.\n"));
457 Dmsg1(000, "Jobids=%s\n", JobIds);
461 Jmsg(jcr, M_FATAL, 0, _("Unknown Migration Selection Type.\n"));
/* Take a JobId from the collected list. A stat of 0 is reported as "no
 * JobIds"; the condition for the "Invalid JobId" message is not visible here. */
467 stat = get_next_jobid_from_list(&p, &JobId);
468 Dmsg2(000, "get_next_jobid stat=%d JobId=%u\n", stat, JobId);
470 Jmsg(jcr, M_FATAL, 0, _("Invalid JobId found.\n"));
472 } else if (stat == 0) {
473 Jmsg(jcr, M_INFO, 0, _("No JobIds found to migrate.\n"));
477 jcr->previous_jr.JobId = JobId;
478 Dmsg1(000, "Last jobid=%d\n", jcr->previous_jr.JobId);
/* Load the full catalog record of the selected job. */
480 if (!db_get_job_record(jcr, jcr->db, &jcr->previous_jr)) {
481 Jmsg(jcr, M_FATAL, 0, _("Could not get job record for JobId %s to migrate. ERR=%s"),
482 edit_int64(jcr->previous_jr.JobId, ed1),
483 db_strerror(jcr->db));
486 Jmsg(jcr, M_INFO, 0, _("Migration using JobId=%d Job=%s\n"),
487 jcr->previous_jr.JobId, jcr->previous_jr.Job);
/* NOTE(review): two releases of JobIds follow; presumably one belongs to the
 * success exit and one to the failure exit, whose labels and returns are not
 * visible here -- confirm. */
490 free_pool_memory(JobIds);
494 free_pool_memory(JobIds);
500 * Release resources allocated during backup.
/*
 * migration_cleanup -- finish a migration job: record the final status,
 * update the catalog and emit the job report.
 *
 * jcr       the migration job; its target job is jcr->previous_jcr
 * TermCode  termination status applied to both jobs via set_jcr_job_status()
 *
 * No return value.
 *
 * NOTE(review): some local declarations, case labels and closing lines are
 * not visible in this listing; comments cover the visible statements only.
 */
502 void migration_cleanup(JCR *jcr, int TermCode)
504 char sdt[MAX_TIME_LENGTH], edt[MAX_TIME_LENGTH];
505 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], elapsed[50];
506 char term_code[100], sd_term_msg[100];
507 const char *term_msg;
512 JCR *tjcr = jcr->previous_jcr;
513 POOL_MEM query(PM_MESSAGE);
515 /* Ensure target is defined to avoid a lot of testing */
/* Mirror the Storage daemon's file/byte counters and the volume session
 * identifiers into the target job. */
519 tjcr->JobFiles = jcr->JobFiles = jcr->SDJobFiles;
520 tjcr->JobBytes = jcr->JobBytes = jcr->SDJobBytes;
521 tjcr->VolSessionId = jcr->VolSessionId;
522 tjcr->VolSessionTime = jcr->VolSessionTime;
524 Dmsg2(100, "Enter migrate_cleanup %d %c\n", TermCode, TermCode);
525 dequeue_messages(jcr); /* display any queued messages */
526 memset(&mr, 0, sizeof(mr));
527 set_jcr_job_status(jcr, TermCode);
528 set_jcr_job_status(tjcr, TermCode);
531 update_job_end_record(jcr); /* update database */
532 update_job_end_record(tjcr);
/* Overwrite StartTime, EndTime and JobTDate of the target job's catalog row
 * (tjcr->jr.JobId) with the values from the original job (jcr->previous_jr).
 * The result of the query is not checked. */
534 Mmsg(query, "UPDATE Job SET StartTime='%s',EndTime='%s',"
535 "JobTDate=%s WHERE JobId=%s",
536 jcr->previous_jr.cStartTime, jcr->previous_jr.cEndTime,
537 edit_uint64(jcr->previous_jr.JobTDate, ec1),
538 edit_uint64(tjcr->jr.JobId, ec2));
539 db_sql_query(tjcr->db, query.c_str(), NULL, NULL);
/* Re-read the migration job's record for the report; a failure is logged as
 * a warning and the job is marked as terminated in error. */
541 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
542 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
543 db_strerror(jcr->db));
544 set_jcr_job_status(jcr, JS_ErrorTerminated);
/* Fetch the media record for jcr->VolumeName; its VolBytes appears in the
 * report below. */
547 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
548 if (!db_get_media_record(jcr, jcr->db, &mr)) {
549 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
550 mr.VolumeName, db_strerror(jcr->db));
551 set_jcr_job_status(jcr, JS_ErrorTerminated);
554 update_bootstrap_file(tjcr);
/* Pick the termination text (a format with one %s) and the message type from
 * the final job status. */
556 msg_type = M_INFO; /* by default INFO message */
557 switch (jcr->JobStatus) {
559 if (jcr->Errors || jcr->SDErrors) {
560 term_msg = _("%s OK -- with warnings");
562 term_msg = _("%s OK");
566 case JS_ErrorTerminated:
567 term_msg = _("*** %s Error ***");
568 msg_type = M_ERROR; /* Generate error message */
/* If a Storage daemon socket is open, signal BNET_TERMINATE on it and cancel
 * the message thread when one is recorded. */
569 if (jcr->store_bsock) {
570 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
571 if (jcr->SD_msg_chan) {
572 pthread_cancel(jcr->SD_msg_chan);
577 term_msg = _("%s Canceled");
578 if (jcr->store_bsock) {
579 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
580 if (jcr->SD_msg_chan) {
581 pthread_cancel(jcr->SD_msg_chan);
586 term_msg = _("Inappropriate %s term code");
/* Expand the chosen format with the job kind and format the start/end times. */
589 bsnprintf(term_code, sizeof(term_code), term_msg, "Migration");
590 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
591 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
592 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): RunTime is a divisor here; any guard against a zero RunTime
 * would be on lines not shown in this listing -- confirm. */
596 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
/* Volume names written by the target job, for the report. */
598 if (!db_get_job_volume_names(tjcr, tjcr->db, tjcr->jr.JobId, &tjcr->VolumeName)) {
600 * Note, if the job has erred, most likely it did not write any
601 * tape, so suppress this "error" message since in that case
602 * it is normal. Or look at it the other way, only for a
603 * normal exit should we complain about this error.
605 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
606 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(tjcr->db));
608 tjcr->VolumeName[0] = 0; /* none */
611 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
613 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Final job report. Only part of the format string and of its argument list
 * is visible in this listing. */
615 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
616 " Old Backup JobId: %u\n"
617 " New Backup JobId: %u\n"
620 " Backup Level: %s%s\n"
622 " FileSet: \"%s\" %s\n"
626 " Elapsed time: %s\n"
628 " SD Files Written: %s\n"
629 " SD Bytes Written: %s (%sB)\n"
631 " Volume name(s): %s\n"
632 " Volume Session Id: %d\n"
633 " Volume Session Time: %d\n"
634 " Last Volume Bytes: %s (%sB)\n"
636 " SD termination status: %s\n"
637 " Termination: %s\n\n"),
641 jcr->previous_jr.JobId,
645 level_to_str(jcr->JobLevel), jcr->since,
646 jcr->client->hdr.name,
647 jcr->fileset->hdr.name, jcr->FSCreateTime,
651 edit_utime(RunTime, elapsed, sizeof(elapsed)),
653 edit_uint64_with_commas(jcr->SDJobFiles, ec1),
654 edit_uint64_with_commas(jcr->SDJobBytes, ec2),
655 edit_uint64_with_suffix(jcr->jr.JobBytes, ec3),
660 edit_uint64_with_commas(mr.VolBytes, ec4),
661 edit_uint64_with_suffix(mr.VolBytes, ec5),
666 Dmsg1(100, "Leave migrate_cleanup() previous_jcr=0x%x\n", jcr->previous_jcr);
/* Release the target job's jcr if one is attached. */
667 if (jcr->previous_jcr) {
668 free_jcr(jcr->previous_jcr);