3 * Bacula Director -- migrate.c -- responsible for doing migration jobs.
6 * Kern Sibbald, September MMIV
8 * Basic tasks done here:
9 * Open DB and create records for this job.
10 * Open Message Channel with Storage daemon to tell him a job will be starting.
11 * Open connection with Storage daemon and pass him commands
13 * When the Storage daemon finishes the job, update the DB.
18 Copyright (C) 2004-2006 Kern Sibbald
20 This program is free software; you can redistribute it and/or
21 modify it under the terms of the GNU General Public License
22 version 2 as amended with additional clauses defined in the
23 file LICENSE in the main source directory.
25 This program is distributed in the hope that it will be useful,
26 but WITHOUT ANY WARRANTY; without even the implied warranty of
27 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
28 file LICENSE for additional details.
/* Expected Storage daemon reply; passed to response() after the bootstrap file is sent in do_migration(). */
36 static char OKbootstrap[] = "3000 OK bootstrap\n";
/* Forward declaration; the definition appears later in this file. */
37 static bool get_job_to_migrate(JCR *jcr);
40 * Called here before the job is run to do the job
/*
 * do_migration_init: setup performed before the migration job runs.
 *
 * Visible steps:
 *  - calls get_job_to_migrate() to fill in jcr->previous_jr;
 *  - returns true immediately when previous_jr.JobId is 0 (no work);
 *  - calls get_or_create_fileset_record();
 *  - switches on the previous job's level, where jcr->pool can be
 *    replaced by full_pool, inc_pool or dif_pool;
 *  - looks the pool up in the catalog by name, calling create_pool()
 *    when the lookup fails, and stores the PoolId in jcr->jr;
 *  - copies the pool's storage into the jcr and calls
 *    create_restore_bootstrap_file().
 *
 * NOTE(review): this listing omits lines (see the gaps in the embedded
 * numbering), so the case labels and the failure returns are not shown.
 */
43 bool do_migration_init(JCR *jcr)
47 if (!get_job_to_migrate(jcr)) {
51 if (jcr->previous_jr.JobId == 0) {
52 return true; /* no work */
55 if (!get_or_create_fileset_record(jcr)) {
60 * Get the Pool record -- first apply any level defined pools
62 switch (jcr->previous_jr.JobLevel) {
65 jcr->pool = jcr->full_pool;
70 jcr->pool = jcr->inc_pool;
75 jcr->pool = jcr->dif_pool;
/* Look the selected pool up by name; pr receives the catalog record. */
79 memset(&pr, 0, sizeof(pr));
80 bstrncpy(pr.Name, jcr->pool->hdr.name, sizeof(pr.Name));
/* NOTE(review): the lookup is retried after each create_pool(); presumably the M_FATAL path leaves the loop -- confirm against the full source. */
82 while (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
83 /* Try to create the pool */
84 if (create_pool(jcr, jcr->db, jcr->pool, POOL_OP_CREATE) < 0) {
85 Jmsg(jcr, M_FATAL, 0, _("Pool %s not in database. %s"), pr.Name,
86 db_strerror(jcr->db));
89 Jmsg(jcr, M_INFO, 0, _("Pool %s created in database.\n"), pr.Name);
92 jcr->jr.PoolId = pr.PoolId;
94 /* If pool storage specified, use it instead of job storage */
95 copy_storage(jcr, jcr->pool->storage);
98 Jmsg(jcr, M_FATAL, 0, _("No Storage specification found in Job or Pool.\n"));
102 if (!create_restore_bootstrap_file(jcr)) {
109 * Do a Migration of a previous job
111 * Returns: false on failure
/*
 * do_migration: run the migration of the job recorded in
 * jcr->previous_jr.
 *
 * Visible flow: return true at once when previous_jr.JobId is 0;
 * otherwise create a second jcr (tjcr, also stored in
 * jcr->previous_jcr) for the target job, resolve the original pool
 * and its NextPool, update the job start record of both jcrs, connect
 * to the Storage daemon, send the bootstrap file, issue "run", wait
 * for the Storage daemon to terminate and call migration_cleanup()
 * when the resulting status is JS_Terminated.
 *
 * NOTE(review): this listing omits lines (see the gaps in the embedded
 * numbering), so declarations and failure returns are not shown.
 */
114 bool do_migration(JCR *jcr)
123 if (jcr->previous_jr.JobId == 0) {
124 jcr->JobStatus = JS_Terminated;
125 migration_cleanup(jcr, jcr->JobStatus);
126 return true; /* no work */
128 Dmsg4(100, "Target: Name=%s JobId=%d Type=%c Level=%c\n",
129 jcr->previous_jr.Name, jcr->previous_jr.JobId,
130 jcr->previous_jr.JobType, jcr->previous_jr.JobLevel);
132 Dmsg4(100, "Current: Name=%s JobId=%d Type=%c Level=%c\n",
133 jcr->jr.Name, jcr->jr.JobId,
134 jcr->jr.JobType, jcr->jr.JobLevel);
/* Job resources for the migration job itself and for the job being migrated. */
137 job = (JOB *)GetResWithName(R_JOB, jcr->jr.Name);
138 tjob = (JOB *)GetResWithName(R_JOB, jcr->previous_jr.Name);
145 * Target jcr is the new Job that corresponds to the original
146 * target job. It "runs" at the same time as the current
147 * migration job and becomes a new backup job that replaces
148 * the original backup job. Most operations on the current
149 * migration jcr are also done on the target jcr.
151 tjcr = jcr->previous_jcr = new_jcr(sizeof(JCR), dird_free_jcr);
152 memcpy(&tjcr->previous_jr, &jcr->previous_jr, sizeof(tjcr->previous_jr));
154 /* Turn the tjcr into a "real" job */
155 set_jcr_defaults(tjcr, tjob);
156 if (!setup_job(tjcr)) {
159 /* Set output PoolId and FileSetId. */
160 tjcr->jr.PoolId = jcr->jr.PoolId;
161 tjcr->jr.FileSetId = jcr->jr.FileSetId;
164 * Get the PoolId used with the original job. Then
165 * find the pool name from the database record.
167 memset(&pr, 0, sizeof(pr));
168 pr.PoolId = tjcr->previous_jr.PoolId;
169 if (!db_get_pool_record(jcr, jcr->db, &pr)) {
170 Jmsg(jcr, M_FATAL, 0, _("Pool for JobId %s not in database. ERR=%s\n"),
171 edit_int64(pr.PoolId, ed1), db_strerror(jcr->db));
174 /* Get the pool resource corresponding to the original job */
175 pool = (POOL *)GetResWithName(R_POOL, pr.Name);
177 Jmsg(jcr, M_FATAL, 0, _("Pool resource \"%s\" not found.\n"), pr.Name);
181 /* Check Migration time and High/Low water marks */
184 /* If pool storage specified, use it for restore */
185 copy_storage(tjcr, pool->storage);
187 /* If the original backup pool has a NextPool, make sure a
188 * record exists in the database.
190 if (pool->NextPool) {
191 memset(&pr, 0, sizeof(pr));
192 bstrncpy(pr.Name, pool->NextPool->hdr.name, sizeof(pr.Name));
194 while (!db_get_pool_record(jcr, jcr->db, &pr)) { /* get by Name */
195 /* Try to create the pool */
196 if (create_pool(jcr, jcr->db, pool->NextPool, POOL_OP_CREATE) < 0) {
197 Jmsg(jcr, M_FATAL, 0, _("Pool \"%s\" not in database. %s"), pr.Name,
198 db_strerror(jcr->db));
201 Jmsg(jcr, M_INFO, 0, _("Pool \"%s\" created in database.\n"), pr.Name);
205 * put the "NextPool" resource pointer in our jcr so that we
206 * can pull the Storage reference from it.
208 tjcr->pool = jcr->pool = pool->NextPool;
209 tjcr->jr.PoolId = jcr->jr.PoolId = pr.PoolId;
212 /* If pool storage specified, use it instead of job storage for backup */
213 copy_storage(jcr, jcr->pool->storage);
215 /* Print Job Start message */
216 Jmsg(jcr, M_INFO, 0, _("Start Migration JobId %s, Job=%s\n"),
217 edit_uint64(jcr->JobId, ed1), jcr->Job);
/* Mark both jcrs as running, the same jcr/tjcr pairing used for JS_WaitSD and JS_Running further down. */
219 set_jcr_job_status(jcr, JS_Running);
220 set_jcr_job_status(tjcr, JS_Running);
221 Dmsg2(100, "JobId=%d JobLevel=%c\n", jcr->jr.JobId, jcr->jr.JobLevel);
222 if (!db_update_job_start_record(jcr, jcr->db, &jcr->jr)) {
223 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(jcr->db));
227 if (!db_update_job_start_record(tjcr, tjcr->db, &tjcr->jr)) {
228 Jmsg(jcr, M_FATAL, 0, "%s", db_strerror(tjcr->db));
234 * Open a message channel connection with the Storage
235 * daemon. This is to let him know that our client
236 * will be contacting him for a backup session.
239 Dmsg0(110, "Open connection with storage daemon\n");
240 set_jcr_job_status(jcr, JS_WaitSD);
241 set_jcr_job_status(tjcr, JS_WaitSD);
243 * Start conversation with Storage daemon
245 if (!connect_to_storage_daemon(jcr, 10, SDConnectTimeout, 1)) {
248 sd = jcr->store_bsock;
250 * Now start a job with the Storage daemon
252 Dmsg2(000, "Read store=%s, write store=%s\n",
253 ((STORE *)tjcr->storage->first())->hdr.name,
254 ((STORE *)jcr->storage->first())->hdr.name);
/* Read storage is taken from tjcr and write storage from jcr, as labelled in the Dmsg2 above. */
255 if (!start_storage_daemon_job(jcr, tjcr->storage, jcr->storage)) {
258 Dmsg0(150, "Storage daemon connection OK\n");
260 if (!send_bootstrap_file(jcr, sd) ||
261 !response(jcr, sd, OKbootstrap, "Bootstrap", DISPLAY_ERROR)) {
267 * Now start a Storage daemon message thread
269 if (!start_storage_daemon_message_thread(jcr)) {
273 if (!bnet_fsend(sd, "run")) {
277 set_jcr_job_status(jcr, JS_Running);
278 set_jcr_job_status(tjcr, JS_Running);
280 /* Pickup Job termination data */
281 /* Note, the SD stores in jcr->JobFiles/ReadBytes/JobBytes/Errors */
282 wait_for_storage_daemon_termination(jcr);
/* The job's final status is whatever the Storage daemon reported. */
284 jcr->JobStatus = jcr->SDJobStatus;
285 if (jcr->JobStatus == JS_Terminated) {
286 migration_cleanup(jcr, jcr->JobStatus);
293 * Callback handler that builds a comma-separated list of JobIds
/*
 * Row callback handed to db_sql_query().  ctx is the caller's POOLMEM
 * string; the first column of each row is appended to it, with a ","
 * added first when the string is already non-empty.
 *
 * NOTE(review): JobIds here is a local copy of the caller's pointer.  If
 * pm_strcat() can move the buffer while growing it, the caller would not
 * see the new address -- confirm against pm_strcat()'s contract.
 */
295 static int jobid_handler(void *ctx, int num_fields, char **row)
297 POOLMEM *JobIds = (POOLMEM *)ctx;
299 if (JobIds[0] != 0) {
300 pm_strcat(JobIds, ",");
302 pm_strcat(JobIds, row[0]);
/* One MediaId from the pool named by %s (Pool.Name) whose VolStatus is 'Full' or 'Used', lowest VolBytes first. */
306 const char *sql_smallest_vol =
307 "SELECT MediaId FROM Media,Pool WHERE"
308 " VolStatus in ('Full','Used') AND"
309 " Media.PoolId=Pool.PoolId AND Pool.Name='%s'"
310 " ORDER BY VolBytes ASC LIMIT 1";
/* Same selection as above, but ordered by LastWritten ascending. */
312 const char *sql_oldest_vol =
313 "SELECT MediaId FROM Media,Pool WHERE"
314 " VolStatus in ('Full','Used') AND"
315 " Media.PoolId=Pool.PoolId AND Pool.Name='%s'"
316 " ORDER BY LastWritten ASC LIMIT 1";
/* Distinct JobIds that have JobMedia rows for the MediaId substituted for %s, ordered by Job.StartTime. */
318 const char *sql_jobids_from_mediaid =
319 "SELECT DISTINCT Job.JobId FROM JobMedia,Job"
320 " WHERE JobMedia.JobId=Job.JobId AND JobMedia.MediaId=%s"
321 " ORDER by Job.StartTime";
326 * Returns: false on error
327 * true if OK and jcr->previous_jr filled in
/*
 * get_job_to_migrate: choose the job to migrate and load its catalog
 * record into jcr->previous_jr.
 *
 * When jcr->MigrateJobId is non-zero it is used directly.  Otherwise
 * the job's selection_type drives an SQL lookup whose rows are
 * collected into JobIds through jobid_handler().
 *
 * JobIds is allocated exactly once, at its declaration, and released
 * by the free_pool_memory() calls near the end of the function
 * (presumably one per exit path -- the labels are not shown here).
 *
 * NOTE(review): this listing omits lines (see the gaps in the embedded
 * numbering), so case labels and the failure jumps are not shown.
 */
329 static bool get_job_to_migrate(JCR *jcr)
332 POOL_MEM query(PM_MESSAGE);
333 POOLMEM *JobIds = get_pool_memory(PM_MESSAGE);
335 if (jcr->MigrateJobId != 0) {
336 jcr->previous_jr.JobId = jcr->MigrateJobId;
338 switch (jcr->job->selection_type) {
339 case MT_SMALLEST_VOL:
340 Mmsg(query, sql_smallest_vol, jcr->pool->hdr.name);
/* JobIds was allocated at declaration; reuse it instead of allocating a second buffer that would orphan the first. */
343 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
344 Jmsg(jcr, M_FATAL, 0,
345 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
348 if (JobIds[0] == 0) {
349 Jmsg(jcr, M_INFO, 0, _("No Volumes found to migrate.\n"));
/* NOTE(review): JobIds is both the %s argument of this query and the handler's accumulation buffer; unless it is cleared in a line not shown here, the JobIds are appended after the MediaId -- confirm. */
352 Mmsg(query, sql_jobids_from_mediaid, JobIds);
354 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
355 Jmsg(jcr, M_FATAL, 0,
356 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
359 Dmsg1(000, "Jobids=%s\n", JobIds);
363 Mmsg(query, sql_oldest_vol, jcr->pool->hdr.name);
/* JobIds was allocated at declaration; reuse it instead of allocating a second buffer that would orphan the first. */
366 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
367 Jmsg(jcr, M_FATAL, 0,
368 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
371 if (JobIds[0] == 0) {
372 Jmsg(jcr, M_INFO, 0, _("No jobs found to migrate.\n"));
375 Mmsg(query, sql_jobids_from_mediaid, JobIds);
377 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
378 Jmsg(jcr, M_FATAL, 0,
379 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
382 Dmsg1(000, "Jobids=%s\n", JobIds);
385 case MT_POOL_OCCUPANCY:
397 if (!jcr->job->selection_pattern) {
398 Jmsg(jcr, M_FATAL, 0, _("No selection pattern specified.\n"));
401 if (!db_sql_query(jcr->db, query.c_str(), jobid_handler, (void *)JobIds)) {
402 Jmsg(jcr, M_FATAL, 0,
403 _("SQL to get Volume failed. ERR=%s\n"), db_strerror(jcr->db));
406 if (JobIds[0] == 0) {
407 Jmsg(jcr, M_INFO, 0, _("No jobs found to migrate.\n"));
410 Dmsg1(000, "Jobids=%s\n", JobIds);
414 Jmsg(jcr, M_FATAL, 0, _("Unknown Migration Selection Type.\n"));
418 Dmsg1(100, "Last jobid=%d\n", jcr->previous_jr.JobId);
/* Load the full catalog record for the chosen JobId. */
420 if (!db_get_job_record(jcr, jcr->db, &jcr->previous_jr)) {
421 Jmsg(jcr, M_FATAL, 0, _("Could not get job record for JobId %s to migrate. ERR=%s"),
422 edit_int64(jcr->previous_jr.JobId, ed1),
423 db_strerror(jcr->db));
426 Jmsg(jcr, M_INFO, 0, _("Migration using JobId=%d Job=%s\n"),
427 jcr->previous_jr.JobId, jcr->previous_jr.Job);
430 free_pool_memory(JobIds);
434 free_pool_memory(JobIds);
440 * Release resources allocated during backup.
/*
 * migration_cleanup: finish a migration job.
 *
 *   jcr      - control record of the migration job
 *   TermCode - status set on both jcr and the target jcr
 *
 * Visible steps: copy the Storage daemon file/byte counters and the
 * volume session ids into the target jcr, set TermCode on both jcrs,
 * update both job end records, rewrite StartTime/EndTime/JobTDate of
 * the Job row whose JobId is tjcr->jr.JobId using the values held in
 * jcr->previous_jr, emit the summary report through Jmsg() and free
 * jcr->previous_jcr.
 *
 * NOTE(review): this listing omits lines (see the gaps in the embedded
 * numbering); several declarations, case labels and parts of the
 * report's format/argument list are not shown.
 */
442 void migration_cleanup(JCR *jcr, int TermCode)
444 char sdt[MAX_TIME_LENGTH], edt[MAX_TIME_LENGTH];
445 char ec1[30], ec2[30], ec3[30], ec4[30], ec5[30], elapsed[50];
446 char term_code[100], sd_term_msg[100];
447 const char *term_msg;
452 JCR *tjcr = jcr->previous_jcr;
453 POOL_MEM query(PM_MESSAGE);
455 /* Ensure target is defined to avoid a lot of testing */
/* Mirror the Storage daemon's counters and session ids into the target jcr. */
459 tjcr->JobFiles = jcr->JobFiles = jcr->SDJobFiles;
460 tjcr->JobBytes = jcr->JobBytes = jcr->SDJobBytes;
461 tjcr->VolSessionId = jcr->VolSessionId;
462 tjcr->VolSessionTime = jcr->VolSessionTime;
464 Dmsg2(100, "Enter migrate_cleanup %d %c\n", TermCode, TermCode);
465 dequeue_messages(jcr); /* display any queued messages */
466 memset(&mr, 0, sizeof(mr));
467 set_jcr_job_status(jcr, TermCode);
468 set_jcr_job_status(tjcr, TermCode);
471 update_job_end_record(jcr); /* update database */
472 update_job_end_record(tjcr);
/* Give the new job row the times of the job that was migrated. */
474 Mmsg(query, "UPDATE Job SET StartTime='%s',EndTime='%s',"
475 "JobTDate=%s WHERE JobId=%s",
476 jcr->previous_jr.cStartTime, jcr->previous_jr.cEndTime,
477 edit_uint64(jcr->previous_jr.JobTDate, ec1),
478 edit_uint64(tjcr->jr.JobId, ec2));
479 db_sql_query(tjcr->db, query.c_str(), NULL, NULL);
/* Re-read the job and media records used for the statistics in the report; a failure downgrades the status to JS_ErrorTerminated. */
481 if (!db_get_job_record(jcr, jcr->db, &jcr->jr)) {
482 Jmsg(jcr, M_WARNING, 0, _("Error getting job record for stats: %s"),
483 db_strerror(jcr->db));
484 set_jcr_job_status(jcr, JS_ErrorTerminated);
487 bstrncpy(mr.VolumeName, jcr->VolumeName, sizeof(mr.VolumeName));
488 if (!db_get_media_record(jcr, jcr->db, &mr)) {
489 Jmsg(jcr, M_WARNING, 0, _("Error getting Media record for Volume \"%s\": ERR=%s"),
490 mr.VolumeName, db_strerror(jcr->db));
491 set_jcr_job_status(jcr, JS_ErrorTerminated);
494 update_bootstrap_file(tjcr);
/* Choose the termination text (a format with one %s) and the message type from the final job status. */
496 msg_type = M_INFO; /* by default INFO message */
497 switch (jcr->JobStatus) {
499 if (jcr->Errors || jcr->SDErrors) {
500 term_msg = _("%s OK -- with warnings");
502 term_msg = _("%s OK");
506 case JS_ErrorTerminated:
507 term_msg = _("*** %s Error ***");
508 msg_type = M_ERROR; /* Generate error message */
/* When a Storage daemon socket is still open, signal BNET_TERMINATE on it and cancel the SD message thread if one exists. */
509 if (jcr->store_bsock) {
510 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
511 if (jcr->SD_msg_chan) {
512 pthread_cancel(jcr->SD_msg_chan);
517 term_msg = _("%s Canceled");
518 if (jcr->store_bsock) {
519 bnet_sig(jcr->store_bsock, BNET_TERMINATE);
520 if (jcr->SD_msg_chan) {
521 pthread_cancel(jcr->SD_msg_chan);
526 term_msg = _("Inappropriate %s term code");
/* term_msg's %s is filled with the literal "Migration". */
529 bsnprintf(term_code, sizeof(term_code), term_msg, "Migration");
530 bstrftimes(sdt, sizeof(sdt), jcr->jr.StartTime);
531 bstrftimes(edt, sizeof(edt), jcr->jr.EndTime);
532 RunTime = jcr->jr.EndTime - jcr->jr.StartTime;
/* NOTE(review): a guard for RunTime <= 0 is presumably in the lines not shown before this division -- confirm. */
536 kbps = (double)jcr->jr.JobBytes / (1000 * RunTime);
538 if (!db_get_job_volume_names(tjcr, tjcr->db, tjcr->jr.JobId, &tjcr->VolumeName)) {
540 * Note, if the job has erred, most likely it did not write any
541 * tape, so suppress this "error" message since in that case
542 * it is normal. Or look at it the other way, only for a
543 * normal exit should we complain about this error.
545 if (jcr->JobStatus == JS_Terminated && jcr->jr.JobBytes) {
546 Jmsg(jcr, M_ERROR, 0, "%s", db_strerror(tjcr->db));
548 tjcr->VolumeName[0] = 0; /* none */
551 jobstatus_to_ascii(jcr->SDJobStatus, sd_term_msg, sizeof(sd_term_msg));
553 // bmicrosleep(15, 0); /* for debugging SIGHUP */
/* Summary report; msg_type was chosen by the status switch above. */
555 Jmsg(jcr, msg_type, 0, _("Bacula %s (%s): %s\n"
556 " Old Backup JobId: %u\n"
557 " New Backup JobId: %u\n"
560 " Backup Level: %s%s\n"
562 " FileSet: \"%s\" %s\n"
566 " Elapsed time: %s\n"
568 " SD Files Written: %s\n"
569 " SD Bytes Written: %s (%sB)\n"
571 " Volume name(s): %s\n"
572 " Volume Session Id: %d\n"
573 " Volume Session Time: %d\n"
574 " Last Volume Bytes: %s (%sB)\n"
576 " SD termination status: %s\n"
577 " Termination: %s\n\n"),
581 jcr->previous_jr.JobId,
585 level_to_str(jcr->JobLevel), jcr->since,
586 jcr->client->hdr.name,
587 jcr->fileset->hdr.name, jcr->FSCreateTime,
591 edit_utime(RunTime, elapsed, sizeof(elapsed)),
593 edit_uint64_with_commas(jcr->SDJobFiles, ec1),
594 edit_uint64_with_commas(jcr->SDJobBytes, ec2),
595 edit_uint64_with_suffix(jcr->jr.JobBytes, ec3),
600 edit_uint64_with_commas(mr.VolBytes, ec4),
601 edit_uint64_with_suffix(mr.VolBytes, ec5),
606 Dmsg1(100, "Leave migrate_cleanup() previous_jcr=0x%x\n", jcr->previous_jcr);
/* Release the target jcr created in do_migration(), when there is one. */
607 if (jcr->previous_jcr) {
608 free_jcr(jcr->previous_jcr);