From f815c961561470cc92aa3173a1e01b19d8bd4846 Mon Sep 17 00:00:00 2001 From: Kern Sibbald Date: Mon, 24 Mar 2008 13:30:48 +0000 Subject: [PATCH] 24Mar08 kes Tweak Win32 mount point code. kes Fix a couple of bugs in the accurate code (bad sscanf, buffer not cleared giving bad file estimate). kes Implement BIG_MALLOC in htable code. Runs 2.5 times faster for 5 Million entries. git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@6672 91ce42f0-d328-0410-95d8-f526ca767f89 --- bacula/src/dird/backup.c | 2 + bacula/src/filed/backup.c | 61 ++++++++++++++------------- bacula/src/lib/htable.c | 67 ++++++++++++++++++++---------- bacula/src/lib/htable.h | 15 +++++-- bacula/src/version.h | 6 +-- bacula/src/win32/compat/compat.cpp | 49 +++++++++++++--------- bacula/technotes-2.3 | 6 +++ 7 files changed, 127 insertions(+), 79 deletions(-) diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c index 2fbc295b86..1d596b6998 100644 --- a/bacula/src/dird/backup.c +++ b/bacula/src/dird/backup.c @@ -144,8 +144,10 @@ bool send_accurate_current_files(JCR *jcr) /* to be able to allocate the right size for htable */ POOLMEM *nb = get_pool_memory(PM_FNAME); + *nb = 0; /* clear buffer */ Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids); db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb); + Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb); jcr->file_bsock->fsend("accurate files=%s\n", nb); db_get_file_list(jcr, jcr->db, jobids, accurate_list_handler, (void *)jcr); diff --git a/bacula/src/filed/backup.c b/bacula/src/filed/backup.c index 7db7d620b2..3e201917e5 100644 --- a/bacula/src/filed/backup.c +++ b/bacula/src/filed/backup.c @@ -115,7 +115,6 @@ bail_out: } /* - * This function doesn't work very well with smartalloc * TODO: use bigbuffer from htable */ int accurate_cmd(JCR *jcr) @@ -124,11 +123,11 @@ int accurate_cmd(JCR *jcr) int len; struct stat statp; int32_t LinkFIc; - uint64_t nb; + int32_t nb; CurFile *elt=NULL; char *lstat; - if 
(jcr->accurate==false || job_canceled(jcr) || jcr->JobLevel==L_FULL) { + if (!jcr->accurate || job_canceled(jcr) || jcr->JobLevel==L_FULL) { return true; } @@ -136,6 +135,7 @@ int accurate_cmd(JCR *jcr) dir->fsend(_("2991 Bad accurate command\n")); return false; } + Dmsg2(200, "nb=%d msg=%s\n", nb, dir->msg); jcr->file_list = (htable *)malloc(sizeof(htable)); jcr->file_list->init(elt, &elt->link, nb); @@ -146,16 +146,13 @@ int accurate_cmd(JCR *jcr) */ /* get current files */ while (dir->recv() >= 0) { - len = strlen(dir->msg); - if ((len+1) < dir->msglen) { -// elt = (CurFile *)malloc(sizeof(CurFile)); -// elt->fname = (char *) malloc(dir->msglen+1); - + len = strlen(dir->msg) + 1; + if (len < dir->msglen) { /* we store CurFile, fname and ctime/mtime in the same chunk */ - elt = (CurFile *)malloc(sizeof(CurFile)+len+1); - elt->fname = (char *) elt+sizeof(CurFile); + elt = (CurFile *)jcr->file_list->hash_malloc(sizeof(CurFile)+len); + elt->fname = (char *)elt+sizeof(CurFile); strcpy(elt->fname, dir->msg); - lstat = dir->msg + len + 1; + lstat = dir->msg + len; decode_stat(lstat, &statp, &LinkFIc); /* decode catalog stat */ elt->ctime = statp.st_ctime; elt->mtime = statp.st_mtime; @@ -164,17 +161,20 @@ int accurate_cmd(JCR *jcr) Dmsg2(500, "add fname=%s lstat=%s\n", elt->fname, lstat); } } + +#ifdef DEBUG extern void *start_heap; char b1[50], b2[50], b3[50], b4[50], b5[50]; Dmsg5(1," Heap: heap=%s smbytes=%s max_bytes=%s bufs=%s max_bufs=%s\n", - edit_uint64_with_commas((char *)sbrk(0)-(char *)start_heap, b1), - edit_uint64_with_commas(sm_bytes, b2), - edit_uint64_with_commas(sm_max_bytes, b3), - edit_uint64_with_commas(sm_buffers, b4), - edit_uint64_with_commas(sm_max_buffers, b5)); + edit_uint64_with_commas((char *)sbrk(0)-(char *)start_heap, b1), + edit_uint64_with_commas(sm_bytes, b2), + edit_uint64_with_commas(sm_max_bytes, b3), + edit_uint64_with_commas(sm_buffers, b4), + edit_uint64_with_commas(sm_max_buffers, b5)); // jcr->file_list->stats(); +#endif return 
true; } @@ -186,7 +186,7 @@ bool accurate_send_deleted_list(JCR *jcr) int stream = STREAM_UNIX_ATTRIBUTES; - if (jcr->accurate == false || jcr->JobLevel == L_FULL) { + if (!jcr->accurate || jcr->JobLevel == L_FULL) { goto bail_out; } @@ -267,7 +267,7 @@ bool blast_data_to_storage_daemon(JCR *jcr, char *addr) } else { buf_size = 0; /* use default */ } - if (!bnet_set_buffer_size(sd, buf_size, BNET_SETBUF_WRITE)) { + if (!sd->set_buffer_size(buf_size, BNET_SETBUF_WRITE)) { set_jcr_job_status(jcr, JS_ErrorTerminated); Jmsg(jcr, M_FATAL, 0, _("Cannot set buffer size FD->SD.\n")); return false; @@ -275,13 +275,14 @@ bool blast_data_to_storage_daemon(JCR *jcr, char *addr) jcr->buf_size = sd->msglen; /* Adjust for compression so that output buffer is - * 12 bytes + 0.1% larger than input buffer plus 18 bytes. - * This gives a bit extra plus room for the sparse addr if any. - * Note, we adjust the read size to be smaller so that the - * same output buffer can be used without growing it. + * 12 bytes + 0.1% larger than input buffer plus 18 bytes. + * This gives a bit extra plus room for the sparse addr if any. + * Note, we adjust the read size to be smaller so that the + * same output buffer can be used without growing it. * - * The zlib compression workset is initialized here to minimise - * the "per file" load. The jcr member is only set, if the init was successful. + * The zlib compression workset is initialized here to minimize + * the "per file" load. The jcr member is only set, if the init + * was successful. 
*/ jcr->compress_buf_size = jcr->buf_size + ((jcr->buf_size+999) / 1000) + 30; jcr->compress_buf = get_memory(jcr->compress_buf_size); @@ -529,21 +530,21 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level) break; case FT_NOACCESS: { berrno be; - Jmsg(jcr, M_NOTSAVED, 0, _(" Could not access %s: ERR=%s\n"), ff_pkt->fname, + Jmsg(jcr, M_NOTSAVED, 0, _(" Could not access \"%s\": ERR=%s\n"), ff_pkt->fname, be.bstrerror(ff_pkt->ff_errno)); jcr->Errors++; return 1; } case FT_NOFOLLOW: { berrno be; - Jmsg(jcr, M_NOTSAVED, 0, _(" Could not follow link %s: ERR=%s\n"), + Jmsg(jcr, M_NOTSAVED, 0, _(" Could not follow link \"%s\": ERR=%s\n"), ff_pkt->fname, be.bstrerror(ff_pkt->ff_errno)); jcr->Errors++; return 1; } case FT_NOSTAT: { berrno be; - Jmsg(jcr, M_NOTSAVED, 0, _(" Could not stat %s: ERR=%s\n"), ff_pkt->fname, + Jmsg(jcr, M_NOTSAVED, 0, _(" Could not stat \"%s\": ERR=%s\n"), ff_pkt->fname, be.bstrerror(ff_pkt->ff_errno)); jcr->Errors++; return 1; @@ -557,7 +558,7 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level) return 1; case FT_NOOPEN: { berrno be; - Jmsg(jcr, M_NOTSAVED, 0, _(" Could not open directory %s: ERR=%s\n"), + Jmsg(jcr, M_NOTSAVED, 0, _(" Could not open directory \"%s\": ERR=%s\n"), ff_pkt->fname, be.bstrerror(ff_pkt->ff_errno)); jcr->Errors++; return 1; @@ -689,7 +690,7 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level) if (bopen(&ff_pkt->bfd, ff_pkt->fname, O_RDONLY | O_BINARY | noatime, 0) < 0) { ff_pkt->ff_errno = errno; berrno be; - Jmsg(jcr, M_NOTSAVED, 0, _(" Cannot open %s: ERR=%s.\n"), ff_pkt->fname, + Jmsg(jcr, M_NOTSAVED, 0, _(" Cannot open \"%s\": ERR=%s.\n"), ff_pkt->fname, be.bstrerror()); jcr->Errors++; if (tid) { @@ -726,7 +727,7 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level) if (!bopen_rsrc(&ff_pkt->bfd, ff_pkt->fname, O_RDONLY | O_BINARY, 0) < 0) { ff_pkt->ff_errno = errno; berrno be; - Jmsg(jcr, M_NOTSAVED, -1, _(" Cannot open resource fork for %s: ERR=%s.\n"), + Jmsg(jcr, M_NOTSAVED, -1, _(" Cannot 
open resource fork for \"%s\": ERR=%s.\n"), ff_pkt->fname, be.bstrerror()); jcr->Errors++; if (is_bopen(&ff_pkt->bfd)) { diff --git a/bacula/src/lib/htable.c b/bacula/src/lib/htable.c index 9284839656..280939dcf3 100644 --- a/bacula/src/lib/htable.c +++ b/bacula/src/lib/htable.c @@ -76,8 +76,23 @@ void htable::malloc_buf(int size) Dmsg2(200, "malloc buf size=%d rem=%d\n", size, hmem->rem); } -char *htable::hash_alloc(int size) +/* This routine frees the whole tree */ +void htable::hash_free() { + struct h_mem *hmem, *rel; + + for (hmem=mem; hmem; ) { + rel = hmem; + hmem = hmem->next; + free(rel); + } +} + +#endif + +char *htable::hash_malloc(int size) +{ +#ifdef BIG_MALLOC char *buf; int asize = BALIGN(size); @@ -94,21 +109,14 @@ char *htable::hash_alloc(int size) buf = mem->mem; mem->mem += asize; return buf; +#else + total_size += size; + blocks++; + return (char *)malloc(size); +#endif } -/* This routine frees the whole tree */ -void htable::hash_free() -{ - struct h_mem *hmem, *rel; - - for (hmem=mem; hmem; ) { - rel = hmem; - hmem = hmem->next; - free(rel); - } -} -#endif /* @@ -126,6 +134,9 @@ void htable::hash_index(char *key) Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index); } +/* + * tsize is the estimated number of entries in the hash table + */ htable::htable(void *item, void *link, int tsize) { init(item, link, tsize); @@ -134,6 +145,10 @@ htable::htable(void *item, void *link, int tsize) void htable::init(void *item, void *link, int tsize) { int pwr; + + if (tsize < 31) { + tsize = 31; + } tsize >>= 2; for (pwr=0; tsize; pwr++) { tsize >>= 1; @@ -148,9 +163,11 @@ void htable::init(void *item, void *link, int tsize) memset(table, 0, buckets * sizeof(hlink *)); walkptr = NULL; walk_index = 0; + total_size = 0; + blocks = 0; #ifdef BIG_MALLOC mem = NULL; - malloc_buf(1000000); /* ***FIXME*** base off of size */ + malloc_buf(1000000); /* ***FIXME*** need variable or some estimate */ #endif } @@ -163,7 +180,7 @@ uint32_t htable::size() * 
Take each hash link and walk down the chain of items * that hash there counting them (i.e. the hits), * then report that number. - * Obiously, the more hits in a chain, the more time + * Obiously, the more hits in a chain, the more time * it takes to reference them. Empty chains are not so * hot either -- as it means unused or wasted space. */ @@ -196,7 +213,12 @@ void htable::stats() for (i=0; i < MAX_COUNT; i++) { printf("%2d: %d\n",i, hits[i]); } + printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items); printf("max hits in a bucket = %d\n", max); +#ifdef BIG_MALLOC + printf("total bytes malloced = %d\n", total_size); + printf("total blocks malloced = %d\n", blocks); +#endif } void htable::grow_table() @@ -328,6 +350,9 @@ void *htable::first() /* Destroy the table and its contents */ void htable::destroy() { +#ifdef BIG_MALLOC + hash_free(); +#else void *ni; void *li = first(); @@ -336,6 +361,7 @@ void htable::destroy() free(li); li=ni; } +#endif free(table); table = NULL; @@ -351,7 +377,7 @@ struct MYJCR { hlink link; }; -#define NITEMS 1000000 +#define NITEMS 5000000 int main() { @@ -369,13 +395,8 @@ int main() int len; len = sprintf(mkey, "This is htable item %d", i) + 1; -#ifdef BIG_MALLOC - jcr = (MYJCR *)jcrtbl->hash_alloc(sizeof(MYJCR)); - jcr->key = (char *)jcrtbl->hash_alloc(len); -#else - jcr = (MYJCR *)malloc(sizeof(MYJCR)); - jcr->key = (char *)malloc(len); -#endif + jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR)); + jcr->key = (char *)jcrtbl->hash_malloc(len); memcpy(jcr->key, mkey, len); Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr); diff --git a/bacula/src/lib/htable.h b/bacula/src/lib/htable.h index 4e08eb65eb..48dcc2ebac 100644 --- a/bacula/src/lib/htable.h +++ b/bacula/src/lib/htable.h @@ -40,17 +40,25 @@ /* * BIG_MALLOC is to provide a large malloc service to htable - * not yet implemented, and not yet working. 
*/ -//#define BIG_MALLOC +#define BIG_MALLOC /* * Loop var through each member of table */ +#ifdef HAVE_TYPEOF +#define foreach_htable(var, tbl) \ + for((var)=(typeof(var))((tbl)->first()); \ + (var); \ + (var)=(typeof(var))((tbl)->next())) +#else #define foreach_htable(var, tbl) \ for((*((void **)&(var))=(void *)((tbl)->first())); \ (var); \ (*((void **)&(var))=(void *)((tbl)->next()))) +#endif + + struct hlink { void *next; /* next hash item */ @@ -85,6 +93,7 @@ class htable : public SMARTALLOC { #endif void hash_index(char *key); /* produce hash key,index */ void grow_table(); /* grow the table */ + public: htable(void *item, void *link, int tsize = 31); ~htable() { destroy(); } @@ -96,8 +105,8 @@ public: void destroy(); void stats(); /* print stats about the table */ uint32_t size(); /* return size of table */ + char *hash_malloc(int size); /* malloc bytes for a hash entry */ #ifdef BIG_MALLOC - char *hash_alloc(int size); /* malloc bytes for a hash entry */ void hash_free(); /* free all hash allocated bytes */ #endif }; diff --git a/bacula/src/version.h b/bacula/src/version.h index 759288cd91..9a6abf24fd 100644 --- a/bacula/src/version.h +++ b/bacula/src/version.h @@ -3,9 +3,9 @@ */ #undef VERSION -#define VERSION "2.3.12" -#define BDATE "14 March 2008" -#define LSMDATE "14Mar08" +#define VERSION "2.3.14" +#define BDATE "24 March 2008" +#define LSMDATE "24Mar08" #define PROG_COPYRIGHT "Copyright (C) %d-2008 Free Software Foundation Europe e.V.\n" #define BYEAR "2008" /* year for copyright messages in progs */ diff --git a/bacula/src/win32/compat/compat.cpp b/bacula/src/win32/compat/compat.cpp index a3af259e96..152dc23b1a 100644 --- a/bacula/src/win32/compat/compat.cpp +++ b/bacula/src/win32/compat/compat.cpp @@ -632,14 +632,19 @@ statDir(const char *file, struct stat *sb) FILETIME *pftLastWriteTime; FILETIME *pftCreationTime; + /* + * Oh, cool, another exception: Microsoft doesn't let us do + * FindFile operations on a Drive, so simply fake root attributes. 
+ */ if (file[1] == ':' && file[2] == 0) { - Dmsg1(99, "faking ROOT attrs(%s).\n", file); - sb->st_mode = S_IFDIR; - sb->st_mode |= S_IREAD|S_IEXEC|S_IWRITE; - time(&sb->st_ctime); - time(&sb->st_mtime); - time(&sb->st_atime); - return 0; + time_t now = time(NULL); + Dmsg1(99, "faking ROOT attrs(%s).\n", file); + sb->st_mode = S_IFDIR; + sb->st_mode |= S_IREAD|S_IEXEC|S_IWRITE; + sb->st_ctime = now; + sb->st_mtime = now; + sb->st_atime = now; + return 0; } HANDLE h = INVALID_HANDLE_VALUE; @@ -649,7 +654,7 @@ statDir(const char *file, struct stat *sb) POOLMEM* pwszBuf = get_pool_memory (PM_FNAME); make_win32_path_UTF8_2_wchar(&pwszBuf, file); - h = p_FindFirstFileW((LPCWSTR) pwszBuf, &info_w); + h = p_FindFirstFileW((LPCWSTR)pwszBuf, &info_w); free_pool_memory(pwszBuf); pdwFileAttributes = &info_w.dwFileAttributes; @@ -799,8 +804,9 @@ stat2(const char *file, struct stat *sb) rval = fstat((int)h, sb); CloseHandle(h); - if (attr & FILE_ATTRIBUTE_DIRECTORY) { - return statDir(tmpbuf, sb); + if (attr & FILE_ATTRIBUTE_DIRECTORY && + file[1] == ':' && file[2] != 0) { + statDir(file, sb); } return rval; @@ -815,19 +821,12 @@ stat(const char *file, struct stat *sb) memset(sb, 0, sizeof(*sb)); - /* why not allow win 95 to use p_GetFileAttributesExA ? 
- * this function allows _some_ open files to be stat'ed - * if (g_platform_id == VER_PLATFORM_WIN32_WINDOWS) { - * return stat2(file, sb); - * } - */ - if (p_GetFileAttributesExW) { /* dynamically allocate enough space for UCS2 filename */ POOLMEM* pwszBuf = get_pool_memory (PM_FNAME); make_win32_path_UTF8_2_wchar(&pwszBuf, file); - BOOL b = p_GetFileAttributesExW((LPCWSTR) pwszBuf, GetFileExInfoStandard, &data); + BOOL b = p_GetFileAttributesExW((LPCWSTR)pwszBuf, GetFileExInfoStandard, &data); free_pool_memory(pwszBuf); if (!b) { @@ -869,8 +868,18 @@ stat(const char *file, struct stat *sb) sb->st_atime = cvt_ftime_to_utime(data.ftLastAccessTime); sb->st_mtime = cvt_ftime_to_utime(data.ftLastWriteTime); sb->st_ctime = cvt_ftime_to_utime(data.ftCreationTime); - if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { - return statDir(file, sb); + + /* + * If we are not at the root, then to distinguish a reparse + * point from a mount point, we must call FindFirstFile() to + * get the WIN32_FIND_DATA, which has the bit that indicates + * that this directory is a mount point -- aren't Win32 APIs + * wonderful? (sarcasm). The code exists in the statDir + * subroutine. + */ + if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY && + file[1] == ':' && file[2] != 0) { + statDir(file, sb); } return 0; } diff --git a/bacula/technotes-2.3 b/bacula/technotes-2.3 index 53a207495a..3d3c11b674 100644 --- a/bacula/technotes-2.3 +++ b/bacula/technotes-2.3 @@ -22,6 +22,12 @@ Add long term statistic job table General: +24Mar08 +kes Tweak Win32 mount point code. +kes Fix a couple of bugs in the accurate code (bad sscanf, buffer not + cleared giving bad file estimate). +kes Implement BIG_MALLOC in htable code. Runs 2.5 times faster for + 5 Million entries. 22Mar08 kes Keep Schedule run lists in order as suggested by Bastian Friedrich. -- 2.39.5