From f815c961561470cc92aa3173a1e01b19d8bd4846 Mon Sep 17 00:00:00 2001
From: Kern Sibbald <kern@sibbald.com>
Date: Mon, 24 Mar 2008 13:30:48 +0000
Subject: [PATCH] 24Mar08 kes  Tweak Win32 mount point code. kes  Fix a couple
 of bugs in the accurate code (bad sscanf, buffer not      cleared giving bad
 file estimate). kes  Implement BIG_MALLOC in htable code. Runs 2.5 times
 faster for      5 Million entries.

git-svn-id: https://bacula.svn.sourceforge.net/svnroot/bacula/trunk@6672 91ce42f0-d328-0410-95d8-f526ca767f89
---
 bacula/src/dird/backup.c           |  2 +
 bacula/src/filed/backup.c          | 61 ++++++++++++++-------------
 bacula/src/lib/htable.c            | 67 ++++++++++++++++++++----------
 bacula/src/lib/htable.h            | 15 +++++--
 bacula/src/version.h               |  6 +--
 bacula/src/win32/compat/compat.cpp | 49 +++++++++++++---------
 bacula/technotes-2.3               |  6 +++
 7 files changed, 127 insertions(+), 79 deletions(-)

diff --git a/bacula/src/dird/backup.c b/bacula/src/dird/backup.c
index 2fbc295b86..1d596b6998 100644
--- a/bacula/src/dird/backup.c
+++ b/bacula/src/dird/backup.c
@@ -144,8 +144,10 @@ bool send_accurate_current_files(JCR *jcr)
 
    /* to be able to allocate the right size for htable */
    POOLMEM *nb = get_pool_memory(PM_FNAME);
+   *nb = 0;                           /* clear buffer */
    Mmsg(buf, "SELECT sum(JobFiles) FROM Job WHERE JobId IN (%s)",jobids);
    db_sql_query(jcr->db, buf.c_str(), db_get_int_handler, nb);
+   Dmsg2(200, "jobids=%s nb=%s\n", jobids, nb);
    jcr->file_bsock->fsend("accurate files=%s\n", nb); 
 
    db_get_file_list(jcr, jcr->db, jobids, accurate_list_handler, (void *)jcr);
diff --git a/bacula/src/filed/backup.c b/bacula/src/filed/backup.c
index 7db7d620b2..3e201917e5 100644
--- a/bacula/src/filed/backup.c
+++ b/bacula/src/filed/backup.c
@@ -115,7 +115,6 @@ bail_out:
 }
 
 /* 
- * This function doesn't work very well with smartalloc
  * TODO: use bigbuffer from htable
  */
 int accurate_cmd(JCR *jcr)
@@ -124,11 +123,11 @@ int accurate_cmd(JCR *jcr)
    int len;
    struct stat statp;
    int32_t LinkFIc;
-   uint64_t nb;
+   int32_t nb;
    CurFile *elt=NULL;
    char *lstat;
 
-   if (jcr->accurate==false || job_canceled(jcr) || jcr->JobLevel==L_FULL) {
+   if (!jcr->accurate || job_canceled(jcr) || jcr->JobLevel==L_FULL) {
       return true;
    }
 
@@ -136,6 +135,7 @@ int accurate_cmd(JCR *jcr)
       dir->fsend(_("2991 Bad accurate command\n"));
       return false;
    }
+   Dmsg2(200, "nb=%d msg=%s\n", nb, dir->msg);
 
    jcr->file_list = (htable *)malloc(sizeof(htable));
    jcr->file_list->init(elt, &elt->link, nb);
@@ -146,16 +146,13 @@ int accurate_cmd(JCR *jcr)
     */
    /* get current files */
    while (dir->recv() >= 0) {
-      len = strlen(dir->msg);
-      if ((len+1) < dir->msglen) {
-//       elt = (CurFile *)malloc(sizeof(CurFile));
-//       elt->fname  = (char *) malloc(dir->msglen+1);
-
+      len = strlen(dir->msg) + 1;
+      if (len < dir->msglen) {
          /* we store CurFile, fname and ctime/mtime in the same chunk */
-         elt = (CurFile *)malloc(sizeof(CurFile)+len+1);
-         elt->fname  = (char *) elt+sizeof(CurFile);
+         elt = (CurFile *)jcr->file_list->hash_malloc(sizeof(CurFile)+len);
+         elt->fname  = (char *)elt+sizeof(CurFile);
          strcpy(elt->fname, dir->msg);
-         lstat = dir->msg + len + 1;
+         lstat = dir->msg + len;
          decode_stat(lstat, &statp, &LinkFIc); /* decode catalog stat */
          elt->ctime = statp.st_ctime;
          elt->mtime = statp.st_mtime;
@@ -164,17 +161,20 @@ int accurate_cmd(JCR *jcr)
          Dmsg2(500, "add fname=%s lstat=%s\n", elt->fname, lstat);
       }
    }
+
+#ifdef DEBUG
    extern void *start_heap;
 
    char b1[50], b2[50], b3[50], b4[50], b5[50];
    Dmsg5(1," Heap: heap=%s smbytes=%s max_bytes=%s bufs=%s max_bufs=%s\n",
-	 edit_uint64_with_commas((char *)sbrk(0)-(char *)start_heap, b1),
-	 edit_uint64_with_commas(sm_bytes, b2),
-	 edit_uint64_with_commas(sm_max_bytes, b3),
-	 edit_uint64_with_commas(sm_buffers, b4),
-	 edit_uint64_with_commas(sm_max_buffers, b5));
+         edit_uint64_with_commas((char *)sbrk(0)-(char *)start_heap, b1),
+         edit_uint64_with_commas(sm_bytes, b2),
+         edit_uint64_with_commas(sm_max_bytes, b3),
+         edit_uint64_with_commas(sm_buffers, b4),
+         edit_uint64_with_commas(sm_max_buffers, b5));
 
 //   jcr->file_list->stats();
+#endif
 
    return true;
 }
@@ -186,7 +186,7 @@ bool accurate_send_deleted_list(JCR *jcr)
 
    int stream = STREAM_UNIX_ATTRIBUTES;
 
-   if (jcr->accurate == false || jcr->JobLevel == L_FULL) {
+   if (!jcr->accurate || jcr->JobLevel == L_FULL) {
       goto bail_out;
    }
 
@@ -267,7 +267,7 @@ bool blast_data_to_storage_daemon(JCR *jcr, char *addr)
    } else {
       buf_size = 0;                   /* use default */
    }
-   if (!bnet_set_buffer_size(sd, buf_size, BNET_SETBUF_WRITE)) {
+   if (!sd->set_buffer_size(buf_size, BNET_SETBUF_WRITE)) {
       set_jcr_job_status(jcr, JS_ErrorTerminated);
       Jmsg(jcr, M_FATAL, 0, _("Cannot set buffer size FD->SD.\n"));
       return false;
@@ -275,13 +275,14 @@ bool blast_data_to_storage_daemon(JCR *jcr, char *addr)
 
    jcr->buf_size = sd->msglen;
    /* Adjust for compression so that output buffer is
-    * 12 bytes + 0.1% larger than input buffer plus 18 bytes.
-    * This gives a bit extra plus room for the sparse addr if any.
-    * Note, we adjust the read size to be smaller so that the
-    * same output buffer can be used without growing it.
+    *  12 bytes + 0.1% larger than input buffer plus 18 bytes.
+    *  This gives a bit extra plus room for the sparse addr if any.
+    *  Note, we adjust the read size to be smaller so that the
+    *  same output buffer can be used without growing it.
     *
-    * The zlib compression workset is initialized here to minimise
-    * the "per file" load. The jcr member is only set, if the init was successful.
+    * The zlib compression workset is initialized here to minimize
+    *  the "per file" load. The jcr member is set only if the init
+    *  was successful.
     */
    jcr->compress_buf_size = jcr->buf_size + ((jcr->buf_size+999) / 1000) + 30;
    jcr->compress_buf = get_memory(jcr->compress_buf_size);
@@ -529,21 +530,21 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level)
       break;
    case FT_NOACCESS: {
       berrno be;
-      Jmsg(jcr, M_NOTSAVED, 0, _("     Could not access %s: ERR=%s\n"), ff_pkt->fname,
+      Jmsg(jcr, M_NOTSAVED, 0, _("     Could not access \"%s\": ERR=%s\n"), ff_pkt->fname,
          be.bstrerror(ff_pkt->ff_errno));
       jcr->Errors++;
       return 1;
    }
    case FT_NOFOLLOW: {
       berrno be;
-      Jmsg(jcr, M_NOTSAVED, 0, _("     Could not follow link %s: ERR=%s\n"), 
+      Jmsg(jcr, M_NOTSAVED, 0, _("     Could not follow link \"%s\": ERR=%s\n"), 
            ff_pkt->fname, be.bstrerror(ff_pkt->ff_errno));
       jcr->Errors++;
       return 1;
    }
    case FT_NOSTAT: {
       berrno be;
-      Jmsg(jcr, M_NOTSAVED, 0, _("     Could not stat %s: ERR=%s\n"), ff_pkt->fname,
+      Jmsg(jcr, M_NOTSAVED, 0, _("     Could not stat \"%s\": ERR=%s\n"), ff_pkt->fname,
          be.bstrerror(ff_pkt->ff_errno));
       jcr->Errors++;
       return 1;
@@ -557,7 +558,7 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level)
       return 1;
    case FT_NOOPEN: {
       berrno be;
-      Jmsg(jcr, M_NOTSAVED, 0, _("     Could not open directory %s: ERR=%s\n"), 
+      Jmsg(jcr, M_NOTSAVED, 0, _("     Could not open directory \"%s\": ERR=%s\n"), 
            ff_pkt->fname, be.bstrerror(ff_pkt->ff_errno));
       jcr->Errors++;
       return 1;
@@ -689,7 +690,7 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level)
       if (bopen(&ff_pkt->bfd, ff_pkt->fname, O_RDONLY | O_BINARY | noatime, 0) < 0) {
          ff_pkt->ff_errno = errno;
          berrno be;
-         Jmsg(jcr, M_NOTSAVED, 0, _("     Cannot open %s: ERR=%s.\n"), ff_pkt->fname,
+         Jmsg(jcr, M_NOTSAVED, 0, _("     Cannot open \"%s\": ERR=%s.\n"), ff_pkt->fname,
               be.bstrerror());
          jcr->Errors++;
          if (tid) {
@@ -726,7 +727,7 @@ int save_file(JCR *jcr, FF_PKT *ff_pkt, bool top_level)
          if (!bopen_rsrc(&ff_pkt->bfd, ff_pkt->fname, O_RDONLY | O_BINARY, 0) < 0) {
             ff_pkt->ff_errno = errno;
             berrno be;
-            Jmsg(jcr, M_NOTSAVED, -1, _("     Cannot open resource fork for %s: ERR=%s.\n"), 
+            Jmsg(jcr, M_NOTSAVED, -1, _("     Cannot open resource fork for \"%s\": ERR=%s.\n"), 
                  ff_pkt->fname, be.bstrerror());
             jcr->Errors++;
             if (is_bopen(&ff_pkt->bfd)) {
diff --git a/bacula/src/lib/htable.c b/bacula/src/lib/htable.c
index 9284839656..280939dcf3 100644
--- a/bacula/src/lib/htable.c
+++ b/bacula/src/lib/htable.c
@@ -76,8 +76,23 @@ void htable::malloc_buf(int size)
    Dmsg2(200, "malloc buf size=%d rem=%d\n", size, hmem->rem);
 }
 
-char *htable::hash_alloc(int size)
+/* This routine frees the whole tree */
+void htable::hash_free()
 {
+   struct h_mem *hmem, *rel;
+
+   for (hmem=mem; hmem; ) {
+      rel = hmem;
+      hmem = hmem->next;
+      free(rel);
+   }
+}
+
+#endif
+
+char *htable::hash_malloc(int size)
+{
+#ifdef BIG_MALLOC
    char *buf;
    int asize = BALIGN(size);
 
@@ -94,21 +109,14 @@ char *htable::hash_alloc(int size)
    buf = mem->mem;
    mem->mem += asize;
    return buf;
+#else 
+   total_size += size;
+   blocks++;
+   return (char *)malloc(size);
+#endif
 }
 
 
-/* This routine frees the whole tree */
-void htable::hash_free()
-{
-   struct h_mem *hmem, *rel;
-
-   for (hmem=mem; hmem; ) {
-      rel = hmem;
-      hmem = hmem->next;
-      free(rel);
-   }
-}
-#endif
  
 
 /*
@@ -126,6 +134,9 @@ void htable::hash_index(char *key)
    Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
 }
 
+/*
+ * tsize is the estimated number of entries in the hash table
+ */
 htable::htable(void *item, void *link, int tsize)
 {
    init(item, link, tsize);
@@ -134,6 +145,10 @@ htable::htable(void *item, void *link, int tsize)
 void htable::init(void *item, void *link, int tsize)
 {
    int pwr;
+
+   if (tsize < 31) {
+      tsize = 31;
+   }
    tsize >>= 2;
    for (pwr=0; tsize; pwr++) {
       tsize >>= 1;
@@ -148,9 +163,11 @@ void htable::init(void *item, void *link, int tsize)
    memset(table, 0, buckets * sizeof(hlink *));
    walkptr = NULL;
    walk_index = 0;
+   total_size = 0;
+   blocks = 0;
 #ifdef BIG_MALLOC
    mem = NULL;
-   malloc_buf(1000000);   /* ***FIXME*** base off of size */
+   malloc_buf(1000000);   /* ***FIXME*** need variable or some estimate */
 #endif
 }
 
@@ -163,7 +180,7 @@ uint32_t htable::size()
  * Take each hash link and walk down the chain of items
  *  that hash there counting them (i.e. the hits), 
  *  then report that number.
- *  Obiously, the more hits in a chain, the more time
+ * Obviously, the more hits in a chain, the more time
  *  it takes to reference them. Empty chains are not so
  *  hot either -- as it means unused or wasted space.
  */
@@ -196,7 +213,12 @@ void htable::stats()
    for (i=0; i < MAX_COUNT; i++) {
       printf("%2d:           %d\n",i, hits[i]);
    }
+   printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
    printf("max hits in a bucket = %d\n", max);
+#ifdef BIG_MALLOC
+   printf("total bytes malloced = %d\n", total_size);
+   printf("total blocks malloced = %d\n", blocks);
+#endif
 }
 
 void htable::grow_table()
@@ -328,6 +350,9 @@ void *htable::first()
 /* Destroy the table and its contents */
 void htable::destroy()
 {
+#ifdef BIG_MALLOC
+   hash_free();
+#else
    void *ni;
    void *li = first();
 
@@ -336,6 +361,7 @@ void htable::destroy()
       free(li);
       li=ni;
    }
+#endif
 
    free(table);
    table = NULL;
@@ -351,7 +377,7 @@ struct MYJCR {
    hlink link;
 };
 
-#define NITEMS 1000000
+#define NITEMS 5000000
 
 int main()
 {
@@ -369,13 +395,8 @@ int main()
       int len;
       len = sprintf(mkey, "This is htable item %d", i) + 1;
 
-#ifdef BIG_MALLOC
-      jcr = (MYJCR *)jcrtbl->hash_alloc(sizeof(MYJCR));
-      jcr->key = (char *)jcrtbl->hash_alloc(len);
-#else
-      jcr = (MYJCR *)malloc(sizeof(MYJCR));
-      jcr->key = (char *)malloc(len);
-#endif
+      jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
+      jcr->key = (char *)jcrtbl->hash_malloc(len);
       memcpy(jcr->key, mkey, len);
       Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
 
diff --git a/bacula/src/lib/htable.h b/bacula/src/lib/htable.h
index 4e08eb65eb..48dcc2ebac 100644
--- a/bacula/src/lib/htable.h
+++ b/bacula/src/lib/htable.h
@@ -40,17 +40,25 @@
 
 /* 
  * BIG_MALLOC is to provide a large malloc service to htable
- *   not yet implemented, and not yet working.
  */
-//#define BIG_MALLOC
+#define BIG_MALLOC
 
 /*
  * Loop var through each member of table
  */
+#ifdef HAVE_TYPEOF
+#define foreach_htable(var, tbl) \
+        for((var)=(typeof(var))((tbl)->first()); \
+           (var); \
+           (var)=(typeof(var))((tbl)->next()))
+#else
 #define foreach_htable(var, tbl) \
         for((*((void **)&(var))=(void *)((tbl)->first())); \
             (var); \
             (*((void **)&(var))=(void *)((tbl)->next())))
+#endif
+
+
 
 struct hlink {
    void *next;                        /* next hash item */
@@ -85,6 +93,7 @@ class htable : public SMARTALLOC {
 #endif
    void hash_index(char *key);        /* produce hash key,index */
    void grow_table();                 /* grow the table */
+
 public:
    htable(void *item, void *link, int tsize = 31);
    ~htable() { destroy(); }
@@ -96,8 +105,8 @@ public:
    void destroy();
    void stats();                      /* print stats about the table */
    uint32_t size();                   /* return size of table */
+   char *hash_malloc(int size);       /* malloc bytes for a hash entry */
 #ifdef BIG_MALLOC
-   char *hash_alloc(int size);        /* malloc bytes for a hash entry */
    void hash_free();                  /* free all hash allocated bytes */
 #endif
 };
diff --git a/bacula/src/version.h b/bacula/src/version.h
index 759288cd91..9a6abf24fd 100644
--- a/bacula/src/version.h
+++ b/bacula/src/version.h
@@ -3,9 +3,9 @@
  */
 
 #undef  VERSION
-#define VERSION "2.3.12"
-#define BDATE   "14 March 2008"
-#define LSMDATE "14Mar08"
+#define VERSION "2.3.14"
+#define BDATE   "24 March 2008"
+#define LSMDATE "24Mar08"
 
 #define PROG_COPYRIGHT "Copyright (C) %d-2008 Free Software Foundation Europe e.V.\n"
 #define BYEAR "2008"       /* year for copyright messages in progs */
diff --git a/bacula/src/win32/compat/compat.cpp b/bacula/src/win32/compat/compat.cpp
index a3af259e96..152dc23b1a 100644
--- a/bacula/src/win32/compat/compat.cpp
+++ b/bacula/src/win32/compat/compat.cpp
@@ -632,14 +632,19 @@ statDir(const char *file, struct stat *sb)
    FILETIME *pftLastWriteTime;
    FILETIME *pftCreationTime;
 
+   /* 
+    * Oh, cool, another exception: Microsoft doesn't let us do 
+    *  FindFile operations on a Drive, so simply fake root attributes.
+    */
    if (file[1] == ':' && file[2] == 0) {
-        Dmsg1(99, "faking ROOT attrs(%s).\n", file);
-        sb->st_mode = S_IFDIR;
-        sb->st_mode |= S_IREAD|S_IEXEC|S_IWRITE;
-        time(&sb->st_ctime);
-        time(&sb->st_mtime);
-        time(&sb->st_atime);
-        return 0;
+      time_t now = time(NULL);
+      Dmsg1(99, "faking ROOT attrs(%s).\n", file);
+      sb->st_mode = S_IFDIR;
+      sb->st_mode |= S_IREAD|S_IEXEC|S_IWRITE;
+      sb->st_ctime = now;
+      sb->st_mtime = now;
+      sb->st_atime = now;
+      return 0;
     }
 
    HANDLE h = INVALID_HANDLE_VALUE;
@@ -649,7 +654,7 @@ statDir(const char *file, struct stat *sb)
       POOLMEM* pwszBuf = get_pool_memory (PM_FNAME);
       make_win32_path_UTF8_2_wchar(&pwszBuf, file);
 
-      h = p_FindFirstFileW((LPCWSTR) pwszBuf, &info_w);
+      h = p_FindFirstFileW((LPCWSTR)pwszBuf, &info_w);
       free_pool_memory(pwszBuf);
 
       pdwFileAttributes = &info_w.dwFileAttributes;
@@ -799,8 +804,9 @@ stat2(const char *file, struct stat *sb)
    rval = fstat((int)h, sb);
    CloseHandle(h);
 
-   if (attr & FILE_ATTRIBUTE_DIRECTORY) {
-      return statDir(tmpbuf, sb);
+   if (attr & FILE_ATTRIBUTE_DIRECTORY &&
+        file[1] == ':' && file[2] != 0) {
+      statDir(file, sb);
    }
 
    return rval;
@@ -815,19 +821,12 @@ stat(const char *file, struct stat *sb)
 
    memset(sb, 0, sizeof(*sb));
 
-   /* why not allow win 95 to use p_GetFileAttributesExA ? 
-    * this function allows _some_ open files to be stat'ed 
-    * if (g_platform_id == VER_PLATFORM_WIN32_WINDOWS) {
-    *    return stat2(file, sb);
-    * }
-    */
-
    if (p_GetFileAttributesExW) {
       /* dynamically allocate enough space for UCS2 filename */
       POOLMEM* pwszBuf = get_pool_memory (PM_FNAME);
       make_win32_path_UTF8_2_wchar(&pwszBuf, file);
 
-      BOOL b = p_GetFileAttributesExW((LPCWSTR) pwszBuf, GetFileExInfoStandard, &data);
+      BOOL b = p_GetFileAttributesExW((LPCWSTR)pwszBuf, GetFileExInfoStandard, &data);
       free_pool_memory(pwszBuf);
 
       if (!b) {
@@ -869,8 +868,18 @@ stat(const char *file, struct stat *sb)
    sb->st_atime = cvt_ftime_to_utime(data.ftLastAccessTime);
    sb->st_mtime = cvt_ftime_to_utime(data.ftLastWriteTime);
    sb->st_ctime = cvt_ftime_to_utime(data.ftCreationTime);
-   if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) {
-      return statDir(file, sb);
+
+   /*
+    * If we are not at the root, then to distinguish a reparse 
+    *  point from a mount point, we must call FindFirstFile() to
+    *  get the WIN32_FIND_DATA, which has the bit that indicates
+    *  that this directory is a mount point -- aren't Win32 APIs
+    *  wonderful? (sarcasm).  The code exists in the statDir
+    *  subroutine.
+    */
+   if (data.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY && 
+        file[1] == ':' && file[2] != 0) {
+      statDir(file, sb);
    }
    return 0;
 }
diff --git a/bacula/technotes-2.3 b/bacula/technotes-2.3
index 53a207495a..3d3c11b674 100644
--- a/bacula/technotes-2.3
+++ b/bacula/technotes-2.3
@@ -22,6 +22,12 @@ Add long term statistic job table
 
 
 General:
+24Mar08
+kes  Tweak Win32 mount point code.
+kes  Fix a couple of bugs in the accurate code (bad sscanf, buffer not
+     cleared giving bad file estimate).
+kes  Implement BIG_MALLOC in htable code. Runs 2.5 times faster for
+     5 Million entries.
 22Mar08
 kes  Keep Schedule run lists in order as suggested by
      Bastian Friedrich.
-- 
2.39.5