ebl use tokyocabinet by default instead of htable

[bacula/bacula] / bacula / src / lib / htable.c
diff --git a/bacula/src/lib/htable.c b/bacula/src/lib/htable.c

index 88fd2343edc6752ece3251559352e87a717e4b49..25cafb9f547eef75722ec3aec6c3d5908b5e5055 100644 (file)
--- a/bacula/src/lib/htable.c
+++ b/bacula/src/lib/htable.c
@@ -1,31 +1,7 @@
-/*
- *  Bacula hash table routines
- *
- *  htable is a hash table of items (pointers). This code is
- *    adapted and enhanced from code I wrote in 1982 for a
- *    relocatable linker.  At that time, the hash table size
- *    was fixed and a primary number, which essentially provides
- *    the randomness. In this program, the hash table can grow when
- *    it gets too full, so the table size here is a binary number. The
- *    hashing is provided using an idea from Tcl where the initial
- *    hash code is "randomized" using a simple calculation from
- *    a random number generator that multiplies by a big number
- *    (I multiply by a prime number, while Tcl did not)
- *    then shifts the result down and does the binary division
- *    by masking.  Increasing the size of the hash table is simple.
- *    Just create a new larger table, walk the old table and
- *    re-hash insert each entry into the new table.
- *
- *
- *   Kern Sibbald, July MMIII
- *
- *   Version $Id$
- *
- */
  /*
     Bacula® - The Network Backup Solution
  
-   Copyright (C) 2003-2006 Free Software Foundation Europe e.V.
+   Copyright (C) 2003-2008 Free Software Foundation Europe e.V.
  
     The main author of Bacula is Kern Sibbald, with contributions from
     many others, a complete list can be found in the file AUTHORS.
@@ -49,6 +25,30 @@
     (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
     Switzerland, email:ftf@fsfeurope.org.
  */
+/*
+ *  Bacula hash table routines
+ *
+ *  htable is a hash table of items (pointers). This code is
+ *    adapted and enhanced from code I wrote in 1982 for a
+ *    relocatable linker.  At that time, the hash table size
+ *    was fixed and a primary number, which essentially provides
+ *    the randomness. In this program, the hash table can grow when
+ *    it gets too full, so the table size here is a binary number. The
+ *    hashing is provided using an idea from Tcl where the initial
+ *    hash code is "randomized" using a simple calculation from
+ *    a random number generator that multiplies by a big number
+ *    (I multiply by a prime number, while Tcl did not)
+ *    then shifts the result down and does the binary division
+ *    by masking.  Increasing the size of the hash table is simple.
+ *    Just create a new larger table, walk the old table and
+ *    re-hash insert each entry into the new table.
+ *
+ *
+ *   Kern Sibbald, July MMIII
+ *
+ *   Version $Id$
+ *
+ */
  
  #include "bacula.h"
  
@@ -58,6 +58,67 @@
   *    htable
   */
  
+#ifdef BIG_MALLOC
+/*
+ * This subroutine gets a big buffer.
+ */
+void htable::malloc_buf(int size)
+{
+   struct h_mem *hmem;
+
+   hmem = (struct h_mem *)malloc(size);
+   total_size += size;
+   blocks++;
+   hmem->next = this->mem;
+   this->mem = hmem;
+   hmem->mem = mem->first;
+   hmem->rem = (char *)hmem + size - hmem->mem;
+   Dmsg2(200, "malloc buf size=%d rem=%d\n", size, hmem->rem);
+}
+
+/* This routine frees the whole tree */
+void htable::hash_free()
+{
+   struct h_mem *hmem, *rel;
+
+   for (hmem=mem; hmem; ) {
+      rel = hmem;
+      hmem = hmem->next;
+      free(rel);
+   }
+}
+
+#endif
+
+char *htable::hash_malloc(int size)
+{
+#ifdef BIG_MALLOC
+   char *buf;
+   int asize = BALIGN(size);
+
+   if (mem->rem < asize) {
+      uint32_t mb_size;
+      if (total_size >= 1000000) {
+         mb_size = 1000000;
+      } else {
+         mb_size = 100000;
+      }
+      malloc_buf(mb_size);
+   }
+   mem->rem -= asize;
+   buf = mem->mem;
+   mem->mem += asize;
+   return buf;
+#else 
+   total_size += size;
+   blocks++;
+   return (char *)malloc(size);
+#endif
+}
+
+
+ 
+
  /*
   * Create hash of key, stored in hash then
   *  create and return the pseudo random bucket index
@@ -66,13 +127,17 @@ void htable::hash_index(char *key)
  {
     hash = 0;
     for (char *p=key; *p; p++) {
-      hash += (hash << 3) + (uint32_t)*p;
+//    hash += (hash << 3) + (uint32_t)*p;
+      hash +=  ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
     }
     /* Multiply by large prime number, take top bits, mask for remainder */
     index = ((hash * 1103515249) >> rshift) & mask;
     Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
  }
  
+/*
+ * tsize is the estimated number of entries in the hash table
+ */
  htable::htable(void *item, void *link, int tsize)
  {
     init(item, link, tsize);
@@ -81,6 +146,10 @@ htable::htable(void *item, void *link, int tsize)
  void htable::init(void *item, void *link, int tsize)
  {
     int pwr;
+
+   if (tsize < 31) {
+      tsize = 31;
+   }
     tsize >>= 2;
     for (pwr=0; tsize; pwr++) {
        tsize >>= 1;
@@ -95,6 +164,12 @@ void htable::init(void *item, void *link, int tsize)
     memset(table, 0, buckets * sizeof(hlink *));
     walkptr = NULL;
     walk_index = 0;
+   total_size = 0;
+   blocks = 0;
+#ifdef BIG_MALLOC
+   mem = NULL;
+   malloc_buf(1000000);   /* ***FIXME*** need variable or some estimate */
+#endif
  }
  
  uint32_t htable::size()
@@ -106,7 +181,7 @@ uint32_t htable::size()
   * Take each hash link and walk down the chain of items
   *  that hash there counting them (i.e. the hits), 
   *  then report that number.
- *  Obiously, the more hits in a chain, the more time
+ * Obiously, the more hits in a chain, the more time
   *  it takes to reference them. Empty chains are not so
   *  hot either -- as it means unused or wasted space.
   */
@@ -139,7 +214,12 @@ void htable::stats()
     for (i=0; i < MAX_COUNT; i++) {
        printf("%2d:           %d\n",i, hits[i]);
     }
+   printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
     printf("max hits in a bucket = %d\n", max);
+#ifdef BIG_MALLOC
+   printf("total bytes malloced = %d\n", total_size);
+   printf("total blocks malloced = %d\n", blocks);
+#endif
  }
  
  void htable::grow_table()
@@ -194,16 +274,16 @@ bool htable::insert(char *key, void *item)
        return false;                   /* already exists */
     }
     ASSERT(index < buckets);
-   Dmsg2(100, "Insert: hash=0x%x index=%d\n", (unsigned)hash, index);
+   Dmsg2(100, "Insert: hash=%p index=%d\n", hash, index);
     hp = (hlink *)(((char *)item)+loffset);
-   Dmsg4(100, "Insert hp=0x%x index=%d item=0x%x offset=%u\n", (unsigned)hp,
-      index, (unsigned)item, loffset);
+   Dmsg4(100, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
+      index, item, loffset);
     hp->next = table[index];
     hp->hash = hash;
     hp->key = key;
     table[index] = hp;
-   Dmsg3(100, "Insert hp->next=0x%x hp->hash=0x%x hp->key=%s\n",
-      (unsigned)hp->next, hp->hash, hp->key);
+   Dmsg3(100, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
+      hp->next, hp->hash, hp->key);
  
     if (++num_items >= max_items) {
        Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items);
@@ -217,9 +297,9 @@ void *htable::lookup(char *key)
  {
     hash_index(key);
     for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
-//    Dmsg2(100, "hp=0x%x key=%s\n", (long)hp, hp->key);
+//    Dmsg2(100, "hp=%p key=%s\n", hp, hp->key);
        if (hash == hp->hash && strcmp(key, hp->key) == 0) {
-         Dmsg1(100, "lookup return %x\n", ((char *)hp)-loffset);
+         Dmsg1(100, "lookup return %p\n", ((char *)hp)-loffset);
           return ((char *)hp)-loffset;
        }
     }
@@ -228,20 +308,20 @@ void *htable::lookup(char *key)
  
  void *htable::next()
  {
-   Dmsg1(100, "Enter next: walkptr=0x%x\n", (unsigned)walkptr);
+   Dmsg1(100, "Enter next: walkptr=%p\n", walkptr);
     if (walkptr) {
        walkptr = (hlink *)(walkptr->next);
     }
     while (!walkptr && walk_index < buckets) {
        walkptr = table[walk_index++];
        if (walkptr) {
-         Dmsg3(100, "new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
-            (unsigned)(walkptr->next), walk_index-1);
+         Dmsg3(100, "new walkptr=%p next=%p inx=%d\n", walkptr,
+            walkptr->next, walk_index-1);
        }
     }
     if (walkptr) {
-      Dmsg2(100, "next: rtn 0x%x walk_index=%d\n",
-         (unsigned)(((char *)walkptr)-loffset), walk_index);
+      Dmsg2(100, "next: rtn %p walk_index=%d\n",
+         ((char *)walkptr)-loffset, walk_index);
        return ((char *)walkptr)-loffset;
     }
     Dmsg0(100, "next: return NULL\n");
@@ -256,12 +336,12 @@ void *htable::first()
     while (!walkptr && walk_index < buckets) {
        walkptr = table[walk_index++];  /* go to next bucket */
        if (walkptr) {
-         Dmsg3(100, "first new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
-            (unsigned)(walkptr->next), walk_index-1);
+         Dmsg3(100, "first new walkptr=%p next=%p inx=%d\n", walkptr,
+            walkptr->next, walk_index-1);
        }
     }
     if (walkptr) {
-      Dmsg1(100, "Leave first walkptr=0x%x\n", (unsigned)walkptr);
+      Dmsg1(100, "Leave first walkptr=%p\n", walkptr);
        return ((char *)walkptr)-loffset;
     }
     Dmsg0(100, "Leave first walkptr=NULL\n");
@@ -271,6 +351,9 @@ void *htable::first()
  /* Destroy the table and its contents */
  void htable::destroy()
  {
+#ifdef BIG_MALLOC
+   hash_free();
+#else
     void *ni;
     void *li = first();
  
@@ -279,6 +362,7 @@ void htable::destroy()
        free(li);
        li=ni;
     }
+#endif
  
     free(table);
     table = NULL;
@@ -294,7 +378,7 @@ struct MYJCR {
     hlink link;
  };
  
-#define NITEMS 10000
+#define NITEMS 5000000
  
  int main()
  {
@@ -309,10 +393,13 @@ int main()
  
     Dmsg1(000, "Inserting %d items\n", NITEMS);
     for (int i=0; i<NITEMS; i++) {
-      sprintf(mkey, "This is htable item %d", i);
-      jcr = (MYJCR *)malloc(sizeof(MYJCR));
-      Dmsg2(100, "link=0x%x jcr=0x%x\n", (unsigned)&jcr->link, (unsigned)jcr);
-      jcr->key = bstrdup(mkey);
+      int len;
+      len = sprintf(mkey, "This is htable item %d", i) + 1;
+
+      jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
+      jcr->key = (char *)jcrtbl->hash_malloc(len);
+      memcpy(jcr->key, mkey, len);
+      Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
  
        jcrtbl->insert(jcr->key, jcr);
        if (i == 10) {
@@ -329,7 +416,9 @@ int main()
     printf("Walk the hash table:\n");
     foreach_htable (jcr, jcrtbl) {
  //    printf("htable item = %s\n", jcr->key);
+#ifndef BIG_MALLOC
        free(jcr->key);
+#endif
        count++;
     }
     printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");