git.sur5r.net Git - bacula/bacula/blob - bacula/src/lib/htable.c

   1 /*
   2  *  Bacula hash table routines
   3  *
   4  *  htable is a hash table of items (pointers). This code is
   5  *    adapted and enhanced from code I wrote in 1982 for a
   6  *    relocatable linker.  At that time, the hash table size
   7  *    was fixed and a primary number, which essentially provides
   8  *    the randomness. In this program, the hash table can grow when
   9  *    it gets too full, so the table size here is a binary number. The
  10  *    hashing is provided using an idea from Tcl where the initial
  11  *    hash code is "randomized" using a simple calculation from
  12  *    a random number generator that multiplies by a big number
  13  *    (I multiply by a prime number, while Tcl did not)
  14  *    then shifts the result down and does the binary division
  15  *    by masking.  Increasing the size of the hash table is simple.
  16  *    Just create a new larger table, walk the old table and
  17  *    re-hash insert each entry into the new table.
  18  *
  19  *
  20  *   Kern Sibbald, July MMIII
  21  *
  22  *   Version $Id$
  23  *
  24  */
  25 /*
  26    Bacula® - The Network Backup Solution
  27
  28    Copyright (C) 2003-2006 Free Software Foundation Europe e.V.
  29
  30    The main author of Bacula is Kern Sibbald, with contributions from
  31    many others, a complete list can be found in the file AUTHORS.
  32    This program is Free Software; you can redistribute it and/or
  33    modify it under the terms of version two of the GNU General Public
  34    License as published by the Free Software Foundation and included
  35    in the file LICENSE.
  36
  37    This program is distributed in the hope that it will be useful, but
  38    WITHOUT ANY WARRANTY; without even the implied warranty of
  39    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  40    General Public License for more details.
  41
  42    You should have received a copy of the GNU General Public License
  43    along with this program; if not, write to the Free Software
  44    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  45    02110-1301, USA.
  46
  47    Bacula® is a registered trademark of John Walker.
  48    The licensor of Bacula is the Free Software Foundation Europe
  49    (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
  50    Switzerland, email:ftf@fsfeurope.org.
  51 */
  52
  53 #include "bacula.h"
  54
  55 #include "htable.h"
  56
  57 /* ===================================================================
  58  *    htable
  59  */
  60
  61 /*
  62  * Create hash of key, stored in hash then
  63  *  create and return the pseudo random bucket index
  64  */
  65 void htable::hash_index(char *key)
  66 {
  67    hash = 0;
  68    for (char *p=key; *p; p++) {
  69       hash += (hash << 3) + (uint32_t)*p;
  70    }
  71    /* Multiply by large prime number, take top bits, mask for remainder */
  72    index = ((hash * 1103515249) >> rshift) & mask;
  73    Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
  74 }
  75
  76 htable::htable(void *item, void *link, int tsize)
  77 {
  78    init(item, link, tsize);
  79 }
  80
  81 void htable::init(void *item, void *link, int tsize)
  82 {
  83    int pwr;
  84    tsize >>= 2;
  85    for (pwr=0; tsize; pwr++) {
  86       tsize >>= 1;
  87    }
  88    loffset = (char *)link - (char *)item;
  89    mask = ~((~0)<<pwr);               /* 3 bits => table size = 8 */
  90    rshift = 30 - pwr;                 /* start using bits 28, 29, 30 */
  91    num_items = 0;                     /* number of entries in table */
  92    buckets = 1<<pwr;                  /* hash table size -- power of two */
  93    max_items = buckets * 4;           /* allow average 4 entries per chain */
  94    table = (hlink **)malloc(buckets * sizeof(hlink *));
  95    memset(table, 0, buckets * sizeof(hlink *));
  96    walkptr = NULL;
  97    walk_index = 0;
  98 }
  99
 100 uint32_t htable::size()
 101 {
 102    return num_items;
 103 }
 104
 105 /*
 106  * Take each hash link and walk down the chain of items
 107  *  that hash there counting them (i.e. the hits),
 108  *  then report that number.
 109  *  Obiously, the more hits in a chain, the more time
 110  *  it takes to reference them. Empty chains are not so
 111  *  hot either -- as it means unused or wasted space.
 112  */
 113 #define MAX_COUNT 20
 114 void htable::stats()
 115 {
 116    int hits[MAX_COUNT];
 117    int max = 0;
 118    int i, j;
 119    hlink *p;
 120    printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
 121    printf("Hits/bucket: buckets\n");
 122    for (i=0; i < MAX_COUNT; i++) {
 123       hits[i] = 0;
 124    }
 125    for (i=0; i<(int)buckets; i++) {
 126       p = table[i];
 127       j = 0;
 128       while (p) {
 129          p = (hlink *)(p->next);
 130          j++;
 131       }
 132       if (j > max) {
 133          max = j;
 134       }
 135       if (j < MAX_COUNT) {
 136          hits[j]++;
 137       }
 138    }
 139    for (i=0; i < MAX_COUNT; i++) {
 140       printf("%2d:           %d\n",i, hits[i]);
 141    }
 142    printf("max hits in a bucket = %d\n", max);
 143 }
 144
 145 void htable::grow_table()
 146 {
 147    Dmsg1(100, "Grow called old size = %d\n", buckets);
 148    /* Setup a bigger table */
 149    htable *big = (htable *)malloc(sizeof(htable));
 150    big->loffset = loffset;
 151    big->mask = mask<<1 | 1;
 152    big->rshift = rshift - 1;
 153    big->num_items = 0;
 154    big->buckets = buckets * 2;
 155    big->max_items = big->buckets * 4;
 156    big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
 157    memset(big->table, 0, big->buckets * sizeof(hlink *));
 158    big->walkptr = NULL;
 159    big->walk_index = 0;
 160    /* Insert all the items in the new hash table */
 161    Dmsg1(100, "Before copy num_items=%d\n", num_items);
 162    /*
 163     * We walk through the old smaller tree getting items,
 164     * but since we are overwriting the colision links, we must
 165     * explicitly save the item->next pointer and walk each
 166     * colision chain ourselves.  We do use next() for getting
 167     * to the next bucket.
 168     */
 169    for (void *item=first(); item; ) {
 170       void *ni = ((hlink *)((char *)item+loffset))->next;  /* save link overwritten by insert */
 171       Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
 172       big->insert(((hlink *)((char *)item+loffset))->key, item);
 173       if (ni) {
 174          item = (void *)((char *)ni-loffset);
 175       } else {
 176          walkptr = NULL;
 177          item = next();
 178       }
 179    }
 180    Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
 181    if (num_items != big->num_items) {
 182       Dmsg0(000, "****** Big problems num_items mismatch ******\n");
 183    }
 184    free(table);
 185    memcpy(this, big, sizeof(htable));  /* move everything across */
 186    free(big);
 187    Dmsg0(100, "Exit grow.\n");
 188 }
 189
 190 bool htable::insert(char *key, void *item)
 191 {
 192    hlink *hp;
 193    if (lookup(key)) {
 194       return false;                   /* already exists */
 195    }
 196    ASSERT(index < buckets);
 197    Dmsg2(100, "Insert: hash=0x%x index=%d\n", (unsigned)hash, index);
 198    hp = (hlink *)(((char *)item)+loffset);
 199    Dmsg4(100, "Insert hp=0x%x index=%d item=0x%x offset=%u\n", (unsigned)hp,
 200       index, (unsigned)item, loffset);
 201    hp->next = table[index];
 202    hp->hash = hash;
 203    hp->key = key;
 204    table[index] = hp;
 205    Dmsg3(100, "Insert hp->next=0x%x hp->hash=0x%x hp->key=%s\n",
 206       (unsigned)hp->next, hp->hash, hp->key);
 207
 208    if (++num_items >= max_items) {
 209       Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items);
 210       grow_table();
 211    }
 212    Dmsg3(100, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
 213    return true;
 214 }
 215
 216 void *htable::lookup(char *key)
 217 {
 218    hash_index(key);
 219    for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
 220 //    Dmsg2(100, "hp=0x%x key=%s\n", (long)hp, hp->key);
 221       if (hash == hp->hash && strcmp(key, hp->key) == 0) {
 222          Dmsg1(100, "lookup return %x\n", ((char *)hp)-loffset);
 223          return ((char *)hp)-loffset;
 224       }
 225    }
 226    return NULL;
 227 }
 228
 229 void *htable::next()
 230 {
 231    Dmsg1(100, "Enter next: walkptr=0x%x\n", (unsigned)walkptr);
 232    if (walkptr) {
 233       walkptr = (hlink *)(walkptr->next);
 234    }
 235    while (!walkptr && walk_index < buckets) {
 236       walkptr = table[walk_index++];
 237       if (walkptr) {
 238          Dmsg3(100, "new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
 239             (unsigned)(walkptr->next), walk_index-1);
 240       }
 241    }
 242    if (walkptr) {
 243       Dmsg2(100, "next: rtn 0x%x walk_index=%d\n",
 244          (unsigned)(((char *)walkptr)-loffset), walk_index);
 245       return ((char *)walkptr)-loffset;
 246    }
 247    Dmsg0(100, "next: return NULL\n");
 248    return NULL;
 249 }
 250
 251 void *htable::first()
 252 {
 253    Dmsg0(100, "Enter first\n");
 254    walkptr = table[0];                /* get first bucket */
 255    walk_index = 1;                    /* Point to next index */
 256    while (!walkptr && walk_index < buckets) {
 257       walkptr = table[walk_index++];  /* go to next bucket */
 258       if (walkptr) {
 259          Dmsg3(100, "first new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
 260             (unsigned)(walkptr->next), walk_index-1);
 261       }
 262    }
 263    if (walkptr) {
 264       Dmsg1(100, "Leave first walkptr=0x%x\n", (unsigned)walkptr);
 265       return ((char *)walkptr)-loffset;
 266    }
 267    Dmsg0(100, "Leave first walkptr=NULL\n");
 268    return NULL;
 269 }
 270
 271 /* Destroy the table and its contents */
 272 void htable::destroy()
 273 {
 274    void *ni;
 275    void *li = first();
 276
 277    while (li) {
 278       ni = next();
 279       free(li);
 280       li=ni;
 281    }
 282
 283    free(table);
 284    table = NULL;
 285    Dmsg0(100, "Done destroy.\n");
 286 }
 287
 288
 289
 290 #ifdef TEST_PROGRAM
 291
 292 struct MYJCR {
 293    char *key;
 294    hlink link;
 295 };
 296
 297 #define NITEMS 10000
 298
 299 int main()
 300 {
 301    char mkey[30];
 302    htable *jcrtbl;
 303    MYJCR *save_jcr = NULL, *item;
 304    MYJCR *jcr = NULL;
 305    int count = 0;
 306
 307    jcrtbl = (htable *)malloc(sizeof(htable));
 308    jcrtbl->init(jcr, &jcr->link, NITEMS);
 309
 310    Dmsg1(000, "Inserting %d items\n", NITEMS);
 311    for (int i=0; i<NITEMS; i++) {
 312       sprintf(mkey, "This is htable item %d", i);
 313       jcr = (MYJCR *)malloc(sizeof(MYJCR));
 314       Dmsg2(100, "link=0x%x jcr=0x%x\n", (unsigned)&jcr->link, (unsigned)jcr);
 315       jcr->key = bstrdup(mkey);
 316
 317       jcrtbl->insert(jcr->key, jcr);
 318       if (i == 10) {
 319          save_jcr = jcr;
 320       }
 321    }
 322    if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
 323       printf("Bad news: %s not found.\n", save_jcr->key);
 324    } else {
 325       printf("Item 10's key is: %s\n", item->key);
 326    }
 327
 328    jcrtbl->stats();
 329    printf("Walk the hash table:\n");
 330    foreach_htable (jcr, jcrtbl) {
 331 //    printf("htable item = %s\n", jcr->key);
 332       free(jcr->key);
 333       count++;
 334    }
 335    printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
 336    printf("Calling destroy\n");
 337    jcrtbl->destroy();
 338
 339    free(jcrtbl);
 340    printf("Freed jcrtbl\n");
 341
 342    sm_dump(false);
 343
 344 }
 345 #endif