git.sur5r.net Git - bacula/bacula/blob - bacula/src/lib/htable.c

   1 /*
   2    Bacula® - The Network Backup Solution
   3
   4    Copyright (C) 2003-2008 Free Software Foundation Europe e.V.
   5
   6    The main author of Bacula is Kern Sibbald, with contributions from
   7    many others, a complete list can be found in the file AUTHORS.
   8    This program is Free Software; you can redistribute it and/or
   9    modify it under the terms of version two of the GNU General Public
  10    License as published by the Free Software Foundation and included
  11    in the file LICENSE.
  12
  13    This program is distributed in the hope that it will be useful, but
  14    WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16    General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  21    02110-1301, USA.
  22
  23    Bacula® is a registered trademark of Kern Sibbald.
  24    The licensor of Bacula is the Free Software Foundation Europe
  25    (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
  26    Switzerland, email:ftf@fsfeurope.org.
  27 */
  28 /*
  29  *  Bacula hash table routines
  30  *
  31  *  htable is a hash table of items (pointers). This code is
  32  *    adapted and enhanced from code I wrote in 1982 for a
  33  *    relocatable linker.  At that time, the hash table size
  34  *    was fixed and a primary number, which essentially provides
  35  *    the randomness. In this program, the hash table can grow when
  36  *    it gets too full, so the table size here is a binary number. The
  37  *    hashing is provided using an idea from Tcl where the initial
  38  *    hash code is "randomized" using a simple calculation from
  39  *    a random number generator that multiplies by a big number
  40  *    (I multiply by a prime number, while Tcl did not)
  41  *    then shifts the result down and does the binary division
  42  *    by masking.  Increasing the size of the hash table is simple.
  43  *    Just create a new larger table, walk the old table and
  44  *    re-hash insert each entry into the new table.
  45  *
  46  *
  47  *   Kern Sibbald, July MMIII
  48  *
  49  *   Version $Id$
  50  *
  51  */
  52
  53 #include "bacula.h"
  54
  55 #include "htable.h"
  56
  57 static const int dbglvl = 500;
  58
  59 /* ===================================================================
  60  *    htable
  61  */
  62
  63 #ifdef BIG_MALLOC
  64 /*
  65  * This subroutine gets a big buffer.
  66  */
  67 void htable::malloc_big_buf(int size)
  68 {
  69    struct h_mem *hmem;
  70
  71    hmem = (struct h_mem *)malloc(size);
  72    total_size += size;
  73    blocks++;
  74    hmem->next = mem_block;
  75    mem_block = hmem;
  76    hmem->mem = mem_block->first;
  77    hmem->rem = (char *)hmem + size - hmem->mem;
  78    Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
  79 }
  80
  81 /* This routine frees the whole tree */
  82 void htable::hash_big_free()
  83 {
  84    struct h_mem *hmem, *rel;
  85
  86    for (hmem=mem_block; hmem; ) {
  87       rel = hmem;
  88       hmem = hmem->next;
  89       Dmsg1(100, "free malloc buf=%p\n", rel);
  90       free(rel);
  91    }
  92 }
  93
  94 #endif
  95
  96 /*
  97  * Normal hash malloc routine that gets a
  98  *  "small" buffer from the big buffer
  99  */
 100 char *htable::hash_malloc(int size)
 101 {
 102 #ifdef BIG_MALLOC
 103    char *buf;
 104    int asize = BALIGN(size);
 105
 106    if (mem_block->rem < asize) {
 107       uint32_t mb_size;
 108       if (total_size >= 1000000) {
 109          mb_size = 1000000;
 110       } else {
 111          mb_size = 100000;
 112       }
 113       malloc_big_buf(mb_size);
 114    }
 115    mem_block->rem -= asize;
 116    buf = mem_block->mem;
 117    mem_block->mem += asize;
 118    return buf;
 119 #else
 120    total_size += size;
 121    blocks++;
 122    return (char *)malloc(size);
 123 #endif
 124 }
 125
 126
 127
 128
 129 /*
 130  * Create hash of key, stored in hash then
 131  *  create and return the pseudo random bucket index
 132  */
 133 void htable::hash_index(char *key)
 134 {
 135    hash = 0;
 136    for (char *p=key; *p; p++) {
 137 //    hash += (hash << 3) + (uint32_t)*p;
 138       hash +=  ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
 139    }
 140    /* Multiply by large prime number, take top bits, mask for remainder */
 141    index = ((hash * 1103515249) >> rshift) & mask;
 142    Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
 143 }
 144
 145 /*
 146  * tsize is the estimated number of entries in the hash table
 147  */
 148 htable::htable(void *item, void *link, int tsize)
 149 {
 150    init(item, link, tsize);
 151 }
 152
 153 void htable::init(void *item, void *link, int tsize)
 154 {
 155    int pwr;
 156
 157    memset(this, 0, sizeof(htable));
 158    if (tsize < 31) {
 159       tsize = 31;
 160    }
 161    tsize >>= 2;
 162    for (pwr=0; tsize; pwr++) {
 163       tsize >>= 1;
 164    }
 165    loffset = (char *)link - (char *)item;
 166    mask = ~((~0)<<pwr);               /* 3 bits => table size = 8 */
 167    rshift = 30 - pwr;                 /* start using bits 28, 29, 30 */
 168    buckets = 1<<pwr;                  /* hash table size -- power of two */
 169    max_items = buckets * 4;           /* allow average 4 entries per chain */
 170    table = (hlink **)malloc(buckets * sizeof(hlink *));
 171    memset(table, 0, buckets * sizeof(hlink *));
 172 #ifdef BIG_MALLOC
 173    malloc_big_buf(1000000);   /* ***FIXME*** need variable or some estimate */
 174 #endif
 175 }
 176
 177 uint32_t htable::size()
 178 {
 179    return num_items;
 180 }
 181
 182 /*
 183  * Take each hash link and walk down the chain of items
 184  *  that hash there counting them (i.e. the hits),
 185  *  then report that number.
 186  * Obiously, the more hits in a chain, the more time
 187  *  it takes to reference them. Empty chains are not so
 188  *  hot either -- as it means unused or wasted space.
 189  */
 190 #define MAX_COUNT 20
 191 void htable::stats()
 192 {
 193    int hits[MAX_COUNT];
 194    int max = 0;
 195    int i, j;
 196    hlink *p;
 197    printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
 198    printf("Hits/bucket: buckets\n");
 199    for (i=0; i < MAX_COUNT; i++) {
 200       hits[i] = 0;
 201    }
 202    for (i=0; i<(int)buckets; i++) {
 203       p = table[i];
 204       j = 0;
 205       while (p) {
 206          p = (hlink *)(p->next);
 207          j++;
 208       }
 209       if (j > max) {
 210          max = j;
 211       }
 212       if (j < MAX_COUNT) {
 213          hits[j]++;
 214       }
 215    }
 216    for (i=0; i < MAX_COUNT; i++) {
 217       printf("%2d:           %d\n",i, hits[i]);
 218    }
 219    printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
 220    printf("max hits in a bucket = %d\n", max);
 221 #ifdef BIG_MALLOC
 222    printf("total bytes malloced = %d\n", total_size);
 223    printf("total blocks malloced = %d\n", blocks);
 224 #endif
 225 }
 226
 227 void htable::grow_table()
 228 {
 229    Dmsg1(100, "Grow called old size = %d\n", buckets);
 230    /* Setup a bigger table */
 231    htable *big = (htable *)malloc(sizeof(htable));
 232    memcpy(big, this, sizeof(htable));  /* start with original class data */
 233    big->loffset = loffset;
 234    big->mask = mask<<1 | 1;
 235    big->rshift = rshift - 1;
 236    big->num_items = 0;
 237    big->buckets = buckets * 2;
 238    big->max_items = big->buckets * 4;
 239    /* Create a bigger hash table */
 240    big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
 241    memset(big->table, 0, big->buckets * sizeof(hlink *));
 242    big->walkptr = NULL;
 243    big->walk_index = 0;
 244    /* Insert all the items in the new hash table */
 245    Dmsg1(100, "Before copy num_items=%d\n", num_items);
 246    /*
 247     * We walk through the old smaller tree getting items,
 248     * but since we are overwriting the colision links, we must
 249     * explicitly save the item->next pointer and walk each
 250     * colision chain ourselves.  We do use next() for getting
 251     * to the next bucket.
 252     */
 253    for (void *item=first(); item; ) {
 254       void *ni = ((hlink *)((char *)item+loffset))->next;  /* save link overwritten by insert */
 255       Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
 256       big->insert(((hlink *)((char *)item+loffset))->key, item);
 257       if (ni) {
 258          item = (void *)((char *)ni-loffset);
 259       } else {
 260          walkptr = NULL;
 261          item = next();
 262       }
 263    }
 264    Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
 265    if (num_items != big->num_items) {
 266       Dmsg0(000, "****** Big problems num_items mismatch ******\n");
 267    }
 268    free(table);
 269    memcpy(this, big, sizeof(htable));  /* move everything across */
 270    free(big);
 271    Dmsg0(100, "Exit grow.\n");
 272 }
 273
 274 bool htable::insert(char *key, void *item)
 275 {
 276    hlink *hp;
 277    if (lookup(key)) {
 278       return false;                   /* already exists */
 279    }
 280    ASSERT(index < buckets);
 281    Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
 282    hp = (hlink *)(((char *)item)+loffset);
 283    Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
 284       index, item, loffset);
 285    hp->next = table[index];
 286    hp->hash = hash;
 287    hp->key = key;
 288    table[index] = hp;
 289    Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
 290       hp->next, hp->hash, hp->key);
 291
 292    if (++num_items >= max_items) {
 293       Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
 294       grow_table();
 295    }
 296    Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
 297    return true;
 298 }
 299
 300 void *htable::lookup(char *key)
 301 {
 302    hash_index(key);
 303    for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
 304 //    Dmsg2(100, "hp=%p key=%s\n", hp, hp->key);
 305       if (hash == hp->hash && strcmp(key, hp->key) == 0) {
 306          Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
 307          return ((char *)hp)-loffset;
 308       }
 309    }
 310    return NULL;
 311 }
 312
 313 void *htable::next()
 314 {
 315    Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
 316    if (walkptr) {
 317       walkptr = (hlink *)(walkptr->next);
 318    }
 319    while (!walkptr && walk_index < buckets) {
 320       walkptr = table[walk_index++];
 321       if (walkptr) {
 322          Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
 323             walkptr->next, walk_index-1);
 324       }
 325    }
 326    if (walkptr) {
 327       Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
 328          ((char *)walkptr)-loffset, walk_index);
 329       return ((char *)walkptr)-loffset;
 330    }
 331    Dmsg0(dbglvl, "next: return NULL\n");
 332    return NULL;
 333 }
 334
 335 void *htable::first()
 336 {
 337    Dmsg0(dbglvl, "Enter first\n");
 338    walkptr = table[0];                /* get first bucket */
 339    walk_index = 1;                    /* Point to next index */
 340    while (!walkptr && walk_index < buckets) {
 341       walkptr = table[walk_index++];  /* go to next bucket */
 342       if (walkptr) {
 343          Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
 344             walkptr->next, walk_index-1);
 345       }
 346    }
 347    if (walkptr) {
 348       Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
 349       return ((char *)walkptr)-loffset;
 350    }
 351    Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
 352    return NULL;
 353 }
 354
 355 /* Destroy the table and its contents */
 356 void htable::destroy()
 357 {
 358 #ifdef BIG_MALLOC
 359    hash_big_free();
 360 #else
 361    void *ni;
 362    void *li = first();
 363
 364    while (li) {
 365       ni = next();
 366       free(li);
 367       li=ni;
 368    }
 369 #endif
 370
 371    free(table);
 372    table = NULL;
 373    Dmsg0(100, "Done destroy.\n");
 374 }
 375
 376
 377
 378 #ifdef TEST_PROGRAM
 379
 380 struct MYJCR {
 381    char *key;
 382    hlink link;
 383 };
 384
 385 #define NITEMS 5000000
 386
 387 int main()
 388 {
 389    char mkey[30];
 390    htable *jcrtbl;
 391    MYJCR *save_jcr = NULL, *item;
 392    MYJCR *jcr = NULL;
 393    int count = 0;
 394
 395    jcrtbl = (htable *)malloc(sizeof(htable));
 396    jcrtbl->init(jcr, &jcr->link, NITEMS);
 397
 398    Dmsg1(000, "Inserting %d items\n", NITEMS);
 399    for (int i=0; i<NITEMS; i++) {
 400       int len;
 401       len = sprintf(mkey, "This is htable item %d", i) + 1;
 402
 403       jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
 404       jcr->key = (char *)jcrtbl->hash_malloc(len);
 405       memcpy(jcr->key, mkey, len);
 406       Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
 407
 408       jcrtbl->insert(jcr->key, jcr);
 409       if (i == 10) {
 410          save_jcr = jcr;
 411       }
 412    }
 413    if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
 414       printf("Bad news: %s not found.\n", save_jcr->key);
 415    } else {
 416       printf("Item 10's key is: %s\n", item->key);
 417    }
 418
 419    jcrtbl->stats();
 420    printf("Walk the hash table:\n");
 421    foreach_htable (jcr, jcrtbl) {
 422 //    printf("htable item = %s\n", jcr->key);
 423 #ifndef BIG_MALLOC
 424       free(jcr->key);
 425 #endif
 426       count++;
 427    }
 428    printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
 429    printf("Calling destroy\n");
 430    jcrtbl->destroy();
 431
 432    free(jcrtbl);
 433    printf("Freed jcrtbl\n");
 434
 435    sm_dump(false);
 436
 437 }
 438 #endif