git.sur5r.net Git - bacula/bacula/blob - bacula/src/lib/htable.c

   1 /*
   2    Bacula® - The Network Backup Solution
   3
   4    Copyright (C) 2003-2010 Free Software Foundation Europe e.V.
   5
   6    The main author of Bacula is Kern Sibbald, with contributions from
   7    many others, a complete list can be found in the file AUTHORS.
   8    This program is Free Software; you can redistribute it and/or
   9    modify it under the terms of version two of the GNU General Public
  10    License as published by the Free Software Foundation and included
  11    in the file LICENSE.
  12
  13    This program is distributed in the hope that it will be useful, but
  14    WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16    General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  21    02110-1301, USA.
  22
  23    Bacula® is a registered trademark of Kern Sibbald.
  24    The licensor of Bacula is the Free Software Foundation Europe
  25    (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
  26    Switzerland, email:ftf@fsfeurope.org.
  27 */
  28 /*
  29  *  Bacula hash table routines
  30  *
  31  *  htable is a hash table of items (pointers). This code is
  32  *    adapted and enhanced from code I wrote in 1982 for a
  33  *    relocatable linker.  At that time, the hash table size
  34  *    was fixed and a primary number, which essentially provides
  35  *    the randomness. In this program, the hash table can grow when
  36  *    it gets too full, so the table size here is a binary number. The
  37  *    hashing is provided using an idea from Tcl where the initial
  38  *    hash code is "randomized" using a simple calculation from
  39  *    a random number generator that multiplies by a big number
  40  *    (I multiply by a prime number, while Tcl did not)
  41  *    then shifts the result down and does the binary division
  42  *    by masking.  Increasing the size of the hash table is simple.
  43  *    Just create a new larger table, walk the old table and
  44  *    re-hash insert each entry into the new table.
  45  *
  46  *
  47  *   Kern Sibbald, July MMIII
  48  *
  49  */
  50
  51 #include "bacula.h"
  52
  53 #include "htable.h"
  54
  55 static const int dbglvl = 500;
  56
  57 /* ===================================================================
  58  *    htable
  59  */
  60
  61 #ifdef BIG_MALLOC
  62 /*
  63  * This subroutine gets a big buffer.
  64  */
  65 void htable::malloc_big_buf(int size)
  66 {
  67    struct h_mem *hmem;
  68
  69    hmem = (struct h_mem *)malloc(size);
  70    total_size += size;
  71    blocks++;
  72    hmem->next = mem_block;
  73    mem_block = hmem;
  74    hmem->mem = mem_block->first;
  75    hmem->rem = (char *)hmem + size - hmem->mem;
  76    Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
  77 }
  78
  79 /* This routine frees the whole tree */
  80 void htable::hash_big_free()
  81 {
  82    struct h_mem *hmem, *rel;
  83
  84    for (hmem=mem_block; hmem; ) {
  85       rel = hmem;
  86       hmem = hmem->next;
  87       Dmsg1(100, "free malloc buf=%p\n", rel);
  88       free(rel);
  89    }
  90 }
  91
  92 #endif
  93
  94 /*
  95  * Normal hash malloc routine that gets a
  96  *  "small" buffer from the big buffer
  97  */
  98 char *htable::hash_malloc(int size)
  99 {
 100 #ifdef BIG_MALLOC
 101    char *buf;
 102    int asize = BALIGN(size);
 103
 104    if (mem_block->rem < asize) {
 105       uint32_t mb_size;
 106       if (total_size >= 1000000) {
 107          mb_size = 1000000;
 108       } else {
 109          mb_size = 100000;
 110       }
 111       malloc_big_buf(mb_size);
 112    }
 113    mem_block->rem -= asize;
 114    buf = mem_block->mem;
 115    mem_block->mem += asize;
 116    return buf;
 117 #else
 118    total_size += size;
 119    blocks++;
 120    return (char *)malloc(size);
 121 #endif
 122 }
 123
 124
 125
 126
 127 /*
 128  * Create hash of key, stored in hash then
 129  *  create and return the pseudo random bucket index
 130  */
 131 void htable::hash_index(char *key)
 132 {
 133    hash = 0;
 134    for (char *p=key; *p; p++) {
 135 //    hash += (hash << 3) + (uint32_t)*p;
 136       hash +=  ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
 137    }
 138    /* Multiply by large prime number, take top bits, mask for remainder */
 139    index = ((hash * 1103515249) >> rshift) & mask;
 140    Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
 141 }
 142
 143 /*
 144  * tsize is the estimated number of entries in the hash table
 145  */
 146 htable::htable(void *item, void *link, int tsize)
 147 {
 148    init(item, link, tsize);
 149 }
 150
 151 void htable::init(void *item, void *link, int tsize)
 152 {
 153    int pwr;
 154
 155    memset(this, 0, sizeof(htable));
 156    if (tsize < 31) {
 157       tsize = 31;
 158    }
 159    tsize >>= 2;
 160    for (pwr=0; tsize; pwr++) {
 161       tsize >>= 1;
 162    }
 163    loffset = (char *)link - (char *)item;
 164    mask = ~((~0)<<pwr);               /* 3 bits => table size = 8 */
 165    rshift = 30 - pwr;                 /* start using bits 28, 29, 30 */
 166    buckets = 1<<pwr;                  /* hash table size -- power of two */
 167    max_items = buckets * 4;           /* allow average 4 entries per chain */
 168    table = (hlink **)malloc(buckets * sizeof(hlink *));
 169    memset(table, 0, buckets * sizeof(hlink *));
 170 #ifdef BIG_MALLOC
 171    malloc_big_buf(1000000);   /* ***FIXME*** need variable or some estimate */
 172 #endif
 173 }
 174
 175 uint32_t htable::size()
 176 {
 177    return num_items;
 178 }
 179
 180 /*
 181  * Take each hash link and walk down the chain of items
 182  *  that hash there counting them (i.e. the hits),
 183  *  then report that number.
 184  * Obiously, the more hits in a chain, the more time
 185  *  it takes to reference them. Empty chains are not so
 186  *  hot either -- as it means unused or wasted space.
 187  */
 188 #define MAX_COUNT 20
 189 void htable::stats()
 190 {
 191    int hits[MAX_COUNT];
 192    int max = 0;
 193    int i, j;
 194    hlink *p;
 195    printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
 196    printf("Hits/bucket: buckets\n");
 197    for (i=0; i < MAX_COUNT; i++) {
 198       hits[i] = 0;
 199    }
 200    for (i=0; i<(int)buckets; i++) {
 201       p = table[i];
 202       j = 0;
 203       while (p) {
 204          p = (hlink *)(p->next);
 205          j++;
 206       }
 207       if (j > max) {
 208          max = j;
 209       }
 210       if (j < MAX_COUNT) {
 211          hits[j]++;
 212       }
 213    }
 214    for (i=0; i < MAX_COUNT; i++) {
 215       printf("%2d:           %d\n",i, hits[i]);
 216    }
 217    printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
 218    printf("max hits in a bucket = %d\n", max);
 219 #ifdef BIG_MALLOC
 220    printf("total bytes malloced = %d\n", total_size);
 221    printf("total blocks malloced = %d\n", blocks);
 222 #endif
 223 }
 224
 225 void htable::grow_table()
 226 {
 227    Dmsg1(100, "Grow called old size = %d\n", buckets);
 228    /* Setup a bigger table */
 229    htable *big = (htable *)malloc(sizeof(htable));
 230    memcpy(big, this, sizeof(htable));  /* start with original class data */
 231    big->loffset = loffset;
 232    big->mask = mask<<1 | 1;
 233    big->rshift = rshift - 1;
 234    big->num_items = 0;
 235    big->buckets = buckets * 2;
 236    big->max_items = big->buckets * 4;
 237    /* Create a bigger hash table */
 238    big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
 239    memset(big->table, 0, big->buckets * sizeof(hlink *));
 240    big->walkptr = NULL;
 241    big->walk_index = 0;
 242    /* Insert all the items in the new hash table */
 243    Dmsg1(100, "Before copy num_items=%d\n", num_items);
 244    /*
 245     * We walk through the old smaller tree getting items,
 246     * but since we are overwriting the colision links, we must
 247     * explicitly save the item->next pointer and walk each
 248     * colision chain ourselves.  We do use next() for getting
 249     * to the next bucket.
 250     */
 251    for (void *item=first(); item; ) {
 252       void *ni = ((hlink *)((char *)item+loffset))->next;  /* save link overwritten by insert */
 253       Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
 254       big->insert(((hlink *)((char *)item+loffset))->key, item);
 255       if (ni) {
 256          item = (void *)((char *)ni-loffset);
 257       } else {
 258          walkptr = NULL;
 259          item = next();
 260       }
 261    }
 262    Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
 263    if (num_items != big->num_items) {
 264       Dmsg0(000, "****** Big problems num_items mismatch ******\n");
 265    }
 266    free(table);
 267    memcpy(this, big, sizeof(htable));  /* move everything across */
 268    free(big);
 269    Dmsg0(100, "Exit grow.\n");
 270 }
 271
 272 bool htable::insert(char *key, void *item)
 273 {
 274    hlink *hp;
 275    if (lookup(key)) {
 276       return false;                   /* already exists */
 277    }
 278    ASSERT(index < buckets);
 279    Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
 280    hp = (hlink *)(((char *)item)+loffset);
 281    Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
 282       index, item, loffset);
 283    hp->next = table[index];
 284    hp->hash = hash;
 285    hp->key = key;
 286    table[index] = hp;
 287    Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
 288       hp->next, hp->hash, hp->key);
 289
 290    if (++num_items >= max_items) {
 291       Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
 292       grow_table();
 293    }
 294    Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
 295    return true;
 296 }
 297
 298 void *htable::lookup(char *key)
 299 {
 300    hash_index(key);
 301    for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
 302 //    Dmsg2(100, "hp=%p key=%s\n", hp, hp->key);
 303       if (hash == hp->hash && strcmp(key, hp->key) == 0) {
 304          Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
 305          return ((char *)hp)-loffset;
 306       }
 307    }
 308    return NULL;
 309 }
 310
 311 void *htable::next()
 312 {
 313    Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
 314    if (walkptr) {
 315       walkptr = (hlink *)(walkptr->next);
 316    }
 317    while (!walkptr && walk_index < buckets) {
 318       walkptr = table[walk_index++];
 319       if (walkptr) {
 320          Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
 321             walkptr->next, walk_index-1);
 322       }
 323    }
 324    if (walkptr) {
 325       Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
 326          ((char *)walkptr)-loffset, walk_index);
 327       return ((char *)walkptr)-loffset;
 328    }
 329    Dmsg0(dbglvl, "next: return NULL\n");
 330    return NULL;
 331 }
 332
 333 void *htable::first()
 334 {
 335    Dmsg0(dbglvl, "Enter first\n");
 336    walkptr = table[0];                /* get first bucket */
 337    walk_index = 1;                    /* Point to next index */
 338    while (!walkptr && walk_index < buckets) {
 339       walkptr = table[walk_index++];  /* go to next bucket */
 340       if (walkptr) {
 341          Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
 342             walkptr->next, walk_index-1);
 343       }
 344    }
 345    if (walkptr) {
 346       Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
 347       return ((char *)walkptr)-loffset;
 348    }
 349    Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
 350    return NULL;
 351 }
 352
 353 /* Destroy the table and its contents */
 354 void htable::destroy()
 355 {
 356 #ifdef BIG_MALLOC
 357    hash_big_free();
 358 #else
 359    void *ni;
 360    void *li = first();
 361
 362    while (li) {
 363       ni = next();
 364       free(li);
 365       li=ni;
 366    }
 367 #endif
 368
 369    free(table);
 370    table = NULL;
 371    Dmsg0(100, "Done destroy.\n");
 372 }
 373
 374
 375
 376 #ifdef TEST_PROGRAM
 377
 378 struct MYJCR {
 379    char *key;
 380    hlink link;
 381 };
 382
 383 #define NITEMS 5000000
 384
 385 int main()
 386 {
 387    char mkey[30];
 388    htable *jcrtbl;
 389    MYJCR *save_jcr = NULL, *item;
 390    MYJCR *jcr = NULL;
 391    int count = 0;
 392
 393    jcrtbl = (htable *)malloc(sizeof(htable));
 394    jcrtbl->init(jcr, &jcr->link, NITEMS);
 395
 396    Dmsg1(000, "Inserting %d items\n", NITEMS);
 397    for (int i=0; i<NITEMS; i++) {
 398       int len;
 399       len = sprintf(mkey, "This is htable item %d", i) + 1;
 400
 401       jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
 402       jcr->key = (char *)jcrtbl->hash_malloc(len);
 403       memcpy(jcr->key, mkey, len);
 404       Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
 405
 406       jcrtbl->insert(jcr->key, jcr);
 407       if (i == 10) {
 408          save_jcr = jcr;
 409       }
 410    }
 411    if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
 412       printf("Bad news: %s not found.\n", save_jcr->key);
 413    } else {
 414       printf("Item 10's key is: %s\n", item->key);
 415    }
 416
 417    jcrtbl->stats();
 418    printf("Walk the hash table:\n");
 419    foreach_htable (jcr, jcrtbl) {
 420 //    printf("htable item = %s\n", jcr->key);
 421 #ifndef BIG_MALLOC
 422       free(jcr->key);
 423 #endif
 424       count++;
 425    }
 426    printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
 427    printf("Calling destroy\n");
 428    jcrtbl->destroy();
 429
 430    free(jcrtbl);
 431    printf("Freed jcrtbl\n");
 432
 433    sm_dump(false);   /* unit test */
 434
 435 }
 436 #endif