2 Bacula® - The Network Backup Solution
4 Copyright (C) 2003-2010 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula hash table routines
31 * htable is a hash table of items (pointers). This code is
32 * adapted and enhanced from code I wrote in 1982 for a
33 * relocatable linker. At that time, the hash table size
34 * was fixed and a prime number, which essentially provides
35 * the randomness. In this program, the hash table can grow when
36 * it gets too full, so the table size here is a binary number. The
37 * hashing is provided using an idea from Tcl where the initial
38 * hash code is "randomized" using a simple calculation from
39 * a random number generator that multiplies by a big number
40 * (I multiply by a prime number, while Tcl did not)
41 * then shifts the result down and does the binary division
42 * by masking. Increasing the size of the hash table is simple.
43 * Just create a new larger table, walk the old table and
44 * re-hash insert each entry into the new table.
47 * Kern Sibbald, July MMIII
/*
 * Sizing for the "big buffer" allocator: small hash_malloc() requests
 * are carved out of large malloc()ed chunks up to ~10MB total.
 */
#define PAGE_SIZE 4096
#define MAX_PAGES 2400
#define MAX_BUF_SIZE (MAX_PAGES * PAGE_SIZE)  /* approx 10MB */

/* Debug level used by the Dmsg tracing calls in this file */
static const int dbglvl = 500;
60 /* ===================================================================
66 * This subroutine gets a big buffer.
68 void htable::malloc_big_buf(int size)
72 hmem = (struct h_mem *)malloc(size);
75 hmem->next = mem_block;
77 hmem->mem = mem_block->first;
78 hmem->rem = (char *)hmem + size - hmem->mem;
79 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
82 /* This routine frees the whole tree */
83 void htable::hash_big_free()
85 struct h_mem *hmem, *rel;
87 for (hmem=mem_block; hmem; ) {
90 Dmsg1(100, "free malloc buf=%p\n", rel);
98 * Normal hash malloc routine that gets a
99 * "small" buffer from the big buffer
101 char *htable::hash_malloc(int size)
105 int asize = BALIGN(size);
107 if (mem_block->rem < asize) {
109 if (total_size >= (MAX_BUF_SIZE / 2)) {
110 mb_size = MAX_BUF_SIZE;
112 mb_size = MAX_BUF_SIZE / 2;
114 malloc_big_buf(mb_size);
116 mem_block->rem -= asize;
117 buf = mem_block->mem;
118 mem_block->mem += asize;
123 return (char *)malloc(size);
131 * Create hash of key, stored in hash then
132 * create and return the pseudo random bucket index
134 void htable::hash_index(char *key)
137 for (char *p=key; *p; p++) {
138 // hash += (hash << 3) + (uint32_t)*p;
139 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
141 /* Multiply by large prime number, take top bits, mask for remainder */
142 index = ((hash * 1103515249) >> rshift) & mask;
143 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
147 * tsize is the estimated number of entries in the hash table
149 htable::htable(void *item, void *link, int tsize)
151 init(item, link, tsize);
154 void htable::init(void *item, void *link, int tsize)
158 memset(this, 0, sizeof(htable));
163 for (pwr=0; tsize; pwr++) {
166 loffset = (char *)link - (char *)item;
167 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
168 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
169 buckets = 1<<pwr; /* hash table size -- power of two */
170 max_items = buckets * 4; /* allow average 4 entries per chain */
171 table = (hlink **)malloc(buckets * sizeof(hlink *));
172 memset(table, 0, buckets * sizeof(hlink *));
174 malloc_big_buf(MAX_BUF_SIZE); /* ***FIXME*** need variable or some estimate */
178 uint32_t htable::size()
184 * Take each hash link and walk down the chain of items
185 * that hash there counting them (i.e. the hits),
186 * then report that number.
187 * Obiously, the more hits in a chain, the more time
188 * it takes to reference them. Empty chains are not so
189 * hot either -- as it means unused or wasted space.
198 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
199 printf("Hits/bucket: buckets\n");
200 for (i=0; i < MAX_COUNT; i++) {
203 for (i=0; i<(int)buckets; i++) {
207 p = (hlink *)(p->next);
217 for (i=0; i < MAX_COUNT; i++) {
218 printf("%2d: %d\n",i, hits[i]);
220 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
221 printf("max hits in a bucket = %d\n", max);
223 printf("total bytes malloced = %d\n", total_size);
224 printf("total blocks malloced = %d\n", blocks);
228 void htable::grow_table()
230 Dmsg1(100, "Grow called old size = %d\n", buckets);
231 /* Setup a bigger table */
232 htable *big = (htable *)malloc(sizeof(htable));
233 memcpy(big, this, sizeof(htable)); /* start with original class data */
234 big->loffset = loffset;
235 big->mask = mask<<1 | 1;
236 big->rshift = rshift - 1;
238 big->buckets = buckets * 2;
239 big->max_items = big->buckets * 4;
240 /* Create a bigger hash table */
241 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
242 memset(big->table, 0, big->buckets * sizeof(hlink *));
245 /* Insert all the items in the new hash table */
246 Dmsg1(100, "Before copy num_items=%d\n", num_items);
248 * We walk through the old smaller tree getting items,
249 * but since we are overwriting the colision links, we must
250 * explicitly save the item->next pointer and walk each
251 * colision chain ourselves. We do use next() for getting
252 * to the next bucket.
254 for (void *item=first(); item; ) {
255 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
256 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
257 big->insert(((hlink *)((char *)item+loffset))->key, item);
259 item = (void *)((char *)ni-loffset);
265 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
266 if (num_items != big->num_items) {
267 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
270 memcpy(this, big, sizeof(htable)); /* move everything across */
272 Dmsg0(100, "Exit grow.\n");
275 bool htable::insert(char *key, void *item)
279 return false; /* already exists */
281 ASSERT(index < buckets);
282 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
283 hp = (hlink *)(((char *)item)+loffset);
284 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
285 index, item, loffset);
286 hp->next = table[index];
290 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
291 hp->next, hp->hash, hp->key);
293 if (++num_items >= max_items) {
294 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
297 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
301 void *htable::lookup(char *key)
304 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
305 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key);
306 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
307 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
308 return ((char *)hp)-loffset;
316 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
318 walkptr = (hlink *)(walkptr->next);
320 while (!walkptr && walk_index < buckets) {
321 walkptr = table[walk_index++];
323 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
324 walkptr->next, walk_index-1);
328 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
329 ((char *)walkptr)-loffset, walk_index);
330 return ((char *)walkptr)-loffset;
332 Dmsg0(dbglvl, "next: return NULL\n");
336 void *htable::first()
338 Dmsg0(dbglvl, "Enter first\n");
339 walkptr = table[0]; /* get first bucket */
340 walk_index = 1; /* Point to next index */
341 while (!walkptr && walk_index < buckets) {
342 walkptr = table[walk_index++]; /* go to next bucket */
344 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
345 walkptr->next, walk_index-1);
349 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
350 return ((char *)walkptr)-loffset;
352 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
356 /* Destroy the table and its contents */
357 void htable::destroy()
374 Dmsg0(100, "Done destroy.\n");
386 #define NITEMS 5000000
392 MYJCR *save_jcr = NULL, *item;
396 jcrtbl = (htable *)malloc(sizeof(htable));
397 jcrtbl->init(jcr, &jcr->link, NITEMS);
399 Dmsg1(000, "Inserting %d items\n", NITEMS);
400 for (int i=0; i<NITEMS; i++) {
402 len = sprintf(mkey, "This is htable item %d", i) + 1;
404 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
405 jcr->key = (char *)jcrtbl->hash_malloc(len);
406 memcpy(jcr->key, mkey, len);
407 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
409 jcrtbl->insert(jcr->key, jcr);
414 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
415 printf("Bad news: %s not found.\n", save_jcr->key);
417 printf("Item 10's key is: %s\n", item->key);
421 printf("Walk the hash table:\n");
422 foreach_htable (jcr, jcrtbl) {
423 // printf("htable item = %s\n", jcr->key);
429 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
430 printf("Calling destroy\n");
434 printf("Freed jcrtbl\n");
436 sm_dump(false); /* unit test */