git.sur5r.net Git - bacula/bacula/blob - bacula/src/lib/htable.c

   1 /*
   2    Bacula® - The Network Backup Solution
   3
   4    Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
   5
   6    The main author of Bacula is Kern Sibbald, with contributions from
   7    many others, a complete list can be found in the file AUTHORS.
   8    This program is Free Software; you can redistribute it and/or
   9    modify it under the terms of version three of the GNU Affero General Public
  10    License as published by the Free Software Foundation and included
  11    in the file LICENSE.
  12
  13    This program is distributed in the hope that it will be useful, but
  14    WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16    General Public License for more details.
  17
  18    You should have received a copy of the GNU Affero General Public License
  19    along with this program; if not, write to the Free Software
  20    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  21    02110-1301, USA.
  22
  23    Bacula® is a registered trademark of Kern Sibbald.
  24    The licensor of Bacula is the Free Software Foundation Europe
  25    (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
  26    Switzerland, email:ftf@fsfeurope.org.
  27 */
  28 /*
  29  *  Bacula hash table routines
  30  *
  31  *  htable is a hash table of items (pointers). This code is
  32  *    adapted and enhanced from code I wrote in 1982 for a
  33  *    relocatable linker.  At that time, the hash table size
  34  *    was fixed and a primary number, which essentially provides
  35  *    the randomness. In this program, the hash table can grow when
  36  *    it gets too full, so the table size here is a binary number. The
  37  *    hashing is provided using an idea from Tcl where the initial
  38  *    hash code is "randomized" using a simple calculation from
  39  *    a random number generator that multiplies by a big number
  40  *    (I multiply by a prime number, while Tcl did not)
  41  *    then shifts the result down and does the binary division
  42  *    by masking.  Increasing the size of the hash table is simple.
  43  *    Just create a new larger table, walk the old table and
  44  *    re-hash insert each entry into the new table.
  45  *
  46  *
  47  *   Kern Sibbald, July MMIII
  48  *
  49  */
  50
  51 #include "bacula.h"
  52
  53 #define B_PAGE_SIZE 4096
  54 #define MIN_PAGES 32
  55 #define MAX_PAGES 2400
  56 #define MIN_BUF_SIZE (MIN_PAGES * B_PAGE_SIZE) /* 128 Kb */
  57 #define MAX_BUF_SIZE (MAX_PAGES * B_PAGE_SIZE) /* approx 10MB */
  58
  59 static const int dbglvl = 500;
  60
  61 /* ===================================================================
  62  *    htable
  63  */
  64
  65 /*
  66  * This subroutine gets a big buffer.
  67  */
  68 void htable::malloc_big_buf(int size)
  69 {
  70    struct h_mem *hmem;
  71
  72    hmem = (struct h_mem *)malloc(size);
  73    total_size += size;
  74    blocks++;
  75    hmem->next = mem_block;
  76    mem_block = hmem;
  77    hmem->mem = mem_block->first;
  78    hmem->rem = (char *)hmem + size - hmem->mem;
  79    Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
  80 }
  81
  82 /* This routine frees the whole tree */
  83 void htable::hash_big_free()
  84 {
  85    struct h_mem *hmem, *rel;
  86
  87    for (hmem=mem_block; hmem; ) {
  88       rel = hmem;
  89       hmem = hmem->next;
  90       Dmsg1(100, "free malloc buf=%p\n", rel);
  91       free(rel);
  92    }
  93 }
  94
  95 /*
  96  * Normal hash malloc routine that gets a
  97  *  "small" buffer from the big buffer
  98  */
  99 char *htable::hash_malloc(int size)
 100 {
 101    int mb_size;
 102    char *buf;
 103    int asize = BALIGN(size);
 104
 105    if (mem_block->rem < asize) {
 106       if (total_size >= (extend_length / 2)) {
 107          mb_size = extend_length;
 108       } else {
 109          mb_size = extend_length / 2;
 110       }
 111       malloc_big_buf(mb_size);
 112       Dmsg1(100, "Created new big buffer of %ld bytes\n", mb_size);
 113    }
 114    mem_block->rem -= asize;
 115    buf = mem_block->mem;
 116    mem_block->mem += asize;
 117    return buf;
 118 }
 119
 120 /*
 121  * Create hash of key, stored in hash then
 122  *  create and return the pseudo random bucket index
 123  */
 124 void htable::hash_index(char *key)
 125 {
 126    hash = 0;
 127    for (char *p=key; *p; p++) {
 128       hash +=  ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
 129    }
 130    /* Multiply by large prime number, take top bits, mask for remainder */
 131    index = ((hash * 1103515249) >> rshift) & mask;
 132    Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
 133 }
 134
 135 void htable::hash_index(uint32_t key)
 136 {
 137    hash = key;
 138    /* Multiply by large prime number, take top bits, mask for remainder */
 139    index = ((hash * 1103515249) >> rshift) & mask;
 140    Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
 141 }
 142
 143 void htable::hash_index(uint64_t key)
 144 {
 145    hash = key;
 146    /* Multiply by large prime number, take top bits, mask for remainder */
 147    index = ((hash * 1103515249) >> rshift) & mask;
 148    Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
 149 }
 150
 151 /*
 152  * tsize is the estimated number of entries in the hash table
 153  */
 154 htable::htable(void *item, void *link, int tsize, int nr_pages)
 155 {
 156    init(item, link, tsize, nr_pages);
 157 }
 158
 159 void htable::init(void *item, void *link, int tsize, int nr_pages)
 160 {
 161    int pwr;
 162    int pagesize;
 163    int buffer_size;
 164
 165    memset(this, 0, sizeof(htable));
 166    if (tsize < 31) {
 167       tsize = 31;
 168    }
 169    tsize >>= 2;
 170    for (pwr=0; tsize; pwr++) {
 171       tsize >>= 1;
 172    }
 173    loffset = (char *)link - (char *)item;
 174    mask = ~((~0) << pwr);             /* 3 bits => table size = 8 */
 175    rshift = 30 - pwr;                 /* start using bits 28, 29, 30 */
 176    buckets = 1 << pwr;                /* hash table size -- power of two */
 177    max_items = buckets * 4;           /* allow average 4 entries per chain */
 178    table = (hlink **)malloc(buckets * sizeof(hlink *));
 179    memset(table, 0, buckets * sizeof(hlink *));
 180
 181 #ifdef HAVE_GETPAGESIZE
 182    pagesize = getpagesize();
 183 #else
 184    pagesize = B_PAGE_SIZE;
 185 #endif
 186    if (nr_pages == 0) {
 187       buffer_size = MAX_BUF_SIZE;
 188    } else {
 189       buffer_size = pagesize * nr_pages;
 190       if (buffer_size > MAX_BUF_SIZE) {
 191          buffer_size = MAX_BUF_SIZE;
 192       } else if (buffer_size < MIN_BUF_SIZE) {
 193          buffer_size = MIN_BUF_SIZE;
 194       }
 195    }
 196    malloc_big_buf(buffer_size);
 197    extend_length = buffer_size;
 198    Dmsg1(100, "Allocated big buffer of %ld bytes\n", buffer_size);
 199 }
 200
 201 uint32_t htable::size()
 202 {
 203    return num_items;
 204 }
 205
 206 /*
 207  * Take each hash link and walk down the chain of items
 208  *  that hash there counting them (i.e. the hits),
 209  *  then report that number.
 210  * Obiously, the more hits in a chain, the more time
 211  *  it takes to reference them. Empty chains are not so
 212  *  hot either -- as it means unused or wasted space.
 213  */
 214 #define MAX_COUNT 20
 215 void htable::stats()
 216 {
 217    int hits[MAX_COUNT];
 218    int max = 0;
 219    int i, j;
 220    hlink *p;
 221    printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
 222    printf("Hits/bucket: buckets\n");
 223    for (i=0; i < MAX_COUNT; i++) {
 224       hits[i] = 0;
 225    }
 226    for (i=0; i<(int)buckets; i++) {
 227       p = table[i];
 228       j = 0;
 229       while (p) {
 230          p = (hlink *)(p->next);
 231          j++;
 232       }
 233       if (j > max) {
 234          max = j;
 235       }
 236       if (j < MAX_COUNT) {
 237          hits[j]++;
 238       }
 239    }
 240    for (i=0; i < MAX_COUNT; i++) {
 241       printf("%2d:           %d\n",i, hits[i]);
 242    }
 243    printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
 244    printf("max hits in a bucket = %d\n", max);
 245    printf("total bytes malloced = %lld\n", (long long int)total_size);
 246    printf("total blocks malloced = %d\n", blocks);
 247 }
 248
 249 void htable::grow_table()
 250 {
 251    htable *big;
 252    hlink *cur;
 253    void *ni;
 254
 255    Dmsg1(100, "Grow called old size = %d\n", buckets);
 256    /* Setup a bigger table */
 257    big = (htable *)malloc(sizeof(htable));
 258    memcpy(big, this, sizeof(htable));  /* start with original class data */
 259    big->loffset = loffset;
 260    big->mask = mask<<1 | 1;
 261    big->rshift = rshift - 1;
 262    big->num_items = 0;
 263    big->buckets = buckets * 2;
 264    big->max_items = big->buckets * 4;
 265    /* Create a bigger hash table */
 266    big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
 267    memset(big->table, 0, big->buckets * sizeof(hlink *));
 268    big->walkptr = NULL;
 269    big->walk_index = 0;
 270    /* Insert all the items in the new hash table */
 271    Dmsg1(100, "Before copy num_items=%d\n", num_items);
 272    /*
 273     * We walk through the old smaller tree getting items,
 274     * but since we are overwriting the colision links, we must
 275     * explicitly save the item->next pointer and walk each
 276     * colision chain ourselves.  We do use next() for getting
 277     * to the next bucket.
 278     */
 279    for (void *item=first(); item; ) {
 280       cur = (hlink *)((char *)item+loffset);
 281       ni = cur->next; /* save link overwritten by insert */
 282       switch (cur->key_type) {
 283       case KEY_TYPE_CHAR:
 284          Dmsg1(100, "Grow insert: %s\n", cur->key.char_key);
 285          big->insert(cur->key.char_key, item);
 286          break;
 287       case KEY_TYPE_UINT32:
 288          Dmsg1(100, "Grow insert: %ld\n", cur->key.uint32_key);
 289          big->insert(cur->key.uint32_key, item);
 290          break;
 291       case KEY_TYPE_UINT64:
 292          Dmsg1(100, "Grow insert: %ld\n", cur->key.uint64_key);
 293          big->insert(cur->key.uint64_key, item);
 294          break;
 295       }
 296       if (ni) {
 297          item = (void *)((char *)ni-loffset);
 298       } else {
 299          walkptr = NULL;
 300          item = next();
 301       }
 302    }
 303    Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
 304    if (num_items != big->num_items) {
 305       Dmsg0(000, "****** Big problems num_items mismatch ******\n");
 306    }
 307    free(table);
 308    memcpy(this, big, sizeof(htable));  /* move everything across */
 309    free(big);
 310    Dmsg0(100, "Exit grow.\n");
 311 }
 312
 313 bool htable::insert(char *key, void *item)
 314 {
 315    hlink *hp;
 316
 317    if (lookup(key)) {
 318       return false;                   /* already exists */
 319    }
 320    ASSERT(index < buckets);
 321    Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
 322    hp = (hlink *)(((char *)item)+loffset);
 323    Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
 324       index, item, loffset);
 325    hp->next = table[index];
 326    hp->hash = hash;
 327    hp->key_type = KEY_TYPE_CHAR;
 328    hp->key.char_key = key;
 329    table[index] = hp;
 330    Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%s\n",
 331       hp->next, hp->hash, hp->key.char_key);
 332
 333    if (++num_items >= max_items) {
 334       Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
 335       grow_table();
 336    }
 337    Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
 338    return true;
 339 }
 340
 341 bool htable::insert(uint32_t key, void *item)
 342 {
 343    hlink *hp;
 344
 345    if (lookup(key)) {
 346       return false;                   /* already exists */
 347    }
 348    ASSERT(index < buckets);
 349    Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
 350    hp = (hlink *)(((char *)item)+loffset);
 351    Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
 352       index, item, loffset);
 353    hp->next = table[index];
 354    hp->hash = hash;
 355    hp->key_type = KEY_TYPE_UINT32;
 356    hp->key.uint32_key = key;
 357    table[index] = hp;
 358    Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%d\n",
 359       hp->next, hp->hash, hp->key.uint32_key);
 360
 361    if (++num_items >= max_items) {
 362       Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
 363       grow_table();
 364    }
 365    Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%d\n", index, num_items, key);
 366    return true;
 367 }
 368
 369 bool htable::insert(uint64_t key, void *item)
 370 {
 371    hlink *hp;
 372
 373    if (lookup(key)) {
 374       return false;                   /* already exists */
 375    }
 376    ASSERT(index < buckets);
 377    Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
 378    hp = (hlink *)(((char *)item)+loffset);
 379    Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
 380       index, item, loffset);
 381    hp->next = table[index];
 382    hp->hash = hash;
 383    hp->key_type = KEY_TYPE_UINT64;
 384    hp->key.uint64_key = key;
 385    table[index] = hp;
 386    Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%ld\n",
 387       hp->next, hp->hash, hp->key.uint64_key);
 388
 389    if (++num_items >= max_items) {
 390       Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
 391       grow_table();
 392    }
 393    Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%lld\n", index, num_items, key);
 394    return true;
 395 }
 396
 397 void *htable::lookup(char *key)
 398 {
 399    hash_index(key);
 400    for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
 401       ASSERT(hp->key_type == KEY_TYPE_CHAR);
 402 //    Dmsg2(100, "hp=%p key=%s\n", hp, hp->key.char_key);
 403       if (hash == hp->hash && strcmp(key, hp->key.char_key) == 0) {
 404          Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
 405          return ((char *)hp)-loffset;
 406       }
 407    }
 408    return NULL;
 409 }
 410
 411 void *htable::lookup(uint32_t key)
 412 {
 413    hash_index(key);
 414    for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
 415       ASSERT(hp->key_type == KEY_TYPE_UINT32);
 416       if (hash == hp->hash && key == hp->key.uint32_key) {
 417          Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
 418          return ((char *)hp)-loffset;
 419       }
 420    }
 421    return NULL;
 422 }
 423
 424 void *htable::lookup(uint64_t key)
 425 {
 426    hash_index(key);
 427    for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
 428       ASSERT(hp->key_type == KEY_TYPE_UINT64);
 429       if (hash == hp->hash && key == hp->key.uint64_key) {
 430          Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
 431          return ((char *)hp)-loffset;
 432       }
 433    }
 434    return NULL;
 435 }
 436
 437 void *htable::next()
 438 {
 439    Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
 440    if (walkptr) {
 441       walkptr = (hlink *)(walkptr->next);
 442    }
 443    while (!walkptr && walk_index < buckets) {
 444       walkptr = table[walk_index++];
 445       if (walkptr) {
 446          Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
 447             walkptr->next, walk_index-1);
 448       }
 449    }
 450    if (walkptr) {
 451       Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
 452          ((char *)walkptr)-loffset, walk_index);
 453       return ((char *)walkptr)-loffset;
 454    }
 455    Dmsg0(dbglvl, "next: return NULL\n");
 456    return NULL;
 457 }
 458
 459 void *htable::first()
 460 {
 461    Dmsg0(dbglvl, "Enter first\n");
 462    walkptr = table[0];                /* get first bucket */
 463    walk_index = 1;                    /* Point to next index */
 464    while (!walkptr && walk_index < buckets) {
 465       walkptr = table[walk_index++];  /* go to next bucket */
 466       if (walkptr) {
 467          Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
 468             walkptr->next, walk_index-1);
 469       }
 470    }
 471    if (walkptr) {
 472       Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
 473       return ((char *)walkptr)-loffset;
 474    }
 475    Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
 476    return NULL;
 477 }
 478
 479 /* Destroy the table and its contents */
 480 void htable::destroy()
 481 {
 482    hash_big_free();
 483
 484    free(table);
 485    table = NULL;
 486    garbage_collect_memory();
 487    Dmsg0(100, "Done destroy.\n");
 488 }
 489
 490
 491
 492 #ifdef TEST_PROGRAM
 493
 494 struct MYJCR {
 495 #ifndef TEST_NON_CHAR
 496    char *key;
 497 #else
 498    uint32_t key;
 499 #endif
 500    hlink link;
 501 };
 502
 503 #ifndef TEST_SMALL_HTABLE
 504 #define NITEMS 5000000
 505 #else
 506 #define NITEMS 5000
 507 #endif
 508
 509 int main()
 510 {
 511    char mkey[30];
 512    htable *jcrtbl;
 513    MYJCR *save_jcr = NULL, *item;
 514    MYJCR *jcr = NULL;
 515    int count = 0;
 516
 517    jcrtbl = (htable *)malloc(sizeof(htable));
 518
 519 #ifndef TEST_SMALL_HTABLE
 520    jcrtbl->init(jcr, &jcr->link, NITEMS);
 521 #else
 522    jcrtbl->init(jcr, &jcr->link, NITEMS, 128);
 523 #endif
 524    Dmsg1(000, "Inserting %d items\n", NITEMS);
 525    for (int i=0; i<NITEMS; i++) {
 526 #ifndef TEST_NON_CHAR
 527       int len;
 528       len = sprintf(mkey, "This is htable item %d", i) + 1;
 529
 530       jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
 531       jcr->key = (char *)jcrtbl->hash_malloc(len);
 532       memcpy(jcr->key, mkey, len);
 533 #else
 534       jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
 535       jcr->key = i;
 536 #endif
 537       Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
 538
 539       jcrtbl->insert(jcr->key, jcr);
 540       if (i == 10) {
 541          save_jcr = jcr;
 542       }
 543    }
 544    if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
 545       printf("Bad news: %s not found.\n", save_jcr->key);
 546    } else {
 547 #ifndef TEST_NON_CHAR
 548       printf("Item 10's key is: %s\n", item->key);
 549 #else
 550       printf("Item 10's key is: %ld\n", item->key);
 551 #endif
 552    }
 553
 554    jcrtbl->stats();
 555    printf("Walk the hash table:\n");
 556    foreach_htable (jcr, jcrtbl) {
 557 #ifndef TEST_NON_CHAR
 558 //    printf("htable item = %s\n", jcr->key);
 559 #ifndef BIG_MALLOC
 560       free(jcr->key);
 561 #endif
 562 #endif
 563       count++;
 564    }
 565    printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
 566    printf("Calling destroy\n");
 567    jcrtbl->destroy();
 568
 569    free(jcrtbl);
 570    printf("Freed jcrtbl\n");
 571
 572    sm_dump(false);   /* unit test */
 573
 574 }
 575 #endif