2 Bacula® - The Network Backup Solution
4 Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula hash table routines
31 * htable is a hash table of items (pointers). This code is
32 * adapted and enhanced from code I wrote in 1982 for a
33 * relocatable linker. At that time, the hash table size
34 * was fixed and a primary number, which essentially provides
35 * the randomness. In this program, the hash table can grow when
36 * it gets too full, so the table size here is a binary number. The
37 * hashing is provided using an idea from Tcl where the initial
38 * hash code is "randomized" using a simple calculation from
39 * a random number generator that multiplies by a big number
40 * (I multiply by a prime number, while Tcl did not)
41 * then shifts the result down and does the binary division
42 * by masking. Increasing the size of the hash table is simple.
43 * Just create a new larger table, walk the old table and
44 * re-hash insert each entry into the new table.
47 * Kern Sibbald, July MMIII
53 #define B_PAGE_SIZE 4096
55 #define MAX_PAGES 2400
56 #define MIN_BUF_SIZE (MIN_PAGES * B_PAGE_SIZE) /* 128 Kb */
57 #define MAX_BUF_SIZE (MAX_PAGES * B_PAGE_SIZE) /* approx 10MB */
59 static const int dbglvl = 500;
61 /* ===================================================================
66 * This subroutine gets a big buffer.
68 void htable::malloc_big_buf(int size)
72 hmem = (struct h_mem *)malloc(size);
75 hmem->next = mem_block;
77 hmem->mem = mem_block->first;
78 hmem->rem = (char *)hmem + size - hmem->mem;
79 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
82 /* This routine frees the whole tree */
83 void htable::hash_big_free()
85 struct h_mem *hmem, *rel;
87 for (hmem=mem_block; hmem; ) {
90 Dmsg1(100, "free malloc buf=%p\n", rel);
96 * Normal hash malloc routine that gets a
97 * "small" buffer from the big buffer
99 char *htable::hash_malloc(int size)
103 int asize = BALIGN(size);
105 if (mem_block->rem < asize) {
106 if (total_size >= (extend_length / 2)) {
107 mb_size = extend_length;
109 mb_size = extend_length / 2;
111 malloc_big_buf(mb_size);
112 Dmsg1(100, "Created new big buffer of %ld bytes\n", mb_size);
114 mem_block->rem -= asize;
115 buf = mem_block->mem;
116 mem_block->mem += asize;
121 * Create hash of key, stored in hash then
122 * create and return the pseudo random bucket index
124 void htable::hash_index(char *key)
127 for (char *p=key; *p; p++) {
128 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
130 /* Multiply by large prime number, take top bits, mask for remainder */
131 index = ((hash * 1103515249) >> rshift) & mask;
132 Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
135 void htable::hash_index(uint32_t key)
138 /* Multiply by large prime number, take top bits, mask for remainder */
139 index = ((hash * 1103515249) >> rshift) & mask;
140 Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
143 void htable::hash_index(uint64_t key)
146 /* Multiply by large prime number, take top bits, mask for remainder */
147 index = ((hash * 1103515249) >> rshift) & mask;
148 Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
152 * tsize is the estimated number of entries in the hash table
154 htable::htable(void *item, void *link, int tsize, int nr_pages)
156 init(item, link, tsize, nr_pages);
159 void htable::init(void *item, void *link, int tsize, int nr_pages)
165 memset(this, 0, sizeof(htable));
170 for (pwr=0; tsize; pwr++) {
173 loffset = (char *)link - (char *)item;
174 mask = ~((~0) << pwr); /* 3 bits => table size = 8 */
175 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
176 buckets = 1 << pwr; /* hash table size -- power of two */
177 max_items = buckets * 4; /* allow average 4 entries per chain */
178 table = (hlink **)malloc(buckets * sizeof(hlink *));
179 memset(table, 0, buckets * sizeof(hlink *));
181 #ifdef HAVE_GETPAGESIZE
182 pagesize = getpagesize();
184 pagesize = B_PAGE_SIZE;
187 buffer_size = MAX_BUF_SIZE;
189 buffer_size = pagesize * nr_pages;
190 if (buffer_size > MAX_BUF_SIZE) {
191 buffer_size = MAX_BUF_SIZE;
192 } else if (buffer_size < MIN_BUF_SIZE) {
193 buffer_size = MIN_BUF_SIZE;
196 malloc_big_buf(buffer_size);
197 extend_length = buffer_size;
198 Dmsg1(100, "Allocated big buffer of %ld bytes\n", buffer_size);
201 uint32_t htable::size()
207 * Take each hash link and walk down the chain of items
208 * that hash there counting them (i.e. the hits),
209 * then report that number.
210 * Obiously, the more hits in a chain, the more time
211 * it takes to reference them. Empty chains are not so
212 * hot either -- as it means unused or wasted space.
221 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
222 printf("Hits/bucket: buckets\n");
223 for (i=0; i < MAX_COUNT; i++) {
226 for (i=0; i<(int)buckets; i++) {
230 p = (hlink *)(p->next);
240 for (i=0; i < MAX_COUNT; i++) {
241 printf("%2d: %d\n",i, hits[i]);
243 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
244 printf("max hits in a bucket = %d\n", max);
245 printf("total bytes malloced = %lld\n", (long long int)total_size);
246 printf("total blocks malloced = %d\n", blocks);
249 void htable::grow_table()
255 Dmsg1(100, "Grow called old size = %d\n", buckets);
256 /* Setup a bigger table */
257 big = (htable *)malloc(sizeof(htable));
258 memcpy(big, this, sizeof(htable)); /* start with original class data */
259 big->loffset = loffset;
260 big->mask = mask<<1 | 1;
261 big->rshift = rshift - 1;
263 big->buckets = buckets * 2;
264 big->max_items = big->buckets * 4;
265 /* Create a bigger hash table */
266 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
267 memset(big->table, 0, big->buckets * sizeof(hlink *));
270 /* Insert all the items in the new hash table */
271 Dmsg1(100, "Before copy num_items=%d\n", num_items);
273 * We walk through the old smaller tree getting items,
274 * but since we are overwriting the colision links, we must
275 * explicitly save the item->next pointer and walk each
276 * colision chain ourselves. We do use next() for getting
277 * to the next bucket.
279 for (void *item=first(); item; ) {
280 cur = (hlink *)((char *)item+loffset);
281 ni = cur->next; /* save link overwritten by insert */
282 switch (cur->key_type) {
284 Dmsg1(100, "Grow insert: %s\n", cur->key.char_key);
285 big->insert(cur->key.char_key, item);
287 case KEY_TYPE_UINT32:
288 Dmsg1(100, "Grow insert: %ld\n", cur->key.uint32_key);
289 big->insert(cur->key.uint32_key, item);
291 case KEY_TYPE_UINT64:
292 Dmsg1(100, "Grow insert: %ld\n", cur->key.uint64_key);
293 big->insert(cur->key.uint64_key, item);
297 item = (void *)((char *)ni-loffset);
303 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
304 if (num_items != big->num_items) {
305 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
308 memcpy(this, big, sizeof(htable)); /* move everything across */
310 Dmsg0(100, "Exit grow.\n");
313 bool htable::insert(char *key, void *item)
318 return false; /* already exists */
320 ASSERT(index < buckets);
321 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
322 hp = (hlink *)(((char *)item)+loffset);
323 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
324 index, item, loffset);
325 hp->next = table[index];
327 hp->key_type = KEY_TYPE_CHAR;
328 hp->key.char_key = key;
330 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%s\n",
331 hp->next, hp->hash, hp->key.char_key);
333 if (++num_items >= max_items) {
334 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
337 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
341 bool htable::insert(uint32_t key, void *item)
346 return false; /* already exists */
348 ASSERT(index < buckets);
349 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
350 hp = (hlink *)(((char *)item)+loffset);
351 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
352 index, item, loffset);
353 hp->next = table[index];
355 hp->key_type = KEY_TYPE_UINT32;
356 hp->key.uint32_key = key;
358 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%d\n",
359 hp->next, hp->hash, hp->key.uint32_key);
361 if (++num_items >= max_items) {
362 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
365 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%d\n", index, num_items, key);
369 bool htable::insert(uint64_t key, void *item)
374 return false; /* already exists */
376 ASSERT(index < buckets);
377 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
378 hp = (hlink *)(((char *)item)+loffset);
379 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
380 index, item, loffset);
381 hp->next = table[index];
383 hp->key_type = KEY_TYPE_UINT64;
384 hp->key.uint64_key = key;
386 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%ld\n",
387 hp->next, hp->hash, hp->key.uint64_key);
389 if (++num_items >= max_items) {
390 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
393 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%lld\n", index, num_items, key);
397 void *htable::lookup(char *key)
400 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
401 ASSERT(hp->key_type == KEY_TYPE_CHAR);
402 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key.char_key);
403 if (hash == hp->hash && strcmp(key, hp->key.char_key) == 0) {
404 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
405 return ((char *)hp)-loffset;
411 void *htable::lookup(uint32_t key)
414 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
415 ASSERT(hp->key_type == KEY_TYPE_UINT32);
416 if (hash == hp->hash && key == hp->key.uint32_key) {
417 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
418 return ((char *)hp)-loffset;
424 void *htable::lookup(uint64_t key)
427 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
428 ASSERT(hp->key_type == KEY_TYPE_UINT64);
429 if (hash == hp->hash && key == hp->key.uint64_key) {
430 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
431 return ((char *)hp)-loffset;
439 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
441 walkptr = (hlink *)(walkptr->next);
443 while (!walkptr && walk_index < buckets) {
444 walkptr = table[walk_index++];
446 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
447 walkptr->next, walk_index-1);
451 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
452 ((char *)walkptr)-loffset, walk_index);
453 return ((char *)walkptr)-loffset;
455 Dmsg0(dbglvl, "next: return NULL\n");
459 void *htable::first()
461 Dmsg0(dbglvl, "Enter first\n");
462 walkptr = table[0]; /* get first bucket */
463 walk_index = 1; /* Point to next index */
464 while (!walkptr && walk_index < buckets) {
465 walkptr = table[walk_index++]; /* go to next bucket */
467 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
468 walkptr->next, walk_index-1);
472 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
473 return ((char *)walkptr)-loffset;
475 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
479 /* Destroy the table and its contents */
480 void htable::destroy()
486 garbage_collect_memory();
487 Dmsg0(100, "Done destroy.\n");
495 #ifndef TEST_NON_CHAR
503 #ifndef TEST_SMALL_HTABLE
504 #define NITEMS 5000000
513 MYJCR *save_jcr = NULL, *item;
517 jcrtbl = (htable *)malloc(sizeof(htable));
519 #ifndef TEST_SMALL_HTABLE
520 jcrtbl->init(jcr, &jcr->link, NITEMS);
522 jcrtbl->init(jcr, &jcr->link, NITEMS, 128);
524 Dmsg1(000, "Inserting %d items\n", NITEMS);
525 for (int i=0; i<NITEMS; i++) {
526 #ifndef TEST_NON_CHAR
528 len = sprintf(mkey, "This is htable item %d", i) + 1;
530 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
531 jcr->key = (char *)jcrtbl->hash_malloc(len);
532 memcpy(jcr->key, mkey, len);
534 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
537 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
539 jcrtbl->insert(jcr->key, jcr);
544 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
545 printf("Bad news: %s not found.\n", save_jcr->key);
547 #ifndef TEST_NON_CHAR
548 printf("Item 10's key is: %s\n", item->key);
550 printf("Item 10's key is: %ld\n", item->key);
555 printf("Walk the hash table:\n");
556 foreach_htable (jcr, jcrtbl) {
557 #ifndef TEST_NON_CHAR
558 // printf("htable item = %s\n", jcr->key);
565 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
566 printf("Calling destroy\n");
570 printf("Freed jcrtbl\n");
572 sm_dump(false); /* unit test */