2 Bacula® - The Network Backup Solution
4 Copyright (C) 2003-2011 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula hash table routines
31 * htable is a hash table of items (pointers). This code is
32 * adapted and enhanced from code I wrote in 1982 for a
33 * relocatable linker. At that time, the hash table size
34 * was fixed and a primary number, which essentially provides
35 * the randomness. In this program, the hash table can grow when
36 * it gets too full, so the table size here is a binary number. The
37 * hashing is provided using an idea from Tcl where the initial
38 * hash code is "randomized" using a simple calculation from
39 * a random number generator that multiplies by a big number
40 * (I multiply by a prime number, while Tcl did not)
41 * then shifts the result down and does the binary division
42 * by masking. Increasing the size of the hash table is simple.
43 * Just create a new larger table, walk the old table and
44 * re-hash insert each entry into the new table.
47 * Kern Sibbald, July MMIII
53 #define PAGE_SIZE 4096
54 #define MAX_PAGES 2400
55 #define MAX_BUF_SIZE (MAX_PAGES * PAGE_SIZE) /* approx 10MB */
57 static const int dbglvl = 500;
59 /* ===================================================================
64 * This subroutine gets a big buffer.
66 void htable::malloc_big_buf(int size)
70 hmem = (struct h_mem *)malloc(size);
73 hmem->next = mem_block;
75 hmem->mem = mem_block->first;
76 hmem->rem = (char *)hmem + size - hmem->mem;
77 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
80 /* This routine frees the whole tree */
81 void htable::hash_big_free()
83 struct h_mem *hmem, *rel;
85 for (hmem=mem_block; hmem; ) {
88 Dmsg1(100, "free malloc buf=%p\n", rel);
94 * Normal hash malloc routine that gets a
95 * "small" buffer from the big buffer
97 char *htable::hash_malloc(int size)
100 int asize = BALIGN(size);
102 if (mem_block->rem < asize) {
104 if (total_size >= (MAX_BUF_SIZE / 2)) {
105 mb_size = MAX_BUF_SIZE;
107 mb_size = MAX_BUF_SIZE / 2;
109 malloc_big_buf(mb_size);
111 mem_block->rem -= asize;
112 buf = mem_block->mem;
113 mem_block->mem += asize;
118 * Create hash of key, stored in hash then
119 * create and return the pseudo random bucket index
121 void htable::hash_index(char *key)
124 for (char *p=key; *p; p++) {
125 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
127 /* Multiply by large prime number, take top bits, mask for remainder */
128 index = ((hash * 1103515249) >> rshift) & mask;
129 Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
132 void htable::hash_index(uint32_t key)
135 /* Multiply by large prime number, take top bits, mask for remainder */
136 index = ((hash * 1103515249) >> rshift) & mask;
137 Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
140 void htable::hash_index(uint64_t key)
143 /* Multiply by large prime number, take top bits, mask for remainder */
144 index = ((hash * 1103515249) >> rshift) & mask;
145 Dmsg2(dbglvl, "Leave hash_index hash=0x%llx index=%d\n", hash, index);
149 * tsize is the estimated number of entries in the hash table
151 htable::htable(void *item, void *link, int tsize)
153 init(item, link, tsize);
156 void htable::init(void *item, void *link, int tsize)
160 memset(this, 0, sizeof(htable));
165 for (pwr=0; tsize; pwr++) {
168 loffset = (char *)link - (char *)item;
169 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
170 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
171 buckets = 1<<pwr; /* hash table size -- power of two */
172 max_items = buckets * 4; /* allow average 4 entries per chain */
173 table = (hlink **)malloc(buckets * sizeof(hlink *));
174 memset(table, 0, buckets * sizeof(hlink *));
175 malloc_big_buf(MAX_BUF_SIZE); /* ***FIXME*** need variable or some estimate */
178 uint32_t htable::size()
184 * Take each hash link and walk down the chain of items
185 * that hash there counting them (i.e. the hits),
186 * then report that number.
187 * Obiously, the more hits in a chain, the more time
188 * it takes to reference them. Empty chains are not so
189 * hot either -- as it means unused or wasted space.
198 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
199 printf("Hits/bucket: buckets\n");
200 for (i=0; i < MAX_COUNT; i++) {
203 for (i=0; i<(int)buckets; i++) {
207 p = (hlink *)(p->next);
217 for (i=0; i < MAX_COUNT; i++) {
218 printf("%2d: %d\n",i, hits[i]);
220 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
221 printf("max hits in a bucket = %d\n", max);
222 printf("total bytes malloced = %lld\n", (long long int)total_size);
223 printf("total blocks malloced = %d\n", blocks);
226 void htable::grow_table()
232 Dmsg1(100, "Grow called old size = %d\n", buckets);
233 /* Setup a bigger table */
234 big = (htable *)malloc(sizeof(htable));
235 memcpy(big, this, sizeof(htable)); /* start with original class data */
236 big->loffset = loffset;
237 big->mask = mask<<1 | 1;
238 big->rshift = rshift - 1;
240 big->buckets = buckets * 2;
241 big->max_items = big->buckets * 4;
242 /* Create a bigger hash table */
243 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
244 memset(big->table, 0, big->buckets * sizeof(hlink *));
247 /* Insert all the items in the new hash table */
248 Dmsg1(100, "Before copy num_items=%d\n", num_items);
250 * We walk through the old smaller tree getting items,
251 * but since we are overwriting the colision links, we must
252 * explicitly save the item->next pointer and walk each
253 * colision chain ourselves. We do use next() for getting
254 * to the next bucket.
256 for (void *item=first(); item; ) {
257 cur = (hlink *)((char *)item+loffset);
258 ni = cur->next; /* save link overwritten by insert */
259 switch (cur->key_type) {
261 Dmsg1(100, "Grow insert: %s\n", cur->key.char_key);
262 big->insert(cur->key.char_key, item);
264 case KEY_TYPE_UINT32:
265 Dmsg1(100, "Grow insert: %ld\n", cur->key.uint32_key);
266 big->insert(cur->key.uint32_key, item);
268 case KEY_TYPE_UINT64:
269 Dmsg1(100, "Grow insert: %ld\n", cur->key.uint64_key);
270 big->insert(cur->key.uint64_key, item);
274 item = (void *)((char *)ni-loffset);
280 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
281 if (num_items != big->num_items) {
282 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
285 memcpy(this, big, sizeof(htable)); /* move everything across */
287 Dmsg0(100, "Exit grow.\n");
290 bool htable::insert(char *key, void *item)
295 return false; /* already exists */
297 ASSERT(index < buckets);
298 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
299 hp = (hlink *)(((char *)item)+loffset);
300 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
301 index, item, loffset);
302 hp->next = table[index];
304 hp->key_type = KEY_TYPE_CHAR;
305 hp->key.char_key = key;
307 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%s\n",
308 hp->next, hp->hash, hp->key.char_key);
310 if (++num_items >= max_items) {
311 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
314 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
318 bool htable::insert(uint32_t key, void *item)
323 return false; /* already exists */
325 ASSERT(index < buckets);
326 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
327 hp = (hlink *)(((char *)item)+loffset);
328 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
329 index, item, loffset);
330 hp->next = table[index];
332 hp->key_type = KEY_TYPE_UINT32;
333 hp->key.uint32_key = key;
335 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%d\n",
336 hp->next, hp->hash, hp->key.uint32_key);
338 if (++num_items >= max_items) {
339 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
342 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%d\n", index, num_items, key);
346 bool htable::insert(uint64_t key, void *item)
351 return false; /* already exists */
353 ASSERT(index < buckets);
354 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
355 hp = (hlink *)(((char *)item)+loffset);
356 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
357 index, item, loffset);
358 hp->next = table[index];
360 hp->key_type = KEY_TYPE_UINT64;
361 hp->key.uint64_key = key;
363 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%llx hp->key=%ld\n",
364 hp->next, hp->hash, hp->key.uint64_key);
366 if (++num_items >= max_items) {
367 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
370 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%lld\n", index, num_items, key);
374 void *htable::lookup(char *key)
377 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
378 ASSERT(hp->key_type == KEY_TYPE_CHAR);
379 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key.char_key);
380 if (hash == hp->hash && strcmp(key, hp->key.char_key) == 0) {
381 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
382 return ((char *)hp)-loffset;
388 void *htable::lookup(uint32_t key)
391 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
392 ASSERT(hp->key_type == KEY_TYPE_UINT32);
393 if (hash == hp->hash && key == hp->key.uint32_key) {
394 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
395 return ((char *)hp)-loffset;
401 void *htable::lookup(uint64_t key)
404 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
405 ASSERT(hp->key_type == KEY_TYPE_UINT64);
406 if (hash == hp->hash && key == hp->key.uint64_key) {
407 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
408 return ((char *)hp)-loffset;
416 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
418 walkptr = (hlink *)(walkptr->next);
420 while (!walkptr && walk_index < buckets) {
421 walkptr = table[walk_index++];
423 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
424 walkptr->next, walk_index-1);
428 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
429 ((char *)walkptr)-loffset, walk_index);
430 return ((char *)walkptr)-loffset;
432 Dmsg0(dbglvl, "next: return NULL\n");
436 void *htable::first()
438 Dmsg0(dbglvl, "Enter first\n");
439 walkptr = table[0]; /* get first bucket */
440 walk_index = 1; /* Point to next index */
441 while (!walkptr && walk_index < buckets) {
442 walkptr = table[walk_index++]; /* go to next bucket */
444 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
445 walkptr->next, walk_index-1);
449 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
450 return ((char *)walkptr)-loffset;
452 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
456 /* Destroy the table and its contents */
457 void htable::destroy()
463 garbage_collect_memory();
464 Dmsg0(100, "Done destroy.\n");
472 #ifndef TEST_NON_CHAR
480 #define NITEMS 5000000
486 MYJCR *save_jcr = NULL, *item;
490 jcrtbl = (htable *)malloc(sizeof(htable));
491 jcrtbl->init(jcr, &jcr->link, NITEMS);
493 Dmsg1(000, "Inserting %d items\n", NITEMS);
494 for (int i=0; i<NITEMS; i++) {
495 #ifndef TEST_NON_CHAR
497 len = sprintf(mkey, "This is htable item %d", i) + 1;
499 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
500 jcr->key = (char *)jcrtbl->hash_malloc(len);
501 memcpy(jcr->key, mkey, len);
503 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
506 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
508 jcrtbl->insert(jcr->key, jcr);
513 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
514 printf("Bad news: %s not found.\n", save_jcr->key);
516 #ifndef TEST_NON_CHAR
517 printf("Item 10's key is: %s\n", item->key);
519 printf("Item 10's key is: %ld\n", item->key);
524 printf("Walk the hash table:\n");
525 foreach_htable (jcr, jcrtbl) {
526 #ifndef TEST_NON_CHAR
527 // printf("htable item = %s\n", jcr->key);
534 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
535 printf("Calling destroy\n");
539 printf("Freed jcrtbl\n");
541 sm_dump(false); /* unit test */