2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2015 Kern Sibbald
5 Copyright (C) 2003-2014 Free Software Foundation Europe e.V.
7 The original author of Bacula is Kern Sibbald, with contributions
8 from many others, a complete list can be found in the file AUTHORS.
10 You may use this file and others of this release according to the
11 license defined in the LICENSE file, which includes the Affero General
12 Public License, v3.0 ("AGPLv3") and some additional permissions and
13 terms pursuant to its AGPLv3 Section 7.
15 This notice must be preserved when any source code is
16 conveyed and/or propagated.
18 Bacula(R) is a registered trademark of Kern Sibbald.
21 * Bacula hash table routines
23 * htable is a hash table of items (pointers). This code is
24 * adapted and enhanced from code I wrote in 1982 for a
25 * relocatable linker. At that time, the hash table size
26 * was fixed and a primary number, which essentially provides
27 * the randomness. In this program, the hash table can grow when
28 * it gets too full, so the table size here is a binary number. The
29 * hashing is provided using an idea from Tcl where the initial
30 * hash code is "randomized" using a simple calculation from
31 * a random number generator that multiplies by a big number
32 * (I multiply by a prime number, while Tcl did not)
33 * then shifts the result down and does the binary division
34 * by masking. Increasing the size of the hash table is simple.
35 * Just create a new larger table, walk the old table and
36 * re-hash insert each entry into the new table.
39 * Kern Sibbald, July MMIII
46 #define lli long long int
47 static const int dbglvl = 500;
49 /* ===================================================================
55 * This subroutine gets a big buffer.
57 void htable::malloc_big_buf(int size)
61 hmem = (struct h_mem *)malloc(size);
64 hmem->next = mem_block;
66 hmem->mem = mem_block->first;
67 hmem->rem = (char *)hmem + size - hmem->mem;
68 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
71 /* This routine frees the whole tree */
72 void htable::hash_big_free()
74 struct h_mem *hmem, *rel;
76 for (hmem=mem_block; hmem; ) {
79 Dmsg1(100, "free malloc buf=%p\n", rel);
87 * Normal hash malloc routine that gets a
88 * "small" buffer from the big buffer
90 char *htable::hash_malloc(int size)
94 int asize = BALIGN(size);
96 if (mem_block->rem < asize) {
98 if (total_size >= 1000000) {
103 malloc_big_buf(mb_size);
105 mem_block->rem -= asize;
106 buf = mem_block->mem;
107 mem_block->mem += asize;
112 return (char *)malloc(size);
120 * Create hash of key, stored in hash then
121 * create and return the pseudo random bucket index
123 void htable::hash_index(char *key)
126 for (char *p=key; *p; p++) {
127 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
129 /* Multiply by large prime number, take top bits, mask for remainder */
130 index = ((hash * 1103515249LL) >> rshift) & mask;
131 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
134 void htable::hash_index(uint64_t ikey)
136 hash = ikey; /* already have starting binary hash */
137 /* Same algorithm as for char * */
138 index = ((hash * 1103515249LL) >> rshift) & mask;
139 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
143 * tsize is the estimated number of entries in the hash table
145 htable::htable(void *item, void *link, int tsize)
147 init(item, link, tsize);
150 void htable::init(void *item, void *link, int tsize)
154 memset(this, 0, sizeof(htable));
159 for (pwr=0; tsize; pwr++) {
162 loffset = (char *)link - (char *)item;
163 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
164 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
165 buckets = 1<<pwr; /* hash table size -- power of two */
166 max_items = buckets * 4; /* allow average 4 entries per chain */
167 table = (hlink **)malloc(buckets * sizeof(hlink *));
168 memset(table, 0, buckets * sizeof(hlink *));
170 malloc_big_buf(1000000); /* ***FIXME*** need variable or some estimate */
171 #endif /* BIG_MALLOC */
174 uint32_t htable::size()
180 * Take each hash link and walk down the chain of items
181 * that hash there counting them (i.e. the hits),
182 * then report that number.
183 * Obiously, the more hits in a chain, the more time
184 * it takes to reference them. Empty chains are not so
185 * hot either -- as it means unused or wasted space.
194 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
195 printf("Hits/bucket: buckets\n");
196 for (i=0; i < MAX_COUNT; i++) {
199 for (i=0; i<(int)buckets; i++) {
203 p = (hlink *)(p->next);
213 for (i=0; i < MAX_COUNT; i++) {
214 printf("%2d: %d\n",i, hits[i]);
216 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
217 printf("max hits in a bucket = %d\n", max);
219 printf("total bytes malloced = %lld\n", (lli)total_size);
220 printf("total blocks malloced = %d\n", blocks);
224 void htable::grow_table()
226 Dmsg1(100, "Grow called old size = %d\n", buckets);
227 /* Setup a bigger table */
228 htable *big = (htable *)malloc(sizeof(htable));
229 memcpy(big, this, sizeof(htable)); /* start with original class data */
230 big->loffset = loffset;
231 big->mask = mask<<1 | 1;
232 big->rshift = rshift - 1;
234 big->buckets = buckets * 2;
235 big->max_items = big->buckets * 4;
236 /* Create a bigger hash table */
237 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
238 memset(big->table, 0, big->buckets * sizeof(hlink *));
241 /* Insert all the items in the new hash table */
242 Dmsg1(100, "Before copy num_items=%d\n", num_items);
244 * We walk through the old smaller tree getting items,
245 * but since we are overwriting the colision links, we must
246 * explicitly save the item->next pointer and walk each
247 * colision chain ourselves. We do use next() for getting
248 * to the next bucket.
250 for (void *item=first(); item; ) {
251 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
252 hlink *hp = (hlink *)((char *)item+loffset);
254 Dmsg1(100, "Grow insert: %lld\n", hp->key.ikey);
255 big->insert(hp->key.ikey, item);
257 Dmsg1(100, "Grow insert: %s\n", hp->key.key);
258 big->insert(hp->key.key, item);
261 item = (void *)((char *)ni-loffset);
267 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
268 if (num_items != big->num_items) {
269 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
272 memcpy(this, big, sizeof(htable)); /* move everything across */
274 Dmsg0(100, "Exit grow.\n");
277 bool htable::insert(char *key, void *item)
281 return false; /* already exists */
283 ASSERT(index < buckets);
284 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
285 hp = (hlink *)(((char *)item)+loffset);
286 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
287 index, item, loffset);
288 hp->next = table[index];
293 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
294 hp->next, hp->hash, hp->key.key);
296 if (++num_items >= max_items) {
297 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
300 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
304 void *htable::lookup(char *key)
307 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
308 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key.key);
309 if (hash == hp->hash && strcmp(key, hp->key.key) == 0) {
310 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
311 return ((char *)hp)-loffset;
317 bool htable::insert(uint64_t ikey, void *item)
321 return false; /* already exists */
323 ASSERT(index < buckets);
324 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
325 hp = (hlink *)(((char *)item)+loffset);
326 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
328 hp->next = table[index];
333 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->ikey=%lld\n", hp->next,
334 hp->hash, hp->key.ikey);
336 if (++num_items >= max_items) {
337 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
340 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%lld\n",
341 index, num_items, ikey);
345 void *htable::lookup(uint64_t ikey)
348 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
349 // Dmsg2(100, "hp=%p key=%lld\n", hp, hp->key.ikey);
350 if (hash == hp->hash && ikey == hp->key.ikey) {
351 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
352 return ((char *)hp)-loffset;
360 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
362 walkptr = (hlink *)(walkptr->next);
364 while (!walkptr && walk_index < buckets) {
365 walkptr = table[walk_index++];
367 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
368 walkptr->next, walk_index-1);
372 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
373 ((char *)walkptr)-loffset, walk_index);
374 return ((char *)walkptr)-loffset;
376 Dmsg0(dbglvl, "next: return NULL\n");
380 void *htable::first()
382 Dmsg0(dbglvl, "Enter first\n");
383 walkptr = table[0]; /* get first bucket */
384 walk_index = 1; /* Point to next index */
385 while (!walkptr && walk_index < buckets) {
386 walkptr = table[walk_index++]; /* go to next bucket */
388 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
389 walkptr->next, walk_index-1);
393 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
394 return ((char *)walkptr)-loffset;
396 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
400 /* Destroy the table and its contents */
401 void htable::destroy()
418 Dmsg0(100, "Done destroy.\n");
430 #define NITEMS 5000000
436 MYJCR *save_jcr = NULL, *item;
440 jcrtbl = (htable *)malloc(sizeof(htable));
441 jcrtbl->init(jcr, &jcr->link, NITEMS);
443 Dmsg1(000, "Inserting %d items\n", NITEMS);
444 for (int i=0; i<NITEMS; i++) {
446 len = sprintf(mkey, "This is htable item %d", i) + 1;
448 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
449 jcr->key = (char *)jcrtbl->hash_malloc(len);
450 memcpy(jcr->key, mkey, len);
451 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
453 jcrtbl->insert(jcr->key, jcr);
458 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
459 printf("Bad news: %s not found.\n", save_jcr->key);
461 printf("Item 10's key is: %s\n", item->key);
465 printf("Walk the hash table:\n");
466 foreach_htable (jcr, jcrtbl) {
467 // printf("htable item = %s\n", jcr->key);
473 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
474 printf("Calling destroy\n");
478 printf("Freed jcrtbl\n");
480 sm_dump(false); /* unit test */