2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2018 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Bacula hash table routines
22 * htable is a hash table of items (pointers). This code is
23 * adapted and enhanced from code I wrote in 1982 for a
24 * relocatable linker. At that time, the hash table size
25 * was fixed and a primary number, which essentially provides
26 * the randomness. In this program, the hash table can grow when
27 * it gets too full, so the table size here is a binary number. The
28 * hashing is provided using an idea from Tcl where the initial
29 * hash code is "randomized" using a simple calculation from
30 * a random number generator that multiplies by a big number
31 * (I multiply by a prime number, while Tcl did not)
32 * then shifts the result down and does the binary division
33 * by masking. Increasing the size of the hash table is simple.
34 * Just create a new larger table, walk the old table and
35 * re-hash insert each entry into the new table.
38 * Kern Sibbald, July MMIII
45 #define lli long long int
46 static const int dbglvl = 500;
48 /* ===================================================================
54 * This subroutine gets a big buffer.
56 void htable::malloc_big_buf(int size)
60 hmem = (struct h_mem *)malloc(size);
63 hmem->next = mem_block;
65 hmem->mem = mem_block->first;
66 hmem->rem = (char *)hmem + size - hmem->mem;
67 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
70 /* This routine frees the whole tree */
71 void htable::hash_big_free()
73 struct h_mem *hmem, *rel;
75 for (hmem=mem_block; hmem; ) {
78 Dmsg1(100, "free malloc buf=%p\n", rel);
86 * Normal hash malloc routine that gets a
87 * "small" buffer from the big buffer
89 char *htable::hash_malloc(int size)
93 int asize = BALIGN(size);
95 if (mem_block->rem < asize) {
97 if (total_size >= 1000000) {
102 malloc_big_buf(mb_size);
104 mem_block->rem -= asize;
105 buf = mem_block->mem;
106 mem_block->mem += asize;
111 return (char *)malloc(size);
119 * Create hash of key, stored in hash then
120 * create and return the pseudo random bucket index
122 void htable::hash_index(char *key)
125 for (char *p=key; *p; p++) {
126 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
128 /* Multiply by large prime number, take top bits, mask for remainder */
129 index = ((hash * 1103515249LL) >> rshift) & mask;
130 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
133 void htable::hash_index(uint64_t ikey)
135 hash = ikey; /* already have starting binary hash */
136 /* Same algorithm as for char * */
137 index = ((hash * 1103515249LL) >> rshift) & mask;
138 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
142 * tsize is the estimated number of entries in the hash table
144 htable::htable(void *item, void *link, int tsize)
146 init(item, link, tsize);
149 void htable::init(void *item, void *link, int tsize)
153 bmemzero(this, sizeof(htable));
158 for (pwr=0; tsize; pwr++) {
161 loffset = (char *)link - (char *)item;
162 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
163 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
164 buckets = 1<<pwr; /* hash table size -- power of two */
165 max_items = buckets * 4; /* allow average 4 entries per chain */
166 table = (hlink **)malloc(buckets * sizeof(hlink *));
167 bmemzero(table, buckets * sizeof(hlink *));
169 malloc_big_buf(1000000); /* ***FIXME*** need variable or some estimate */
170 #endif /* BIG_MALLOC */
173 uint32_t htable::size()
179 * Take each hash link and walk down the chain of items
180 * that hash there counting them (i.e. the hits),
181 * then report that number.
182 * Obiously, the more hits in a chain, the more time
183 * it takes to reference them. Empty chains are not so
184 * hot either -- as it means unused or wasted space.
193 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
194 printf("Hits/bucket: buckets\n");
195 for (i=0; i < MAX_COUNT; i++) {
198 for (i=0; i<(int)buckets; i++) {
202 p = (hlink *)(p->next);
212 for (i=0; i < MAX_COUNT; i++) {
213 printf("%2d: %d\n",i, hits[i]);
215 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
216 printf("max hits in a bucket = %d\n", max);
219 edit_uint64(total_size, ed1);
220 printf("total bytes malloced = %s\n", ed1);
221 printf("total blocks malloced = %d\n", blocks);
225 void htable::grow_table()
227 Dmsg1(100, "Grow called old size = %d\n", buckets);
228 /* Setup a bigger table */
229 htable *big = (htable *)malloc(sizeof(htable));
230 memcpy(big, this, sizeof(htable)); /* start with original class data */
231 big->loffset = loffset;
232 big->mask = mask<<1 | 1;
233 big->rshift = rshift - 1;
235 big->buckets = buckets * 2;
236 big->max_items = big->buckets * 4;
237 /* Create a bigger hash table */
238 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
239 bmemzero(big->table, big->buckets * sizeof(hlink *));
242 /* Insert all the items in the new hash table */
243 Dmsg1(100, "Before copy num_items=%d\n", num_items);
245 * We walk through the old smaller tree getting items,
246 * but since we are overwriting the colision links, we must
247 * explicitly save the item->next pointer and walk each
248 * colision chain ourselves. We do use next() for getting
249 * to the next bucket.
251 for (void *item=first(); item; ) {
252 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
253 hlink *hp = (hlink *)((char *)item+loffset);
255 Dmsg1(100, "Grow insert: %lld\n", hp->key.ikey);
256 big->insert(hp->key.ikey, item);
258 Dmsg1(100, "Grow insert: %s\n", hp->key.key);
259 big->insert(hp->key.key, item);
262 item = (void *)((char *)ni-loffset);
268 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
269 if (num_items != big->num_items) {
270 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
273 memcpy(this, big, sizeof(htable)); /* move everything across */
275 Dmsg0(100, "Exit grow.\n");
278 bool htable::insert(char *key, void *item)
282 return false; /* already exists */
284 ASSERT(index < buckets);
285 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
286 hp = (hlink *)(((char *)item)+loffset);
287 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
288 index, item, loffset);
289 hp->next = table[index];
294 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
295 hp->next, hp->hash, hp->key.key);
297 if (++num_items >= max_items) {
298 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
301 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
305 void *htable::lookup(char *key)
308 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
309 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key.key);
310 if (hash == hp->hash && strcmp(key, hp->key.key) == 0) {
311 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
312 return ((char *)hp)-loffset;
318 bool htable::insert(uint64_t ikey, void *item)
322 return false; /* already exists */
324 ASSERT(index < buckets);
325 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
326 hp = (hlink *)(((char *)item)+loffset);
327 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
329 hp->next = table[index];
334 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->ikey=%lld\n", hp->next,
335 hp->hash, hp->key.ikey);
337 if (++num_items >= max_items) {
338 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
341 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%lld\n",
342 index, num_items, ikey);
346 void *htable::lookup(uint64_t ikey)
349 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
350 // Dmsg2(100, "hp=%p key=%lld\n", hp, hp->key.ikey);
351 if (hash == hp->hash && ikey == hp->key.ikey) {
352 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
353 return ((char *)hp)-loffset;
361 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
363 walkptr = (hlink *)(walkptr->next);
365 while (!walkptr && walk_index < buckets) {
366 walkptr = table[walk_index++];
368 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
369 walkptr->next, walk_index-1);
373 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
374 ((char *)walkptr)-loffset, walk_index);
375 return ((char *)walkptr)-loffset;
377 Dmsg0(dbglvl, "next: return NULL\n");
381 void *htable::first()
383 Dmsg0(dbglvl, "Enter first\n");
384 walkptr = table[0]; /* get first bucket */
385 walk_index = 1; /* Point to next index */
386 while (!walkptr && walk_index < buckets) {
387 walkptr = table[walk_index++]; /* go to next bucket */
389 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
390 walkptr->next, walk_index-1);
394 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
395 return ((char *)walkptr)-loffset;
397 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
401 /* Destroy the table and its contents */
402 void htable::destroy()
419 Dmsg0(100, "Done destroy.\n");
431 #define NITEMS 5000000
437 MYJCR *save_jcr = NULL, *item;
441 jcrtbl = (htable *)malloc(sizeof(htable));
442 jcrtbl->init(jcr, &jcr->link, NITEMS);
444 Dmsg1(000, "Inserting %d items\n", NITEMS);
445 for (int i=0; i<NITEMS; i++) {
447 len = sprintf(mkey, "This is htable item %d", i) + 1;
449 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
450 jcr->key = (char *)jcrtbl->hash_malloc(len);
451 memcpy(jcr->key, mkey, len);
452 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
454 jcrtbl->insert(jcr->key, jcr);
459 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
460 printf("Bad news: %s not found.\n", save_jcr->key);
462 printf("Item 10's key is: %s\n", item->key);
466 printf("Walk the hash table:\n");
467 foreach_htable (jcr, jcrtbl) {
468 // printf("htable item = %s\n", jcr->key);
474 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
475 printf("Calling destroy\n");
479 printf("Freed jcrtbl\n");
481 sm_dump(false); /* unit test */