2 Bacula(R) - The Network Backup Solution
4 Copyright (C) 2000-2017 Kern Sibbald
6 The original author of Bacula is Kern Sibbald, with contributions
7 from many others, a complete list can be found in the file AUTHORS.
9 You may use this file and others of this release according to the
10 license defined in the LICENSE file, which includes the Affero General
11 Public License, v3.0 ("AGPLv3") and some additional permissions and
12 terms pursuant to its AGPLv3 Section 7.
14 This notice must be preserved when any source code is
15 conveyed and/or propagated.
17 Bacula(R) is a registered trademark of Kern Sibbald.
20 * Bacula hash table routines
22 * htable is a hash table of items (pointers). This code is
23 * adapted and enhanced from code I wrote in 1982 for a
24 * relocatable linker. At that time, the hash table size
25 * was fixed and a primary number, which essentially provides
26 * the randomness. In this program, the hash table can grow when
27 * it gets too full, so the table size here is a binary number. The
28 * hashing is provided using an idea from Tcl where the initial
29 * hash code is "randomized" using a simple calculation from
30 * a random number generator that multiplies by a big number
31 * (I multiply by a prime number, while Tcl did not)
32 * then shifts the result down and does the binary division
33 * by masking. Increasing the size of the hash table is simple.
34 * Just create a new larger table, walk the old table and
35 * re-hash insert each entry into the new table.
38 * Kern Sibbald, July MMIII
45 #define lli long long int
46 static const int dbglvl = 500;
48 /* ===================================================================
54 * This subroutine gets a big buffer.
56 void htable::malloc_big_buf(int size)
60 hmem = (struct h_mem *)malloc(size);
63 hmem->next = mem_block;
65 hmem->mem = mem_block->first;
66 hmem->rem = (char *)hmem + size - hmem->mem;
67 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
70 /* This routine frees the whole tree */
71 void htable::hash_big_free()
73 struct h_mem *hmem, *rel;
75 for (hmem=mem_block; hmem; ) {
78 Dmsg1(100, "free malloc buf=%p\n", rel);
86 * Normal hash malloc routine that gets a
87 * "small" buffer from the big buffer
89 char *htable::hash_malloc(int size)
93 int asize = BALIGN(size);
95 if (mem_block->rem < asize) {
97 if (total_size >= 1000000) {
102 malloc_big_buf(mb_size);
104 mem_block->rem -= asize;
105 buf = mem_block->mem;
106 mem_block->mem += asize;
111 return (char *)malloc(size);
119 * Create hash of key, stored in hash then
120 * create and return the pseudo random bucket index
122 void htable::hash_index(char *key)
125 for (char *p=key; *p; p++) {
126 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
128 /* Multiply by large prime number, take top bits, mask for remainder */
129 index = ((hash * 1103515249LL) >> rshift) & mask;
130 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
133 void htable::hash_index(uint64_t ikey)
135 hash = ikey; /* already have starting binary hash */
136 /* Same algorithm as for char * */
137 index = ((hash * 1103515249LL) >> rshift) & mask;
138 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
142 * tsize is the estimated number of entries in the hash table
144 htable::htable(void *item, void *link, int tsize)
146 init(item, link, tsize);
149 void htable::init(void *item, void *link, int tsize)
153 bmemzero(this, sizeof(htable));
158 for (pwr=0; tsize; pwr++) {
161 loffset = (char *)link - (char *)item;
162 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
163 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
164 buckets = 1<<pwr; /* hash table size -- power of two */
165 max_items = buckets * 4; /* allow average 4 entries per chain */
166 table = (hlink **)malloc(buckets * sizeof(hlink *));
167 bmemzero(table, buckets * sizeof(hlink *));
169 malloc_big_buf(1000000); /* ***FIXME*** need variable or some estimate */
170 #endif /* BIG_MALLOC */
173 uint32_t htable::size()
179 * Take each hash link and walk down the chain of items
180 * that hash there counting them (i.e. the hits),
181 * then report that number.
182 * Obiously, the more hits in a chain, the more time
183 * it takes to reference them. Empty chains are not so
184 * hot either -- as it means unused or wasted space.
193 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
194 printf("Hits/bucket: buckets\n");
195 for (i=0; i < MAX_COUNT; i++) {
198 for (i=0; i<(int)buckets; i++) {
202 p = (hlink *)(p->next);
212 for (i=0; i < MAX_COUNT; i++) {
213 printf("%2d: %d\n",i, hits[i]);
215 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
216 printf("max hits in a bucket = %d\n", max);
218 printf("total bytes malloced = %lld\n", (lli)total_size);
219 printf("total blocks malloced = %d\n", blocks);
223 void htable::grow_table()
225 Dmsg1(100, "Grow called old size = %d\n", buckets);
226 /* Setup a bigger table */
227 htable *big = (htable *)malloc(sizeof(htable));
228 memcpy(big, this, sizeof(htable)); /* start with original class data */
229 big->loffset = loffset;
230 big->mask = mask<<1 | 1;
231 big->rshift = rshift - 1;
233 big->buckets = buckets * 2;
234 big->max_items = big->buckets * 4;
235 /* Create a bigger hash table */
236 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
237 bmemzero(big->table, big->buckets * sizeof(hlink *));
240 /* Insert all the items in the new hash table */
241 Dmsg1(100, "Before copy num_items=%d\n", num_items);
243 * We walk through the old smaller tree getting items,
244 * but since we are overwriting the colision links, we must
245 * explicitly save the item->next pointer and walk each
246 * colision chain ourselves. We do use next() for getting
247 * to the next bucket.
249 for (void *item=first(); item; ) {
250 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
251 hlink *hp = (hlink *)((char *)item+loffset);
253 Dmsg1(100, "Grow insert: %lld\n", hp->key.ikey);
254 big->insert(hp->key.ikey, item);
256 Dmsg1(100, "Grow insert: %s\n", hp->key.key);
257 big->insert(hp->key.key, item);
260 item = (void *)((char *)ni-loffset);
266 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
267 if (num_items != big->num_items) {
268 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
271 memcpy(this, big, sizeof(htable)); /* move everything across */
273 Dmsg0(100, "Exit grow.\n");
276 bool htable::insert(char *key, void *item)
280 return false; /* already exists */
282 ASSERT(index < buckets);
283 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
284 hp = (hlink *)(((char *)item)+loffset);
285 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
286 index, item, loffset);
287 hp->next = table[index];
292 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
293 hp->next, hp->hash, hp->key.key);
295 if (++num_items >= max_items) {
296 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
299 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
303 void *htable::lookup(char *key)
306 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
307 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key.key);
308 if (hash == hp->hash && strcmp(key, hp->key.key) == 0) {
309 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
310 return ((char *)hp)-loffset;
316 bool htable::insert(uint64_t ikey, void *item)
320 return false; /* already exists */
322 ASSERT(index < buckets);
323 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
324 hp = (hlink *)(((char *)item)+loffset);
325 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp, index,
327 hp->next = table[index];
332 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->ikey=%lld\n", hp->next,
333 hp->hash, hp->key.ikey);
335 if (++num_items >= max_items) {
336 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
339 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%lld\n",
340 index, num_items, ikey);
344 void *htable::lookup(uint64_t ikey)
347 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
348 // Dmsg2(100, "hp=%p key=%lld\n", hp, hp->key.ikey);
349 if (hash == hp->hash && ikey == hp->key.ikey) {
350 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
351 return ((char *)hp)-loffset;
359 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
361 walkptr = (hlink *)(walkptr->next);
363 while (!walkptr && walk_index < buckets) {
364 walkptr = table[walk_index++];
366 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
367 walkptr->next, walk_index-1);
371 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
372 ((char *)walkptr)-loffset, walk_index);
373 return ((char *)walkptr)-loffset;
375 Dmsg0(dbglvl, "next: return NULL\n");
379 void *htable::first()
381 Dmsg0(dbglvl, "Enter first\n");
382 walkptr = table[0]; /* get first bucket */
383 walk_index = 1; /* Point to next index */
384 while (!walkptr && walk_index < buckets) {
385 walkptr = table[walk_index++]; /* go to next bucket */
387 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
388 walkptr->next, walk_index-1);
392 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
393 return ((char *)walkptr)-loffset;
395 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
399 /* Destroy the table and its contents */
400 void htable::destroy()
417 Dmsg0(100, "Done destroy.\n");
429 #define NITEMS 5000000
435 MYJCR *save_jcr = NULL, *item;
439 jcrtbl = (htable *)malloc(sizeof(htable));
440 jcrtbl->init(jcr, &jcr->link, NITEMS);
442 Dmsg1(000, "Inserting %d items\n", NITEMS);
443 for (int i=0; i<NITEMS; i++) {
445 len = sprintf(mkey, "This is htable item %d", i) + 1;
447 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
448 jcr->key = (char *)jcrtbl->hash_malloc(len);
449 memcpy(jcr->key, mkey, len);
450 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
452 jcrtbl->insert(jcr->key, jcr);
457 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
458 printf("Bad news: %s not found.\n", save_jcr->key);
460 printf("Item 10's key is: %s\n", item->key);
464 printf("Walk the hash table:\n");
465 foreach_htable (jcr, jcrtbl) {
466 // printf("htable item = %s\n", jcr->key);
472 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
473 printf("Calling destroy\n");
477 printf("Freed jcrtbl\n");
479 sm_dump(false); /* unit test */