2 * Bacula hash table routines
4 * htable is a hash table of items (pointers). This code is
5 * adapted and enhanced from code I wrote in 1982 for a
6 * relocatable linker. At that time, the hash table size
7 * was fixed and a primary number, which essentially provides
8 * the randomness. In this program, the hash table can grow when
9 * it gets too full, so the table size here is a binary number. The
10 * hashing is provided using an idea from Tcl where the initial
11 * hash code is "randomized" using a simple calculation from
12 * a random number generator that multiplies by a big number
13 * (I multiply by a prime number, while Tcl did not)
14 * then shifts the result down and does the binary division
15 * by masking. Increasing the size of the hash table is simple.
16 * Just create a new larger table, walk the old table and
17 * re-hash insert each entry into the new table.
20 * Kern Sibbald, July MMIII
26 Copyright (C) 2003-2005 Kern Sibbald
28 This program is free software; you can redistribute it and/or
29 modify it under the terms of the GNU General Public License
30 version 2 as amended with additional clauses defined in the
31 file LICENSE in the main source directory.
33 This program is distributed in the hope that it will be useful,
34 but WITHOUT ANY WARRANTY; without even the implied warranty of
35 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
36 the file LICENSE for additional details.
44 /* ===================================================================
49 * Create hash of key, stored in hash then
50 * create and return the pseudo random bucket index
52 void htable::hash_index(char *key)
55 for (char *p=key; *p; p++) {
56 hash += (hash << 3) + (uint32_t)*p;
58 /* Multiply by large prime number, take top bits, mask for remainder */
59 index = ((hash * 1103515249) >> rshift) & mask;
60 Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
64 htable::htable(void *item, void *link, int tsize)
66 init(item, link, tsize);
69 void htable::init(void *item, void *link, int tsize)
73 for (pwr=0; tsize; pwr++) {
76 loffset = (char *)link - (char *)item;
77 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
78 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
79 num_items = 0; /* number of entries in table */
80 buckets = 1<<pwr; /* hash table size -- power of two */
81 max_items = buckets * 4; /* allow average 4 entries per chain */
82 table = (hlink **)malloc(buckets * sizeof(hlink *));
83 memset(table, 0, buckets * sizeof(hlink *));
88 uint32_t htable::size()
94 * Take each hash link and walk down the chain of items
95 * that hash there counting them (i.e. the hits),
96 * then report that number.
97 * Obiously, the more hits in a chain, the more time
98 * it takes to reference them. Empty chains are not so
99 * hot either -- as it means unused or wasted space.
108 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
109 printf("Hits/bucket: buckets\n");
110 for (i=0; i < MAX_COUNT; i++) {
113 for (i=0; i<(int)buckets; i++) {
117 p = (hlink *)(p->next);
127 for (i=0; i < MAX_COUNT; i++) {
128 printf("%2d: %d\n",i, hits[i]);
130 printf("max hits in a bucket = %d\n", max);
133 void htable::grow_table()
135 Dmsg1(100, "Grow called old size = %d\n", buckets);
136 /* Setup a bigger table */
137 htable *big = (htable *)malloc(sizeof(htable));
138 big->loffset = loffset;
139 big->mask = mask<<1 | 1;
140 big->rshift = rshift - 1;
142 big->buckets = buckets * 2;
143 big->max_items = big->buckets * 4;
144 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
145 memset(big->table, 0, big->buckets * sizeof(hlink *));
148 /* Insert all the items in the new hash table */
149 Dmsg1(100, "Before copy num_items=%d\n", num_items);
151 * We walk through the old smaller tree getting items,
152 * but since we are overwriting the colision links, we must
153 * explicitly save the item->next pointer and walk each
154 * colision chain ourselves. We do use next() for getting
155 * to the next bucket.
157 for (void *item=first(); item; ) {
158 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
159 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
160 big->insert(((hlink *)((char *)item+loffset))->key, item);
162 item = (void *)((char *)ni-loffset);
168 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
169 if (num_items != big->num_items) {
170 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
173 memcpy(this, big, sizeof(htable)); /* move everything across */
175 Dmsg0(100, "Exit grow.\n");
178 bool htable::insert(char *key, void *item)
182 return false; /* already exists */
184 sm_check(__FILE__, __LINE__, false);
185 ASSERT(index < buckets);
186 Dmsg2(100, "Insert: hash=0x%x index=%d\n", (unsigned)hash, index);
187 hp = (hlink *)(((char *)item)+loffset);
188 Dmsg4(100, "Insert hp=0x%x index=%d item=0x%x offset=%u\n", (unsigned)hp,
189 index, (unsigned)item, loffset);
190 hp->next = table[index];
194 Dmsg3(100, "Insert hp->next=0x%x hp->hash=0x%x hp->key=%s\n",
195 (unsigned)hp->next, hp->hash, hp->key);
197 if (++num_items >= max_items) {
198 Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items);
201 sm_check(__FILE__, __LINE__, false);
202 Dmsg3(100, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
206 void *htable::lookup(char *key)
209 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
210 // Dmsg2(100, "hp=0x%x key=%s\n", (long)hp, hp->key);
211 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
212 Dmsg1(100, "lookup return %x\n", ((char *)hp)-loffset);
213 return ((char *)hp)-loffset;
221 Dmsg1(100, "Enter next: walkptr=0x%x\n", (unsigned)walkptr);
223 walkptr = (hlink *)(walkptr->next);
225 while (!walkptr && walk_index < buckets) {
226 walkptr = table[walk_index++];
228 Dmsg3(100, "new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
229 (unsigned)(walkptr->next), walk_index-1);
233 Dmsg2(100, "next: rtn 0x%x walk_index=%d\n",
234 (unsigned)(((char *)walkptr)-loffset), walk_index);
235 return ((char *)walkptr)-loffset;
237 Dmsg0(100, "next: return NULL\n");
241 void *htable::first()
243 Dmsg0(100, "Enter first\n");
244 walkptr = table[0]; /* get first bucket */
245 walk_index = 1; /* Point to next index */
246 while (!walkptr && walk_index < buckets) {
247 walkptr = table[walk_index++]; /* go to next bucket */
249 Dmsg3(100, "first new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
250 (unsigned)(walkptr->next), walk_index-1);
254 Dmsg1(100, "Leave first walkptr=0x%x\n", (unsigned)walkptr);
255 return ((char *)walkptr)-loffset;
257 Dmsg0(100, "Leave first walkptr=NULL\n");
261 /* Destroy the table and its contents */
262 void htable::destroy()
274 Dmsg0(100, "Done destroy.\n");
292 MYJCR *save_jcr = NULL, *item;
296 jcrtbl = (htable *)malloc(sizeof(htable));
297 jcrtbl->init(jcr, &jcr->link, NITEMS);
299 Dmsg1(000, "Inserting %d items\n", NITEMS);
300 for (int i=0; i<NITEMS; i++) {
301 sprintf(mkey, "This is htable item %d", i);
302 jcr = (MYJCR *)malloc(sizeof(MYJCR));
303 Dmsg2(100, "link=0x%x jcr=0x%x\n", (unsigned)&jcr->link, (unsigned)jcr);
304 jcr->key = bstrdup(mkey);
306 jcrtbl->insert(jcr->key, jcr);
311 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
312 printf("Bad news: %s not found.\n", save_jcr->key);
314 printf("Item 10's key is: %s\n", item->key);
318 printf("Walk the hash table:\n");
319 foreach_htable (jcr, jcrtbl) {
320 // printf("htable item = %s\n", jcr->key);
324 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
325 printf("Calling destroy\n");
329 printf("Freed jcrtbl\n");