2 * Bacula hash table routines
4 * htable is a hash table of items (pointers). This code is
5 * adapted and enhanced from code I wrote in 1982 for a
6 * relocatable linker. At that time, the hash table size
7 * was fixed and a primary number, which essentially provides
8 * the randomness. In this program, the hash table can grow when
9 * it gets too full, so the table size here is a binary number. The
10 * hashing is provided using an idea from Tcl where the initial
11 * hash code is "randomized" using a simple calculation from
12 * a random number generator that multiplies by a big number
13 * (I multiply by a prime number, while Tcl did not)
14 * then shifts the result down and does the binary division
15 * by masking. Increasing the size of the hash table is simple.
16 * Just create a new larger table, walk the old table and
17 * re-hash insert each entry into the new table.
20 * Kern Sibbald, July MMIII
26 Copyright (C) 2003-2005 Kern Sibbald
28 This program is free software; you can redistribute it and/or
29 modify it under the terms of the GNU General Public License as
30 published by the Free Software Foundation; either version 2 of
31 the License, or (at your option) any later version.
33 This program is distributed in the hope that it will be useful,
34 but WITHOUT ANY WARRANTY; without even the implied warranty of
35 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
36 General Public License for more details.
38 You should have received a copy of the GNU General Public
39 License along with this program; if not, write to the Free
40 Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
49 /* ===================================================================
54 * Create hash of key, stored in hash then
55 * create and return the pseudo random bucket index
57 void htable::hash_index(char *key)
60 for (char *p=key; *p; p++) {
61 hash += (hash << 3) + (uint32_t)*p;
63 /* Multiply by large prime number, take top bits, mask for remainder */
64 index = ((hash * 1103515249) >> rshift) & mask;
65 Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
69 htable::htable(void *item, void *link, int tsize)
71 init(item, link, tsize);
74 void htable::init(void *item, void *link, int tsize)
78 for (pwr=0; tsize; pwr++) {
81 loffset = (char *)link - (char *)item;
82 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
83 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
84 num_items = 0; /* number of entries in table */
85 buckets = 1<<pwr; /* hash table size -- power of two */
86 max_items = buckets * 4; /* allow average 4 entries per chain */
87 table = (hlink **)malloc(buckets * sizeof(hlink *));
88 memset(table, 0, buckets * sizeof(hlink *));
93 uint32_t htable::size()
99 * Take each hash link and walk down the chain of items
100 * that hash there counting them (i.e. the hits),
101 * then report that number.
102 * Obiously, the more hits in a chain, the more time
103 * it takes to reference them. Empty chains are not so
104 * hot either -- as it means unused or wasted space.
113 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
114 printf("Hits/bucket: buckets\n");
115 for (i=0; i < MAX_COUNT; i++) {
118 for (i=0; i<(int)buckets; i++) {
122 p = (hlink *)(p->next);
132 for (i=0; i < MAX_COUNT; i++) {
133 printf("%2d: %d\n",i, hits[i]);
135 printf("max hits in a bucket = %d\n", max);
138 void htable::grow_table()
140 Dmsg1(100, "Grow called old size = %d\n", buckets);
141 /* Setup a bigger table */
142 htable *big = (htable *)malloc(sizeof(htable));
143 big->loffset = loffset;
144 big->mask = mask<<1 | 1;
145 big->rshift = rshift - 1;
147 big->buckets = buckets * 2;
148 big->max_items = big->buckets * 4;
149 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
150 memset(big->table, 0, big->buckets * sizeof(hlink *));
153 /* Insert all the items in the new hash table */
154 Dmsg1(100, "Before copy num_items=%d\n", num_items);
156 * We walk through the old smaller tree getting items,
157 * but since we are overwriting the colision links, we must
158 * explicitly save the item->next pointer and walk each
159 * colision chain ourselves. We do use next() for getting
160 * to the next bucket.
162 for (void *item=first(); item; ) {
163 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
164 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
165 big->insert(((hlink *)((char *)item+loffset))->key, item);
167 item = (void *)((char *)ni-loffset);
173 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
174 if (num_items != big->num_items) {
175 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
178 memcpy(this, big, sizeof(htable)); /* move everything across */
180 Dmsg0(100, "Exit grow.\n");
183 bool htable::insert(char *key, void *item)
187 return false; /* already exists */
189 sm_check(__FILE__, __LINE__, false);
190 ASSERT(index < buckets);
191 Dmsg2(100, "Insert: hash=0x%x index=%d\n", (unsigned)hash, index);
192 hp = (hlink *)(((char *)item)+loffset);
193 Dmsg4(100, "Insert hp=0x%x index=%d item=0x%x offset=%u\n", (unsigned)hp,
194 index, (unsigned)item, loffset);
195 hp->next = table[index];
199 Dmsg3(100, "Insert hp->next=0x%x hp->hash=0x%x hp->key=%s\n",
200 (unsigned)hp->next, hp->hash, hp->key);
202 if (++num_items >= max_items) {
203 Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items);
206 sm_check(__FILE__, __LINE__, false);
207 Dmsg3(100, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
211 void *htable::lookup(char *key)
214 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
215 // Dmsg2(100, "hp=0x%x key=%s\n", (long)hp, hp->key);
216 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
217 Dmsg1(100, "lookup return %x\n", ((char *)hp)-loffset);
218 return ((char *)hp)-loffset;
226 Dmsg1(100, "Enter next: walkptr=0x%x\n", (unsigned)walkptr);
228 walkptr = (hlink *)(walkptr->next);
230 while (!walkptr && walk_index < buckets) {
231 walkptr = table[walk_index++];
233 Dmsg3(100, "new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
234 (unsigned)(walkptr->next), walk_index-1);
238 Dmsg2(100, "next: rtn 0x%x walk_index=%d\n",
239 (unsigned)(((char *)walkptr)-loffset), walk_index);
240 return ((char *)walkptr)-loffset;
242 Dmsg0(100, "next: return NULL\n");
246 void *htable::first()
248 Dmsg0(100, "Enter first\n");
249 walkptr = table[0]; /* get first bucket */
250 walk_index = 1; /* Point to next index */
251 while (!walkptr && walk_index < buckets) {
252 walkptr = table[walk_index++]; /* go to next bucket */
254 Dmsg3(100, "first new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
255 (unsigned)(walkptr->next), walk_index-1);
259 Dmsg1(100, "Leave first walkptr=0x%x\n", (unsigned)walkptr);
260 return ((char *)walkptr)-loffset;
262 Dmsg0(100, "Leave first walkptr=NULL\n");
266 /* Destroy the table and its contents */
267 void htable::destroy()
279 Dmsg0(100, "Done destroy.\n");
297 MYJCR *save_jcr = NULL, *item;
301 jcrtbl = (htable *)malloc(sizeof(htable));
302 jcrtbl->init(jcr, &jcr->link, NITEMS);
304 Dmsg1(000, "Inserting %d items\n", NITEMS);
305 for (int i=0; i<NITEMS; i++) {
306 sprintf(mkey, "This is htable item %d", i);
307 jcr = (MYJCR *)malloc(sizeof(MYJCR));
308 Dmsg2(100, "link=0x%x jcr=0x%x\n", (unsigned)&jcr->link, (unsigned)jcr);
309 jcr->key = bstrdup(mkey);
311 jcrtbl->insert(jcr->key, jcr);
316 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
317 printf("Bad news: %s not found.\n", save_jcr->key);
319 printf("Item 10's key is: %s\n", item->key);
323 printf("Walk the hash table:\n");
324 foreach_htable (jcr, jcrtbl) {
325 // printf("htable item = %s\n", jcr->key);
329 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
330 printf("Calling destroy\n");
334 printf("Freed jcrtbl\n");