2 * Bacula hash table routines
4 * htable is a hash table of items (pointers). This code is
5 * adapted and enhanced from code I wrote in 1982 for a
6 * relocatable linker. At that time, the hash table size
7 * was fixed and a primary number, which essentially provides
8 * the randomness. In this program, the hash table can grow when
9 * it gets too full, so the table size here is a binary number. The
10 * hashing is provided using an idea from Tcl where the initial
11 * hash code is "randomized" using a simple calculation from
12 * a random number generator that multiplies by a big number
13 * (I multiply by a prime number, while Tcl did not)
14 * then shifts the result down and does the binary division
15 * by masking. Increasing the size of the hash table is simple.
16 * Just create a new larger table, walk the old table and
17 * re-hash insert each entry into the new table.
20 * Kern Sibbald, July MMIII
26 Bacula® - The Network Backup Solution
28 Copyright (C) 2003-2006 Free Software Foundation Europe e.V.
30 The main author of Bacula is Kern Sibbald, with contributions from
31 many others, a complete list can be found in the file AUTHORS.
32 This program is Free Software; you can redistribute it and/or
33 modify it under the terms of version two of the GNU General Public
34 License as published by the Free Software Foundation and included
37 This program is distributed in the hope that it will be useful, but
38 WITHOUT ANY WARRANTY; without even the implied warranty of
39 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
40 General Public License for more details.
42 You should have received a copy of the GNU General Public License
43 along with this program; if not, write to the Free Software
44 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
47 Bacula® is a registered trademark of John Walker.
48 The licensor of Bacula is the Free Software Foundation Europe
49 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
50 Switzerland, email:ftf@fsfeurope.org.
57 /* ===================================================================
62 * Create hash of key, stored in hash then
63 * create and return the pseudo random bucket index
65 void htable::hash_index(char *key)
68 for (char *p=key; *p; p++) {
69 hash += (hash << 3) + (uint32_t)*p;
71 /* Multiply by large prime number, take top bits, mask for remainder */
72 index = ((hash * 1103515249) >> rshift) & mask;
73 Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
76 htable::htable(void *item, void *link, int tsize)
78 init(item, link, tsize);
81 void htable::init(void *item, void *link, int tsize)
85 for (pwr=0; tsize; pwr++) {
88 loffset = (char *)link - (char *)item;
89 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
90 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
91 num_items = 0; /* number of entries in table */
92 buckets = 1<<pwr; /* hash table size -- power of two */
93 max_items = buckets * 4; /* allow average 4 entries per chain */
94 table = (hlink **)malloc(buckets * sizeof(hlink *));
95 memset(table, 0, buckets * sizeof(hlink *));
100 uint32_t htable::size()
106 * Take each hash link and walk down the chain of items
107 * that hash there counting them (i.e. the hits),
108 * then report that number.
109 * Obiously, the more hits in a chain, the more time
110 * it takes to reference them. Empty chains are not so
111 * hot either -- as it means unused or wasted space.
120 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
121 printf("Hits/bucket: buckets\n");
122 for (i=0; i < MAX_COUNT; i++) {
125 for (i=0; i<(int)buckets; i++) {
129 p = (hlink *)(p->next);
139 for (i=0; i < MAX_COUNT; i++) {
140 printf("%2d: %d\n",i, hits[i]);
142 printf("max hits in a bucket = %d\n", max);
145 void htable::grow_table()
147 Dmsg1(100, "Grow called old size = %d\n", buckets);
148 /* Setup a bigger table */
149 htable *big = (htable *)malloc(sizeof(htable));
150 big->loffset = loffset;
151 big->mask = mask<<1 | 1;
152 big->rshift = rshift - 1;
154 big->buckets = buckets * 2;
155 big->max_items = big->buckets * 4;
156 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
157 memset(big->table, 0, big->buckets * sizeof(hlink *));
160 /* Insert all the items in the new hash table */
161 Dmsg1(100, "Before copy num_items=%d\n", num_items);
163 * We walk through the old smaller tree getting items,
164 * but since we are overwriting the colision links, we must
165 * explicitly save the item->next pointer and walk each
166 * colision chain ourselves. We do use next() for getting
167 * to the next bucket.
169 for (void *item=first(); item; ) {
170 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
171 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
172 big->insert(((hlink *)((char *)item+loffset))->key, item);
174 item = (void *)((char *)ni-loffset);
180 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
181 if (num_items != big->num_items) {
182 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
185 memcpy(this, big, sizeof(htable)); /* move everything across */
187 Dmsg0(100, "Exit grow.\n");
190 bool htable::insert(char *key, void *item)
194 return false; /* already exists */
196 ASSERT(index < buckets);
197 Dmsg2(100, "Insert: hash=0x%x index=%d\n", (unsigned)hash, index);
198 hp = (hlink *)(((char *)item)+loffset);
199 Dmsg4(100, "Insert hp=0x%x index=%d item=0x%x offset=%u\n", (unsigned)hp,
200 index, (unsigned)item, loffset);
201 hp->next = table[index];
205 Dmsg3(100, "Insert hp->next=0x%x hp->hash=0x%x hp->key=%s\n",
206 (unsigned)hp->next, hp->hash, hp->key);
208 if (++num_items >= max_items) {
209 Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items);
212 Dmsg3(100, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
216 void *htable::lookup(char *key)
219 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
220 // Dmsg2(100, "hp=0x%x key=%s\n", (long)hp, hp->key);
221 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
222 Dmsg1(100, "lookup return %x\n", ((char *)hp)-loffset);
223 return ((char *)hp)-loffset;
231 Dmsg1(100, "Enter next: walkptr=0x%x\n", (unsigned)walkptr);
233 walkptr = (hlink *)(walkptr->next);
235 while (!walkptr && walk_index < buckets) {
236 walkptr = table[walk_index++];
238 Dmsg3(100, "new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
239 (unsigned)(walkptr->next), walk_index-1);
243 Dmsg2(100, "next: rtn 0x%x walk_index=%d\n",
244 (unsigned)(((char *)walkptr)-loffset), walk_index);
245 return ((char *)walkptr)-loffset;
247 Dmsg0(100, "next: return NULL\n");
251 void *htable::first()
253 Dmsg0(100, "Enter first\n");
254 walkptr = table[0]; /* get first bucket */
255 walk_index = 1; /* Point to next index */
256 while (!walkptr && walk_index < buckets) {
257 walkptr = table[walk_index++]; /* go to next bucket */
259 Dmsg3(100, "first new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
260 (unsigned)(walkptr->next), walk_index-1);
264 Dmsg1(100, "Leave first walkptr=0x%x\n", (unsigned)walkptr);
265 return ((char *)walkptr)-loffset;
267 Dmsg0(100, "Leave first walkptr=NULL\n");
271 /* Destroy the table and its contents */
272 void htable::destroy()
285 Dmsg0(100, "Done destroy.\n");
303 MYJCR *save_jcr = NULL, *item;
307 jcrtbl = (htable *)malloc(sizeof(htable));
308 jcrtbl->init(jcr, &jcr->link, NITEMS);
310 Dmsg1(000, "Inserting %d items\n", NITEMS);
311 for (int i=0; i<NITEMS; i++) {
312 sprintf(mkey, "This is htable item %d", i);
313 jcr = (MYJCR *)malloc(sizeof(MYJCR));
314 Dmsg2(100, "link=0x%x jcr=0x%x\n", (unsigned)&jcr->link, (unsigned)jcr);
315 jcr->key = bstrdup(mkey);
317 jcrtbl->insert(jcr->key, jcr);
322 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
323 printf("Bad news: %s not found.\n", save_jcr->key);
325 printf("Item 10's key is: %s\n", item->key);
329 printf("Walk the hash table:\n");
330 foreach_htable (jcr, jcrtbl) {
331 // printf("htable item = %s\n", jcr->key);
335 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
336 printf("Calling destroy\n");
340 printf("Freed jcrtbl\n");