2 * Bacula hash table routines
4 * htable is a hash table of items (pointers). This code is
5 * adapted and enhanced from code I wrote in 1982 for a
6 * relocatable linker. At that time, the hash table size
7 * was fixed and a primary number, which essentially provides
8 * the randomness. In this program, the hash table can grow when
9 * it gets too full, so the table size here is a binary number. The
10 * hashing is provided using an idea from Tcl where the initial
11 * hash code is "randomized" using a simple calculation from
12 * a random number generator that multiplies by a big number
13 * (I multiply by a prime number, while Tcl did not)
14 * then shifts the result down and does the binary division
15 * by masking. Increasing the size of the hash table is simple.
16 * Just create a new larger table, walk the old table and
17 * re-hash insert each entry into the new table.
20 * Kern Sibbald, July MMIII
26 Bacula® - The Network Backup Solution
28 Copyright (C) 2003-2006 Free Software Foundation Europe e.V.
30 The main author of Bacula is Kern Sibbald, with contributions from
31 many others, a complete list can be found in the file AUTHORS.
32 This program is Free Software; you can redistribute it and/or
33 modify it under the terms of version two of the GNU General Public
34 License as published by the Free Software Foundation and included
37 This program is distributed in the hope that it will be useful, but
38 WITHOUT ANY WARRANTY; without even the implied warranty of
39 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
40 General Public License for more details.
42 You should have received a copy of the GNU General Public License
43 along with this program; if not, write to the Free Software
44 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
47 Bacula® is a registered trademark of John Walker.
48 The licensor of Bacula is the Free Software Foundation Europe
49 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
50 Switzerland, email:ftf@fsfeurope.org.
57 /* ===================================================================
62 * Create hash of key, stored in hash then
63 * create and return the pseudo random bucket index
65 void htable::hash_index(char *key)
68 for (char *p=key; *p; p++) {
69 hash += (hash << 3) + (uint32_t)*p;
71 /* Multiply by large prime number, take top bits, mask for remainder */
72 index = ((hash * 1103515249) >> rshift) & mask;
73 Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
77 htable::htable(void *item, void *link, int tsize)
79 init(item, link, tsize);
82 void htable::init(void *item, void *link, int tsize)
86 for (pwr=0; tsize; pwr++) {
89 loffset = (char *)link - (char *)item;
90 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
91 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
92 num_items = 0; /* number of entries in table */
93 buckets = 1<<pwr; /* hash table size -- power of two */
94 max_items = buckets * 4; /* allow average 4 entries per chain */
95 table = (hlink **)malloc(buckets * sizeof(hlink *));
96 memset(table, 0, buckets * sizeof(hlink *));
101 uint32_t htable::size()
107 * Take each hash link and walk down the chain of items
108 * that hash there counting them (i.e. the hits),
109 * then report that number.
110 * Obiously, the more hits in a chain, the more time
111 * it takes to reference them. Empty chains are not so
112 * hot either -- as it means unused or wasted space.
121 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
122 printf("Hits/bucket: buckets\n");
123 for (i=0; i < MAX_COUNT; i++) {
126 for (i=0; i<(int)buckets; i++) {
130 p = (hlink *)(p->next);
140 for (i=0; i < MAX_COUNT; i++) {
141 printf("%2d: %d\n",i, hits[i]);
143 printf("max hits in a bucket = %d\n", max);
146 void htable::grow_table()
148 Dmsg1(100, "Grow called old size = %d\n", buckets);
149 /* Setup a bigger table */
150 htable *big = (htable *)malloc(sizeof(htable));
151 big->loffset = loffset;
152 big->mask = mask<<1 | 1;
153 big->rshift = rshift - 1;
155 big->buckets = buckets * 2;
156 big->max_items = big->buckets * 4;
157 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
158 memset(big->table, 0, big->buckets * sizeof(hlink *));
161 /* Insert all the items in the new hash table */
162 Dmsg1(100, "Before copy num_items=%d\n", num_items);
164 * We walk through the old smaller tree getting items,
165 * but since we are overwriting the colision links, we must
166 * explicitly save the item->next pointer and walk each
167 * colision chain ourselves. We do use next() for getting
168 * to the next bucket.
170 for (void *item=first(); item; ) {
171 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
172 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
173 big->insert(((hlink *)((char *)item+loffset))->key, item);
175 item = (void *)((char *)ni-loffset);
181 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
182 if (num_items != big->num_items) {
183 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
186 memcpy(this, big, sizeof(htable)); /* move everything across */
188 Dmsg0(100, "Exit grow.\n");
191 bool htable::insert(char *key, void *item)
195 return false; /* already exists */
197 sm_check(__FILE__, __LINE__, false);
198 ASSERT(index < buckets);
199 Dmsg2(100, "Insert: hash=0x%x index=%d\n", (unsigned)hash, index);
200 hp = (hlink *)(((char *)item)+loffset);
201 Dmsg4(100, "Insert hp=0x%x index=%d item=0x%x offset=%u\n", (unsigned)hp,
202 index, (unsigned)item, loffset);
203 hp->next = table[index];
207 Dmsg3(100, "Insert hp->next=0x%x hp->hash=0x%x hp->key=%s\n",
208 (unsigned)hp->next, hp->hash, hp->key);
210 if (++num_items >= max_items) {
211 Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items);
214 sm_check(__FILE__, __LINE__, false);
215 Dmsg3(100, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
219 void *htable::lookup(char *key)
222 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
223 // Dmsg2(100, "hp=0x%x key=%s\n", (long)hp, hp->key);
224 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
225 Dmsg1(100, "lookup return %x\n", ((char *)hp)-loffset);
226 return ((char *)hp)-loffset;
234 Dmsg1(100, "Enter next: walkptr=0x%x\n", (unsigned)walkptr);
236 walkptr = (hlink *)(walkptr->next);
238 while (!walkptr && walk_index < buckets) {
239 walkptr = table[walk_index++];
241 Dmsg3(100, "new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
242 (unsigned)(walkptr->next), walk_index-1);
246 Dmsg2(100, "next: rtn 0x%x walk_index=%d\n",
247 (unsigned)(((char *)walkptr)-loffset), walk_index);
248 return ((char *)walkptr)-loffset;
250 Dmsg0(100, "next: return NULL\n");
254 void *htable::first()
256 Dmsg0(100, "Enter first\n");
257 walkptr = table[0]; /* get first bucket */
258 walk_index = 1; /* Point to next index */
259 while (!walkptr && walk_index < buckets) {
260 walkptr = table[walk_index++]; /* go to next bucket */
262 Dmsg3(100, "first new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
263 (unsigned)(walkptr->next), walk_index-1);
267 Dmsg1(100, "Leave first walkptr=0x%x\n", (unsigned)walkptr);
268 return ((char *)walkptr)-loffset;
270 Dmsg0(100, "Leave first walkptr=NULL\n");
274 /* Destroy the table and its contents */
275 void htable::destroy()
287 Dmsg0(100, "Done destroy.\n");
305 MYJCR *save_jcr = NULL, *item;
309 jcrtbl = (htable *)malloc(sizeof(htable));
310 jcrtbl->init(jcr, &jcr->link, NITEMS);
312 Dmsg1(000, "Inserting %d items\n", NITEMS);
313 for (int i=0; i<NITEMS; i++) {
314 sprintf(mkey, "This is htable item %d", i);
315 jcr = (MYJCR *)malloc(sizeof(MYJCR));
316 Dmsg2(100, "link=0x%x jcr=0x%x\n", (unsigned)&jcr->link, (unsigned)jcr);
317 jcr->key = bstrdup(mkey);
319 jcrtbl->insert(jcr->key, jcr);
324 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
325 printf("Bad news: %s not found.\n", save_jcr->key);
327 printf("Item 10's key is: %s\n", item->key);
331 printf("Walk the hash table:\n");
332 foreach_htable (jcr, jcrtbl) {
333 // printf("htable item = %s\n", jcr->key);
337 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
338 printf("Calling destroy\n");
342 printf("Freed jcrtbl\n");