2 Bacula® - The Network Backup Solution
4 Copyright (C) 2003-2008 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version two of the GNU General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of John Walker.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula hash table routines
31 * htable is a hash table of items (pointers). This code is
32 * adapted and enhanced from code I wrote in 1982 for a
33 * relocatable linker. At that time, the hash table size
34 * was fixed and a primary number, which essentially provides
35 * the randomness. In this program, the hash table can grow when
36 * it gets too full, so the table size here is a binary number. The
37 * hashing is provided using an idea from Tcl where the initial
38 * hash code is "randomized" using a simple calculation from
39 * a random number generator that multiplies by a big number
40 * (I multiply by a prime number, while Tcl did not)
41 * then shifts the result down and does the binary division
42 * by masking. Increasing the size of the hash table is simple.
43 * Just create a new larger table, walk the old table and
44 * re-hash insert each entry into the new table.
47 * Kern Sibbald, July MMIII
57 /* ===================================================================
63 * This subroutine gets a big buffer.
65 void htable::malloc_buf(int size)
69 hmem = (struct h_mem *)malloc(size);
72 hmem->next = this->mem;
74 hmem->mem = mem->first;
75 hmem->rem = (char *)hmem + size - hmem->mem;
76 Dmsg2(200, "malloc buf size=%d rem=%d\n", size, hmem->rem);
79 char *htable::hash_alloc(int size)
82 int asize = BALIGN(size);
84 if (mem->rem < asize) {
86 if (total_size >= 1000000) {
100 /* This routine frees the whole tree */
101 void htable::hash_free()
103 struct h_mem *hmem, *rel;
105 for (hmem=mem; hmem; ) {
115 * Create hash of key, stored in hash then
116 * create and return the pseudo random bucket index
118 void htable::hash_index(char *key)
121 for (char *p=key; *p; p++) {
122 hash += (hash << 3) + (uint32_t)*p;
124 /* Multiply by large prime number, take top bits, mask for remainder */
125 index = ((hash * 1103515249) >> rshift) & mask;
126 Dmsg2(100, "Leave hash_index hash=0x%x index=%d\n", hash, index);
129 htable::htable(void *item, void *link, int tsize)
131 init(item, link, tsize);
134 void htable::init(void *item, void *link, int tsize)
138 for (pwr=0; tsize; pwr++) {
141 loffset = (char *)link - (char *)item;
142 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
143 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
144 num_items = 0; /* number of entries in table */
145 buckets = 1<<pwr; /* hash table size -- power of two */
146 max_items = buckets * 4; /* allow average 4 entries per chain */
147 table = (hlink **)malloc(buckets * sizeof(hlink *));
148 memset(table, 0, buckets * sizeof(hlink *));
153 malloc_buf(1000000); /* ***FIXME*** base off of size */
157 uint32_t htable::size()
163 * Take each hash link and walk down the chain of items
164 * that hash there counting them (i.e. the hits),
165 * then report that number.
166 * Obiously, the more hits in a chain, the more time
167 * it takes to reference them. Empty chains are not so
168 * hot either -- as it means unused or wasted space.
177 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
178 printf("Hits/bucket: buckets\n");
179 for (i=0; i < MAX_COUNT; i++) {
182 for (i=0; i<(int)buckets; i++) {
186 p = (hlink *)(p->next);
196 for (i=0; i < MAX_COUNT; i++) {
197 printf("%2d: %d\n",i, hits[i]);
199 printf("max hits in a bucket = %d\n", max);
202 void htable::grow_table()
204 Dmsg1(100, "Grow called old size = %d\n", buckets);
205 /* Setup a bigger table */
206 htable *big = (htable *)malloc(sizeof(htable));
207 big->loffset = loffset;
208 big->mask = mask<<1 | 1;
209 big->rshift = rshift - 1;
211 big->buckets = buckets * 2;
212 big->max_items = big->buckets * 4;
213 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
214 memset(big->table, 0, big->buckets * sizeof(hlink *));
217 /* Insert all the items in the new hash table */
218 Dmsg1(100, "Before copy num_items=%d\n", num_items);
220 * We walk through the old smaller tree getting items,
221 * but since we are overwriting the colision links, we must
222 * explicitly save the item->next pointer and walk each
223 * colision chain ourselves. We do use next() for getting
224 * to the next bucket.
226 for (void *item=first(); item; ) {
227 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
228 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
229 big->insert(((hlink *)((char *)item+loffset))->key, item);
231 item = (void *)((char *)ni-loffset);
237 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
238 if (num_items != big->num_items) {
239 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
242 memcpy(this, big, sizeof(htable)); /* move everything across */
244 Dmsg0(100, "Exit grow.\n");
247 bool htable::insert(char *key, void *item)
251 return false; /* already exists */
253 ASSERT(index < buckets);
254 Dmsg2(100, "Insert: hash=0x%x index=%d\n", (unsigned)hash, index);
255 hp = (hlink *)(((char *)item)+loffset);
256 Dmsg4(100, "Insert hp=0x%x index=%d item=0x%x offset=%u\n", (unsigned)hp,
257 index, (unsigned)item, loffset);
258 hp->next = table[index];
262 Dmsg3(100, "Insert hp->next=0x%x hp->hash=0x%x hp->key=%s\n",
263 (unsigned)hp->next, hp->hash, hp->key);
265 if (++num_items >= max_items) {
266 Dmsg2(100, "num_items=%d max_items=%d\n", num_items, max_items);
269 Dmsg3(100, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
273 void *htable::lookup(char *key)
276 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
277 // Dmsg2(100, "hp=0x%x key=%s\n", (long)hp, hp->key);
278 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
279 Dmsg1(100, "lookup return %x\n", ((char *)hp)-loffset);
280 return ((char *)hp)-loffset;
288 Dmsg1(100, "Enter next: walkptr=0x%x\n", (unsigned)walkptr);
290 walkptr = (hlink *)(walkptr->next);
292 while (!walkptr && walk_index < buckets) {
293 walkptr = table[walk_index++];
295 Dmsg3(100, "new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
296 (unsigned)(walkptr->next), walk_index-1);
300 Dmsg2(100, "next: rtn 0x%x walk_index=%d\n",
301 (unsigned)(((char *)walkptr)-loffset), walk_index);
302 return ((char *)walkptr)-loffset;
304 Dmsg0(100, "next: return NULL\n");
308 void *htable::first()
310 Dmsg0(100, "Enter first\n");
311 walkptr = table[0]; /* get first bucket */
312 walk_index = 1; /* Point to next index */
313 while (!walkptr && walk_index < buckets) {
314 walkptr = table[walk_index++]; /* go to next bucket */
316 Dmsg3(100, "first new walkptr=0x%x next=0x%x inx=%d\n", (unsigned)walkptr,
317 (unsigned)(walkptr->next), walk_index-1);
321 Dmsg1(100, "Leave first walkptr=0x%x\n", (unsigned)walkptr);
322 return ((char *)walkptr)-loffset;
324 Dmsg0(100, "Leave first walkptr=NULL\n");
328 /* Destroy the table and its contents */
329 void htable::destroy()
342 Dmsg0(100, "Done destroy.\n");
354 #define NITEMS 1000000
360 MYJCR *save_jcr = NULL, *item;
364 jcrtbl = (htable *)malloc(sizeof(htable));
365 jcrtbl->init(jcr, &jcr->link, NITEMS);
367 Dmsg1(000, "Inserting %d items\n", NITEMS);
368 for (int i=0; i<NITEMS; i++) {
370 len = sprintf(mkey, "This is htable item %d", i) + 1;
373 jcr = (MYJCR *)jcrtbl->hash_alloc(sizeof(MYJCR));
374 jcr->key = (char *)jcrtbl->hash_alloc(len);
376 jcr = (MYJCR *)malloc(sizeof(MYJCR));
377 jcr->key = (char *)malloc(len);
379 memcpy(jcr->key, mkey, len);
380 Dmsg2(100, "link=0x%x jcr=0x%x\n", (unsigned)&jcr->link, (unsigned)jcr);
382 jcrtbl->insert(jcr->key, jcr);
387 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
388 printf("Bad news: %s not found.\n", save_jcr->key);
390 printf("Item 10's key is: %s\n", item->key);
394 printf("Walk the hash table:\n");
395 foreach_htable (jcr, jcrtbl) {
396 // printf("htable item = %s\n", jcr->key);
402 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
403 printf("Calling destroy\n");
407 printf("Freed jcrtbl\n");