2 Bacula® - The Network Backup Solution
4 Copyright (C) 2003-2010 Free Software Foundation Europe e.V.
6 The main author of Bacula is Kern Sibbald, with contributions from
7 many others, a complete list can be found in the file AUTHORS.
8 This program is Free Software; you can redistribute it and/or
9 modify it under the terms of version three of the GNU Affero General Public
10 License as published by the Free Software Foundation and included
13 This program is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 General Public License for more details.
18 You should have received a copy of the GNU Affero General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
23 Bacula® is a registered trademark of Kern Sibbald.
24 The licensor of Bacula is the Free Software Foundation Europe
25 (FSFE), Fiduciary Program, Sumatrastrasse 25, 8006 Zürich,
26 Switzerland, email:ftf@fsfeurope.org.
29 * Bacula hash table routines
31 * htable is a hash table of items (pointers). This code is
32 * adapted and enhanced from code I wrote in 1982 for a
33 * relocatable linker. At that time, the hash table size
34 * was fixed and a prime number, which essentially provides
35 * the randomness. In this program, the hash table can grow when
36 * it gets too full, so the table size here is a binary number. The
37 * hashing is provided using an idea from Tcl where the initial
38 * hash code is "randomized" using a simple calculation from
39 * a random number generator that multiplies by a big number
40 * (I multiply by a prime number, while Tcl did not)
41 * then shifts the result down and does the binary division
42 * by masking. Increasing the size of the hash table is simple.
43 * Just create a new larger table, walk the old table and
44 * re-hash insert each entry into the new table.
47 * Kern Sibbald, July MMIII
/*
 * Sizing for the "big buffer" allocator: small hash_malloc() requests
 * are carved out of large malloc()ed chunks up to ~10MB total.
 */
#define PAGE_SIZE 4096
#define MAX_PAGES 2400
#define MAX_BUF_SIZE (MAX_PAGES * PAGE_SIZE)  /* approx 10MB */

/* Debug level used by the Dmsg tracing calls in this file */
static const int dbglvl = 500;
60 /* ===================================================================
66 * This subroutine gets a big buffer.
68 void htable::malloc_big_buf(int size)
72 hmem = (struct h_mem *)malloc(size);
75 hmem->next = mem_block;
77 hmem->mem = mem_block->first;
78 hmem->rem = (char *)hmem + size - hmem->mem;
79 Dmsg3(100, "malloc buf=%p size=%d rem=%d\n", hmem, size, hmem->rem);
82 /* This routine frees the whole tree */
83 void htable::hash_big_free()
85 struct h_mem *hmem, *rel;
87 for (hmem=mem_block; hmem; ) {
90 Dmsg1(100, "free malloc buf=%p\n", rel);
98 * Normal hash malloc routine that gets a
99 * "small" buffer from the big buffer
101 char *htable::hash_malloc(int size)
105 int asize = BALIGN(size);
107 if (mem_block->rem < asize) {
109 if (total_size >= (MAX_BUF_SIZE / 2)) {
110 mb_size = MAX_BUF_SIZE;
112 mb_size = MAX_BUF_SIZE / 2;
114 malloc_big_buf(mb_size);
116 mem_block->rem -= asize;
117 buf = mem_block->mem;
118 mem_block->mem += asize;
123 return (char *)malloc(size);
131 * Create hash of key, stored in hash then
132 * create and return the pseudo random bucket index
134 void htable::hash_index(char *key)
137 for (char *p=key; *p; p++) {
138 // hash += (hash << 3) + (uint32_t)*p;
139 hash += ((hash << 5) | (hash >> (sizeof(hash)*8-5))) + (uint32_t)*p;
141 /* Multiply by large prime number, take top bits, mask for remainder */
142 index = ((hash * 1103515249) >> rshift) & mask;
143 Dmsg2(dbglvl, "Leave hash_index hash=0x%x index=%d\n", hash, index);
147 * tsize is the estimated number of entries in the hash table
149 htable::htable(void *item, void *link, int tsize)
151 init(item, link, tsize);
154 void htable::init(void *item, void *link, int tsize)
158 memset(this, 0, sizeof(htable));
163 for (pwr=0; tsize; pwr++) {
166 loffset = (char *)link - (char *)item;
167 mask = ~((~0)<<pwr); /* 3 bits => table size = 8 */
168 rshift = 30 - pwr; /* start using bits 28, 29, 30 */
169 buckets = 1<<pwr; /* hash table size -- power of two */
170 max_items = buckets * 4; /* allow average 4 entries per chain */
171 table = (hlink **)malloc(buckets * sizeof(hlink *));
172 memset(table, 0, buckets * sizeof(hlink *));
174 malloc_big_buf(MAX_BUF_SIZE); /* ***FIXME*** need variable or some estimate */
178 uint32_t htable::size()
184 * Take each hash link and walk down the chain of items
185 * that hash there counting them (i.e. the hits),
186 * then report that number.
187 * Obiously, the more hits in a chain, the more time
188 * it takes to reference them. Empty chains are not so
189 * hot either -- as it means unused or wasted space.
198 printf("\n\nNumItems=%d\nTotal buckets=%d\n", num_items, buckets);
199 printf("Hits/bucket: buckets\n");
200 for (i=0; i < MAX_COUNT; i++) {
203 for (i=0; i<(int)buckets; i++) {
207 p = (hlink *)(p->next);
217 for (i=0; i < MAX_COUNT; i++) {
218 printf("%2d: %d\n",i, hits[i]);
220 printf("buckets=%d num_items=%d max_items=%d\n", buckets, num_items, max_items);
221 printf("max hits in a bucket = %d\n", max);
223 printf("total bytes malloced = %d\n", total_size);
224 printf("total blocks malloced = %d\n", blocks);
228 void htable::grow_table()
230 Dmsg1(100, "Grow called old size = %d\n", buckets);
231 /* Setup a bigger table */
232 htable *big = (htable *)malloc(sizeof(htable));
233 memcpy(big, this, sizeof(htable)); /* start with original class data */
234 big->loffset = loffset;
235 big->mask = mask<<1 | 1;
236 big->rshift = rshift - 1;
238 big->buckets = buckets * 2;
239 big->max_items = big->buckets * 4;
240 /* Create a bigger hash table */
241 big->table = (hlink **)malloc(big->buckets * sizeof(hlink *));
242 memset(big->table, 0, big->buckets * sizeof(hlink *));
245 /* Insert all the items in the new hash table */
246 Dmsg1(100, "Before copy num_items=%d\n", num_items);
248 * We walk through the old smaller tree getting items,
249 * but since we are overwriting the colision links, we must
250 * explicitly save the item->next pointer and walk each
251 * colision chain ourselves. We do use next() for getting
252 * to the next bucket.
254 for (void *item=first(); item; ) {
255 void *ni = ((hlink *)((char *)item+loffset))->next; /* save link overwritten by insert */
256 Dmsg1(100, "Grow insert: %s\n", ((hlink *)((char *)item+loffset))->key);
257 big->insert(((hlink *)((char *)item+loffset))->key, item);
259 item = (void *)((char *)ni-loffset);
265 Dmsg1(100, "After copy new num_items=%d\n", big->num_items);
266 if (num_items != big->num_items) {
267 Dmsg0(000, "****** Big problems num_items mismatch ******\n");
270 memcpy(this, big, sizeof(htable)); /* move everything across */
272 Dmsg0(100, "Exit grow.\n");
275 bool htable::insert(char *key, void *item)
279 return false; /* already exists */
281 ASSERT(index < buckets);
282 Dmsg2(dbglvl, "Insert: hash=%p index=%d\n", hash, index);
283 hp = (hlink *)(((char *)item)+loffset);
284 Dmsg4(dbglvl, "Insert hp=%p index=%d item=%p offset=%u\n", hp,
285 index, item, loffset);
286 hp->next = table[index];
290 Dmsg3(dbglvl, "Insert hp->next=%p hp->hash=0x%x hp->key=%s\n",
291 hp->next, hp->hash, hp->key);
293 if (++num_items >= max_items) {
294 Dmsg2(dbglvl, "num_items=%d max_items=%d\n", num_items, max_items);
297 Dmsg3(dbglvl, "Leave insert index=%d num_items=%d key=%s\n", index, num_items, key);
301 void *htable::lookup(char *key)
304 for (hlink *hp=table[index]; hp; hp=(hlink *)hp->next) {
305 // Dmsg2(100, "hp=%p key=%s\n", hp, hp->key);
306 if (hash == hp->hash && strcmp(key, hp->key) == 0) {
307 Dmsg1(dbglvl, "lookup return %p\n", ((char *)hp)-loffset);
308 return ((char *)hp)-loffset;
316 Dmsg1(dbglvl, "Enter next: walkptr=%p\n", walkptr);
318 walkptr = (hlink *)(walkptr->next);
320 while (!walkptr && walk_index < buckets) {
321 walkptr = table[walk_index++];
323 Dmsg3(dbglvl, "new walkptr=%p next=%p inx=%d\n", walkptr,
324 walkptr->next, walk_index-1);
328 Dmsg2(dbglvl, "next: rtn %p walk_index=%d\n",
329 ((char *)walkptr)-loffset, walk_index);
330 return ((char *)walkptr)-loffset;
332 Dmsg0(dbglvl, "next: return NULL\n");
336 void *htable::first()
338 Dmsg0(dbglvl, "Enter first\n");
339 walkptr = table[0]; /* get first bucket */
340 walk_index = 1; /* Point to next index */
341 while (!walkptr && walk_index < buckets) {
342 walkptr = table[walk_index++]; /* go to next bucket */
344 Dmsg3(dbglvl, "first new walkptr=%p next=%p inx=%d\n", walkptr,
345 walkptr->next, walk_index-1);
349 Dmsg1(dbglvl, "Leave first walkptr=%p\n", walkptr);
350 return ((char *)walkptr)-loffset;
352 Dmsg0(dbglvl, "Leave first walkptr=NULL\n");
356 /* Destroy the table and its contents */
357 void htable::destroy()
374 Dmsg0(100, "Done destroy.\n");
386 #define NITEMS 5000000
392 MYJCR *save_jcr = NULL, *item;
396 jcrtbl = (htable *)malloc(sizeof(htable));
397 jcrtbl->init(jcr, &jcr->link, NITEMS);
399 Dmsg1(000, "Inserting %d items\n", NITEMS);
400 for (int i=0; i<NITEMS; i++) {
402 len = sprintf(mkey, "This is htable item %d", i) + 1;
404 jcr = (MYJCR *)jcrtbl->hash_malloc(sizeof(MYJCR));
405 jcr->key = (char *)jcrtbl->hash_malloc(len);
406 memcpy(jcr->key, mkey, len);
407 Dmsg2(100, "link=%p jcr=%p\n", jcr->link, jcr);
409 jcrtbl->insert(jcr->key, jcr);
414 if (!(item = (MYJCR *)jcrtbl->lookup(save_jcr->key))) {
415 printf("Bad news: %s not found.\n", save_jcr->key);
417 printf("Item 10's key is: %s\n", item->key);
421 printf("Walk the hash table:\n");
422 foreach_htable (jcr, jcrtbl) {
423 // printf("htable item = %s\n", jcr->key);
429 printf("Got %d items -- %s\n", count, count==NITEMS?"OK":"***ERROR***");
430 printf("Calling destroy\n");
434 printf("Freed jcrtbl\n");
436 sm_dump(false); /* unit test */