More hdb tweaks, add radix sort code from mbackes@symas.com

author Howard Chu <hyc@openldap.org>

Fri, 16 Sep 2005 01:25:40 +0000 (01:25 +0000)

committer Howard Chu <hyc@openldap.org>

Fri, 16 Sep 2005 01:25:40 +0000 (01:25 +0000)
author Howard Chu <hyc@openldap.org>
Fri, 16 Sep 2005 01:25:40 +0000 (01:25 +0000)
committer Howard Chu <hyc@openldap.org>
Fri, 16 Sep 2005 01:25:40 +0000 (01:25 +0000)
diff --git a/servers/slapd/back-bdb/dn2id.c b/servers/slapd/back-bdb/dn2id.c

index e7b90ee0df9b61a515558bf10e11b20c6dcf822e..5131fd1a9fe20df68e6d9375533b4cce5f00d5ce 100644 (file)
--- a/servers/slapd/back-bdb/dn2id.c
+++ b/servers/slapd/back-bdb/dn2id.c
@@ -952,13 +952,13 @@ hdb_dn2idl_internal(
         }
  
  saveit:
-       if ( !BDB_IDL_IS_RANGE( cx->tmp ) && cx->tmp[0] > 1 )
+       if ( !BDB_IDL_IS_RANGE( cx->tmp ) && cx->tmp[0] > 3 )
                 bdb_idl_sort( cx->tmp, cx->buf );
         if ( cx->bdb->bi_idl_cache_max_size ) {
                 cx->key.data = &cx->id;
                 bdb_idl_cache_put( cx->bdb, cx->db, &cx->key, cx->tmp, cx->rc );
         }
-       ;
+
  gotit:
         if ( !BDB_IDL_IS_ZERO( cx->tmp )) {
                 if ( cx->prefix == DN_SUBTREE_PREFIX ) {
@@ -1044,7 +1044,7 @@ hdb_dn2idl(
         DBTzero(&cx.data);
  
         hdb_dn2idl_internal(&cx);
-       if ( cx.need_sort && !BDB_IDL_IS_RANGE( cx.ids ) && cx.ids[0] > 1 )
+       if ( cx.need_sort && !BDB_IDL_IS_RANGE( cx.ids ) && cx.ids[0] > 3 )
                 bdb_idl_sort( cx.ids, cx.tmp );
  
         return cx.rc;
diff --git a/servers/slapd/back-bdb/idl.c b/servers/slapd/back-bdb/idl.c

index e293407a5678a6d320211a32e0d3ee6e80d7ce4a..2ec368c24422ccfb2eb5ea1312b7eeaa8417c4c5 100644 (file)
--- a/servers/slapd/back-bdb/idl.c
+++ b/servers/slapd/back-bdb/idl.c
@@ -1207,8 +1207,11 @@ ID bdb_idl_next( ID *ids, ID *cursor )
         return NOID;
  }
  
-/* Add one ID to an unsorted list. We still maintain a lo/hi reference
- * for fast range compaction.
+#ifdef BDB_HIER
+
+/* Add one ID to an unsorted list. We ensure that the first element is the
+ * minimum and the last element is the maximum, for fast range compaction.
+ * This also means that IDLs of up to 3 elements are always sorted.
   */
  int bdb_idl_append_one( ID *ids, ID id )
  {
@@ -1229,15 +1232,17 @@ int bdb_idl_append_one( ID *ids, ID id )
                         tmp = ids[1];
                         ids[1] = id;
                         id = tmp;
-               } else if ( ids[0] > 1 && id > ids[2] ) {
-                       tmp = ids[2];
-                       ids[2] = id;
+               }
+               if ( ids[0] > 1 && id < ids[ids[0]] ) {
+                       tmp = ids[ids[0]];
+                       ids[ids[0]] = id;
                         id = tmp;
                 }
         }
         ids[0]++;
         if ( ids[0] >= BDB_IDL_UM_MAX ) {
                 ids[0] = NOID;
+               ids[2] = id;
         } else {
                 ids[ids[0]] = id;
         }
@@ -1292,6 +1297,8 @@ int bdb_idl_append( ID *a, ID *b )
         return 0;
  }
  
+#if 0
+
  /* Quicksort + Insertion sort for small arrays */
  
  #define SMALL  8
@@ -1359,3 +1366,113 @@ bdb_idl_sort( ID *ids, ID *tmp )
                 }
         }
  }
+
+#else
+
+/* 8 bit Radix sort + insertion sort
+ * 
+ * based on code from http://www.cubic.org/docs/radix.htm
+ * with improvements by mbackes@symas.com and hyc@symas.com
+ *
+ * The radix sort is O(n) but has a relatively high constant factor. For
+ * lists up to ~50 entries quicksort is slightly faster; up to ~100 the two
+ * are even; beyond ~100 the radix sort is much faster. Insertion sort
+ * beats both for lists of fewer than ~50 entries (see SMALL below).
+ */
+
+#define BUCKETS        (1<<8)
+#define SMALL  50
+
+void
+bdb_idl_sort( ID *ids, ID *tmp )
+{
+       int count, soft_limit, phase = 0, size = ids[0];
+       ID *idls[2], mask, maxval = ids[size];
+
+       if ( BDB_IDL_IS_RANGE( ids ))
+               return;
+
+       /* Use insertion sort for small lists */
+       if ( size <= SMALL ) {
+               int i,j;
+               ID a;
+
+               for (j=1;j<=size;j++) {
+                       a = ids[j];
+                       for (i=j-1;i>=1;i--) {
+                               if (ids[i] <= a) break;
+                               ids[i+1] = ids[i];
+                       }
+                       ids[i+1] = a;
+               }
+               return;
+       }
+
+       tmp[0] = size;
+       idls[0] = ids;
+       idls[1] = tmp;
+
+       soft_limit = sizeof(ID) - 1;
+       mask = (ID)0xff << (sizeof(ID) - 1) * 8;
+
+       while (!(maxval & mask)) {
+               soft_limit--;
+               mask >>= 8;
+       }
+
+       for (
+#if BYTE_ORDER == BIG_ENDIAN
+       count = soft_limit; count >= 0; --count
+#else
+       count = 0; count <= soft_limit; ++count
+#endif
+       ) {
+               unsigned int num[BUCKETS], * np, n, sum;
+               int i;
+        ID *sp, *source, *dest;
+        unsigned char *bp, *source_start;
+
+               source = idls[phase]+1;
+               dest = idls[phase^1]+1;
+               source_start =  ((unsigned char *) source) + count;
+
+        np = num;
+        for ( i = BUCKETS; i > 0; --i ) *np++ = 0;
+
+               /* count occurrences of every byte value */
+               bp = source_start;
+        for ( i = size; i > 0; --i, bp += sizeof(ID) )
+                               num[*bp]++;
+
+               /* transform the counts into starting offsets (a running sum of
+                * the preceding counts), storing them back into the same array
+                */
+        sum = 0;
+        np = num;
+        for ( i = BUCKETS; i > 0; --i ) {
+                n = *np;
+                *np++ = sum;
+                sum += n;
+        }
+
+               /* copy each ID into dest at the next free slot of its bucket */
+               bp = source_start;
+        sp = source;
+        for ( i = size; i > 0; --i, bp += sizeof(ID) ) {
+                np = num + *bp;
+                dest[*np] = *sp++;
+                ++(*np);
+        }
+               phase ^= 1;
+       }
+
+       /* copy back from tmp if the final pass left the sorted list there */
+       if ( phase ) {
+               ids++; tmp++;
+               for ( count = 0; count < size; ++count ) 
+                       *ids++ = *tmp++;
+       }
+}
+#endif /* Quick vs Radix */
+
+#endif /* BDB_HIER */
author	Howard Chu <hyc@openldap.org>
	Fri, 16 Sep 2005 01:25:40 +0000 (01:25 +0000)
committer	Howard Chu <hyc@openldap.org>
	Fri, 16 Sep 2005 01:25:40 +0000 (01:25 +0000)
servers/slapd/back-bdb/dn2id.c		patch \| blob \| history
servers/slapd/back-bdb/idl.c		patch \| blob \| history