1 /* centipede.c - generate and install indexing information (view w/tabstop=4) */
10 #include <ac/string.h>
12 #include <ac/unistd.h> /* get link(), unlink() */
/* Search filter main() uses for the source search unless -f supplies another one. */
19 #define DEFAULT_LDAPFILTER "(objectclass=*)"
/* Values of centroidvalues: main() starts with CENTROID_VALUE; -w selects CENTROID_WORD. */
21 #define CENTROID_VALUE 1
22 #define CENTROID_WORD 2
/* Values of centroidtype: main() starts with CENTROID_RELATIVE; -F selects CENTROID_FULL, -R selects CENTROID_RELATIVE. */
24 #define CENTROID_RELATIVE 1
25 #define CENTROID_FULL 2
/* Delimiter set handed to ldap_pvt_strtok() when a value is split into words for a word-based centroid. */
27 #define WORD_BREAKS " -',.()!;:&$%*\"/\\+_<>=?[]|^~"
/* Bind methods for the source and destination servers; main() sets both to LDAP_AUTH_SIMPLE and the -m and -M options may replace them. */
40 int srcldapauthmethod;
41 int destldapauthmethod;
/* Prototypes of the file-local helpers used by main(). */
45 static LDAP *start_ldap_search(char *ldapsrcurl, char *ldapfilter, char **attrs);
46 static LDAP *bind_to_destination_ldap(char *ldapsrcurl, char *ldapdesturl);
47 static int create_tmp_files(char **attrs, char ***tmpfile, LDBM **ldbm);
48 static int generate_new_centroids(LDAP *ld, char **attrs, LDBM *ldbm);
49 static LDAPMod **diff_centroids(char *attr, LDBM oldbm, LDBM nldbm, int nentries);
50 static LDAPMod **full_centroid(char *attr, LDBM ldbm, int nentries);
51 static char **charray_add_dup(char ***a, int *cur, int *max, char *s);
/*
 * usage: write the command-line synopsis to stderr - the two required
 * URL arguments (-s source, -d destination) followed by every option
 * listed below. `name` is printed as the program name in the first line.
 *
 * NOTE(review): main()'s getopt string also accepts -W, which has no
 * line in the help text visible here. Whatever follows the last
 * fprintf is not visible in this view.
 */
53 static void usage( char *name )
55 fprintf( stderr, "usage: %s [options] -s url -d url attributes\n", name );
56 fprintf( stderr, "where:\n" );
57 fprintf( stderr, "\t-s url\t\t[[ldap://][host[:port]]/]searchbasedn\n");
58 fprintf( stderr, "\t-d url\t\t[[ldap://][host[:port]]/]centroidentrydn\n");
59 fprintf( stderr, "options:\n" );
60 fprintf( stderr, "\t-v \t\tturn on verbose mode\n" );
61 fprintf( stderr, "\t-n \t\tgenerate, but do not install index info\n" );
62 fprintf( stderr, "\t-f filter\tentry selection filter\n" );
63 fprintf( stderr, "\t-F \t\tgenerate a full centroid\n" );
64 fprintf( stderr, "\t-R \t\tgenerate a relative centroid\n" );
65 fprintf( stderr, "\t-w \t\tgenerate a word-based centroid\n" );
66 fprintf( stderr, "\t-t directory\tcentroid directory\n" );
67 fprintf( stderr, "\t-b binddn\tsource bind dn\n" );
68 fprintf( stderr, "\t-p passwd\tsource bind passwd (for simple auth)\n" );
69 fprintf( stderr, "\t-m authmethod\tsource authmethod \"simple\" or \"kerberos\"\n" );
70 fprintf( stderr, "\t-B binddn\tdestination bind dn\n" );
71 fprintf( stderr, "\t-P passwd\tdestination bind passwd (for simple auth)\n" );
72 fprintf( stderr, "\t-M authmethod\tdestination authmethod \"simple\" or \"kerberos\"\n" );
73 fprintf( stderr, "\t-c size\t\tldbm cache size\n" );
/*
 * main: drive one centroid generation run.
 *
 *   1. parse the options, then require at least one attribute argument
 *      and both the -s and -d URLs;
 *   2. start the search on the source server (start_ldap_search);
 *   3. create one temporary ldbm file per attribute (create_tmp_files);
 *   4. fill those files from the search results (generate_new_centroids);
 *   5. bind to the destination server (bind_to_destination_ldap);
 *   6. per attribute, build either a full or a differential list of
 *      modifications, apply it with ldap_modify_s() unless `not` is set,
 *      and move the new centroid file into the place of the old one.
 *
 * Terminates through exit(): EXIT_FAILURE for usage errors and for a
 * failure in steps 2-5, EXIT_SUCCESS at the end of the run and in the
 * "no entries matched" path.
 */
77 main( int argc, char **argv )
80 char *ldapsrcurl, *ldapdesturl;
/* defaults that the options below may override */
95 srcldapauthmethod = LDAP_AUTH_SIMPLE;
96 destldapauthmethod = LDAP_AUTH_SIMPLE;
99 destldapbinddn = NULL;
100 destldappasswd = NULL;
101 ldapfilter = DEFAULT_LDAPFILTER;
102 centroidvalues = CENTROID_VALUE;
103 centroidtype = CENTROID_RELATIVE;
108 while ( (i = getopt( argc, argv, "s:d:c:b:B:f:FRWp:P:m:M:t:vwn" ))
111 case 's': /* source url [[ldap://][host[:port]]/]basedn */
112 ldapsrcurl = strdup( optarg );
115 case 'd': /* destination url [[ldap://][host[:port]]/]entrydn */
116 ldapdesturl = strdup( optarg );
119 case 'f': /* specify a filter */
120 ldapfilter = strdup( optarg );
123 case 'F': /* generate full centroid */
124 centroidtype = CENTROID_FULL;
127 case 'R': /* generate relative centroid */
128 centroidtype = CENTROID_RELATIVE;
131 case 'w': /* generate word centroid */
132 centroidvalues = CENTROID_WORD;
135 case 'W': /* generate weights */
139 case 't': /* temp file directory */
140 centdir = strdup( optarg );
143 case 'b': /* src bind dn */
144 srcldapbinddn = strdup( optarg );
147 case 'p': /* src bind password */
148 srcldappasswd = strdup( optarg );
153 case 'B': /* dest bind dn */
154 destldapbinddn = strdup( optarg );
157 case 'P': /* dest bind password */
158 destldappasswd = strdup( optarg );
163 case 'm': /* src bind method */
164 if ( strcasecmp( optarg, "simple" ) == 0 ) {
165 srcldapauthmethod = LDAP_AUTH_SIMPLE;
166 } else if ( strcasecmp( optarg, "kerberos" ) == 0 ) {
167 srcldapauthmethod = LDAP_AUTH_KRBV4;
169 fprintf( stderr, "%s: unknown auth method\n", optarg );
170 fputs( "expecting \"simple\" or \"kerberos\"\n", stderr );
171 exit( EXIT_FAILURE );
175 case 'M': /* dest bind method */
176 if ( strcasecmp( optarg, "simple" ) == 0 ) {
177 destldapauthmethod = LDAP_AUTH_SIMPLE;
178 } else if ( strcasecmp( optarg, "kerberos" ) == 0 ) {
179 destldapauthmethod = LDAP_AUTH_KRBV4;
181 fprintf( stderr, "%s: unknown auth method\n", optarg );
182 fputs( "expecting \"simple\" or \"kerberos\"\n", stderr );
183 exit( EXIT_FAILURE );
187 case 'c': /* ldbm cache size */
/* NOTE(review): atoi() gives no error indication, so a non-numeric -c argument is not rejected here */
188 ldbmcachesize = atoi( optarg );
191 case 'v': /* turn on verbose mode */
195 case 'n': /* don't actually install index info */
201 exit( EXIT_FAILURE );
/*
 * optind == argc means no attribute names were left after the options.
 * NOTE(review): the initial values of ldapsrcurl and ldapdesturl are
 * set on lines not visible here - confirm they start out as NULL.
 */
204 if ( optind == argc || ldapsrcurl == NULL || ldapdesturl == NULL ) {
206 exit( EXIT_FAILURE );
/* the remaining arguments are used directly as the attribute list */
208 attrs = &argv[optind];
211 * open the ldap connection and start searching for the entries
212 * we will use to generate the centroids.
215 if ( (ld = start_ldap_search( ldapsrcurl, ldapfilter, attrs )) == NULL ) {
216 fprintf( stderr, "could not initiate ldap search\n" );
217 exit( EXIT_FAILURE );
220 if ( create_tmp_files( attrs, &tmpfile, &ldbm ) != 0 ) {
221 fprintf( stderr, "could not create temp files\n" );
222 exit( EXIT_FAILURE );
226 * go through the entries returned, building a centroid for each
227 * attribute as we go.
230 if ( (count = generate_new_centroids( ld, attrs, ldbm )) < 1 ) {
/* count < 1 leads to both exits below; the test that tells them apart is on a line not visible here */
232 fprintf( stderr, "no entries matched\n" );
233 exit( EXIT_SUCCESS );
235 fprintf( stderr, "could not generate new centroid\n" );
236 exit( EXIT_FAILURE );
241 * for each centroid we generated above, compare to the existing
242 * centroid, if any, and produce adds and deletes, or produce
243 * an entirely new centroid. in either case, update the "current"
244 * centroid version with the new one we just generated.
247 if ( (ld = bind_to_destination_ldap( ldapsrcurl, ldapdesturl )) == NULL ) {
249 "could not bind to index server, or could not create index entry\n" );
250 exit( EXIT_FAILURE );
253 for ( i = 0; ldbm[i] != NULL; i++ ) {
254 /* generate the name of the existing centroid, if any */
255 s = strrchr( tmpfile[i], '/' );
/*
 * NOTE(review): sprintf() does not bound the write into buf, and buf's
 * declaration is not visible here - confirm it can hold the path plus
 * "/cent." plus the attribute name. Presumably the lines around this
 * one cut tmpfile[i] at the '/' found above so that it names the
 * directory - confirm.
 */
257 sprintf( buf, "%s/cent.%s", tmpfile[i], attrs[i] );
260 /* generate the full centroid changes */
/* full centroid when -F was given, or when no existing centroid file can be opened */
261 if ( centroidtype == CENTROID_FULL || (oldbm = ldbm_open( buf,
262 LDBM_WRITER, 0, ldbmcachesize )) == NULL ) {
263 if ( (mods = full_centroid( attrs[i], ldbm[i], count )) == NULL ) {
264 fprintf( stderr, "could not produce full centroid for %s\n",
269 /* generate the differential centroid changes */
271 if ( (mods = diff_centroids( attrs[i], oldbm, ldbm[i], count ))
273 fprintf( stderr, "could not diff centroids\n" );
/* dump every modification and its values to stdout */
281 printf("changes:\n");
282 for ( j = 0; mods[j] != NULL; j++ ) {
283 switch( mods[j]->mod_op ) {
285 printf( "\tadd: %s\n",mods[j]->mod_type );
287 case LDAP_MOD_DELETE:
288 printf( "\tdelete: %s\n",mods[j]->mod_type );
290 case LDAP_MOD_REPLACE:
291 printf( "\treplace: %s\n",mods[j]->mod_type );
294 if ( mods[j]->mod_values != NULL ) {
295 for ( k = 0; mods[j]->mod_values[k] != NULL; k++ ) {
296 printf( "\t\t%s\n", mods[j]->mod_values[k] );
300 printf("end changes:\n");
304 printf( "%sModifying centroid...", not ? "Not " : "" );
308 /* attempt to make the changes to the index server entry */
/* when `not` is non-zero the modify call is skipped (presumably set by -n; the assignment is not visible here) */
309 if ( !not && ldap_modify_s( ld, ldapbase, mods ) != LDAP_SUCCESS ) {
310 fprintf( stderr, "could not apply centroid modification for %s\n",
312 ldap_perror( ld, ldapbase );
314 ldap_mods_free( mods, 1 );
321 /* move the new centroid into the old one's place */
/* the old file is removed first, then the temp file is hard-linked under the permanent name and its temp name is removed */
323 (void) unlink( buf );
324 if ( link( tmpfile[i], buf ) != 0 ) {
326 fprintf( stderr, "could not rename %s to %s\n", buf,
331 (void) unlink( tmpfile[i] );
/* close the per-attribute ldbm handle of every attribute */
335 for ( i = 0; attrs[i] != NULL; i++ ) {
336 ldbm_close( ldbm[i] );
342 exit( EXIT_SUCCESS );
346 * open an ldap connection, bind, and initiate the search
/*
 * Body of start_ldap_search (the definition line is not visible in
 * this view; the name comes from the prototype and from main()'s call).
 *
 * Splits the source URL into ldaphost and ldapbase, prints the base and
 * the attribute list, connects with ldap_init(), binds with the source
 * DN, password and auth method, and issues a subtree search with
 * ldap_search(). The results are read later through ldap_result() in
 * generate_new_centroids(). main() treats a NULL return as failure.
 *
 * NOTE(review): the usage text shows the "ldap://" prefix as optional,
 * but a URL without it reaches the "Not an LDAP URL" message below;
 * what happens after that message is on a line not visible here.
 */
360 if ( strncmp( ldapsrcurl, "ldap://", 7 ) != 0 ) {
361 fputs( "Not an LDAP URL", stderr ); /* Should be smarter? */
/* no '/' in the remainder: all of it is taken as the search base */
365 if ( (s2 = strchr( s, '/' )) == NULL ) {
366 ldapbase = strdup( s );
/* otherwise the host is taken from s and the base from the text after the '/' */
370 ldaphost = strdup( s );
373 ldapbase = strdup( s2 + 1 );
377 printf( "Base: %s\n", ldapbase );
378 printf( "Attributes:" );
379 for ( i = 0; attrs[i] != NULL; i++ ) {
380 printf( " %s", attrs[i] );
383 printf( "Binding to source LDAP server..." );
/* port argument 0 is passed to ldap_init() */
387 if ( (ld = ldap_init( ldaphost, 0 )) == NULL ) {
388 perror( "ldap_init" );
392 if ( ldap_bind_s( ld, srcldapbinddn, srcldappasswd, srcldapauthmethod )
394 ldap_perror( ld, "ldap_bind_s" );
399 printf( "\nInitiating search..." );
400 if ( ldap_search( ld, ldapbase, LDAP_SCOPE_SUBTREE, ldapfilter, attrs, 0 )
402 ldap_perror( ld, "ldap_search" );
415 * create the temporary ldbm files we will use to hold the new centroids
/*
 * Body of create_tmp_files (the definition line is not visible in this
 * view; the name comes from the prototype and from main()'s call).
 *
 * Counts the attributes, allocates the *tmpfile and *ldbm arrays, then
 * for each attribute obtains a file name from tempnam() in centdir and
 * opens it with ldbm_open() (LDBM_WRCREAT, mode 0600). The name array
 * is terminated with NULL. main() treats a non-zero return as failure.
 *
 * NOTE(review): both arrays are allocated with i elements and a NULL is
 * later written at index i after the second loop. The lines between
 * the counting loop and the malloc calls are not visible here - confirm
 * that i is incremented there so the terminator fits.
 */
427 for ( i = 0; attrs[i] != NULL; i++ )
431 if ( (*tmpfile = (char **) malloc( i * sizeof(char *) )) == NULL ) {
435 if ( (*ldbm = (LDBM *) malloc( i * sizeof(LDBM) )) == NULL ) {
439 for ( i = 0; attrs[i] != NULL; i++ ) {
/* tempnam() only produces a name; the file itself is created by the ldbm_open() call that follows */
440 if ( ((*tmpfile)[i] = tempnam( centdir, NULL )) == NULL ) {
445 if ( ((*ldbm)[i] = ldbm_open( (*tmpfile)[i], LDBM_WRCREAT, 0600,
446 ldbmcachesize )) == NULL ) {
447 fprintf( stderr, "ldbm_open of \"%s\" failed\n", (*tmpfile)[i] );
448 perror( "ldbm_open" );
452 (*tmpfile)[i] = NULL;
459 * step through each entry returned from the search and generate
460 * the appropriate centroid values.
/*
 * generate_new_centroids: read the results of the search started on ld
 * and record the attribute values in the per-attribute ldbm files.
 *
 * Loops while ldap_result() returns LDAP_RES_SEARCH_ENTRY. For each
 * entry and each attribute in attrs, every value is lower-cased in
 * place and stored as a key in ldbm[i]: the whole value when
 * centroidvalues is CENTROID_VALUE, otherwise each word obtained by
 * splitting the value on WORD_BREAKS. Prints the entry count at the end.
 *
 * main() reads the return value as the number of entries and treats a
 * value below 1 as "nothing matched or failure"; the return statements
 * themselves are not visible in this view.
 *
 * NOTE(review): the release of res and dn is not visible here - confirm
 * both are freed on every iteration.
 */
464 generate_new_centroids(
472 LDAPMessage *res, *e;
477 ldbm_datum_init( data );
480 printf( "Generating new centroids for..." );
487 while ( (rc = ldap_result( ld, LDAP_RES_ANY, 0, NULL, &res ))
488 == LDAP_RES_SEARCH_ENTRY ) {
490 e = ldap_first_entry( ld, res );
491 dn = ldap_get_dn( ld, e );
493 /* for each attr we want to generate a centroid for */
494 for ( i = 0; attrs[i] != NULL; i++ ) {
495 if ( (val = ldap_get_values( ld, e, attrs[i] )) == NULL ) {
500 for ( j = 0; val[j] != NULL; j++ ) {
502 ldbm_datum_init( key );
504 /* normalize the value */
505 for ( s = val[j]; *s; s++ ) {
/* the cast to unsigned char keeps a negative char value out of TOLOWER */
506 *s = TOLOWER( (unsigned char) *s );
509 if ( isascii( last ) && isdigit( last ) ) {
513 /* generate a value-based centroid */
514 if ( centroidvalues == CENTROID_VALUE ) {
/* the stored key length includes the terminating NUL; the result of the store is discarded on purpose */
516 key.dsize = strlen( key.dptr ) + 1;
517 (void) ldbm_store( ldbm[i], key, data, LDBM_INSERT );
519 /* generate a word-based centroid */
522 for ( w = ldap_pvt_strtok( val[j], WORD_BREAKS, &lasts );
524 w = ldap_pvt_strtok( NULL, WORD_BREAKS, &lasts ) ) {
526 key.dsize = strlen( key.dptr ) + 1;
527 (void) ldbm_store( ldbm[i], key, data, LDBM_INSERT );
531 ldap_value_free( val );
540 printf( "%d entries\n", count );
547 * compare the old and new centroids, generating the appropriate add
548 * and delete operations. if the underlying database is ordered, we
549 * can do this more efficiently.
/*
 * Body of diff_centroids (the definition line is not visible in this
 * view; the name comes from the prototype and from main()'s call).
 *
 * Builds a list of modifications describing the difference between the
 * old centroid (oldbm) and the new one (nldbm) for `attr`:
 *   mods[0]  LDAP_MOD_ADD      keys present only in the new centroid
 *   mods[1]  LDAP_MOD_DELETE   keys present only in the old centroid
 *   mods[2]  LDAP_MOD_REPLACE  "nentries" set to the decimal nentries
 * Mods that end up without values are released at the end.
 *
 * Two collection strategies appear below: a dual traversal that
 * compares keys with strcmp(), and a fetch-based one that looks each
 * key up in the other database. What selects between them is on lines
 * not visible in this view.
 *
 * An allocation failure while setting up the mods ends the program with
 * exit(EXIT_FAILURE); a charray_add_dup() failure releases the mods
 * with ldap_mods_free(). main() treats a NULL return as failure.
 *
 * NOTE(review): mod_type is set to the caller's attr pointer and to a
 * string literal, yet the mods are handed to ldap_mods_free( mods, 1 )
 * - confirm that this library version does not free mod_type.
 */
567 char **avals, **dvals;
568 int amax, acur, dmax, dcur;
571 LDBMCursor *ocursorp;
572 LDBMCursor *ncursorp;
575 printf( "Generating mods for differential %s centroid...", attr );
579 ldbm_datum_init( lastkey );
580 ldbm_datum_init( key );
581 ldbm_datum_init( data );
/* four pointer slots are allocated for the mods array, three LDAPMod structures are allocated, and the list is null-terminated further down */
583 if ( (mods = (LDAPMod **) malloc( sizeof(LDAPMod *) * 4 )) == NULL ||
584 (mods[0] = (LDAPMod *) malloc( sizeof(LDAPMod) )) == NULL ||
585 (mods[1] = (LDAPMod *) malloc( sizeof(LDAPMod) )) == NULL ||
586 (mods[2] = (LDAPMod *) malloc( sizeof(LDAPMod) )) == NULL ||
587 (vals = (char **) malloc( 2 * sizeof(char *) )) == NULL ||
588 (vals[0] = (char *) malloc( 20 )) == NULL )
591 exit( EXIT_FAILURE );
593 /* add values in mods[0] */
594 mods[0]->mod_op = LDAP_MOD_ADD;
595 mods[0]->mod_type = attr;
596 mods[0]->mod_values = NULL;
599 /* delete values in mods[1] */
600 mods[1]->mod_op = LDAP_MOD_DELETE;
601 mods[1]->mod_type = attr;
602 mods[1]->mod_values = NULL;
605 /* number of entries in mods[2] */
/* the 20-byte buffer allocated above is ample for the decimal text of a 32-bit int */
606 sprintf( vals[0], "%d", nentries );
608 mods[2]->mod_op = LDAP_MOD_REPLACE;
609 mods[2]->mod_type = "nentries";
610 mods[2]->mod_values = vals;
611 /* null terminate list of mods */
616 * if the underlying database is ordered, we can do a more efficient
617 * dual traversal, yielding O(N) performance.
620 ldbm_datum_init( okey );
621 ldbm_datum_init( nkey );
622 ldbm_datum_init( olast );
623 ldbm_datum_init( nlast );
628 for ( okey = ldbm_firstkey( oldbm, &ocursorp ),
629 nkey = ldbm_firstkey( nldbm, &ncursorp );
630 okey.dptr != NULL && nkey.dptr != NULL; )
/* rc > 0 means the new key sorts before the old key, so the new key has no match in the old centroid */
632 int rc = strcmp( okey.dptr, nkey.dptr );
635 /* value is in both places - leave it */
636 if ( olast.dptr != NULL ) {
637 ldbm_datum_free( oldbm, olast );
640 if ( nlast.dptr != NULL ) {
641 ldbm_datum_free( nldbm, nlast );
645 okey = ldbm_nextkey( oldbm, olast, ocursorp );
646 nkey = ldbm_nextkey( nldbm, nlast, ncursorp );
648 } else if ( rc > 0 ) {
649 /* new value is not in old centroid - add it */
650 if ( charray_add_dup( &avals, &acur, &amax, nkey.dptr ) == NULL ) {
651 ldap_mods_free( mods, 1 );
655 if ( nlast.dptr != NULL ) {
656 ldbm_datum_free( nldbm, nlast );
660 nkey = ldbm_nextkey( nldbm, nlast, ncursorp );
663 /* old value is not in new centroid - delete it */
664 if ( charray_add_dup( &dvals, &dcur, &dmax, okey.dptr ) == NULL ) {
665 ldap_mods_free( mods, 1 );
669 if ( olast.dptr != NULL ) {
670 ldbm_datum_free( oldbm, olast );
674 okey = ldbm_nextkey( oldbm, olast, ocursorp );
/* old keys left over once the dual traversal stops are all deletions */
678 while ( okey.dptr != NULL ) {
679 if ( charray_add_dup( &dvals, &dcur, &dmax, okey.dptr ) == NULL ) {
680 ldap_mods_free( mods, 1 );
684 okey = ldbm_nextkey( oldbm, olast, ocursorp );
685 if ( olast.dptr != NULL ) {
686 ldbm_datum_free( oldbm, olast );
690 if ( olast.dptr != NULL ) {
691 ldbm_datum_free( oldbm, olast );
/* new keys left over once the dual traversal stops are all additions */
693 while ( nkey.dptr != NULL ) {
694 if ( charray_add_dup( &avals, &acur, &amax, nkey.dptr ) == NULL ) {
695 ldap_mods_free( mods, 1 );
699 nkey = ldbm_nextkey( nldbm, nlast, ncursorp );
700 if ( nlast.dptr != NULL ) {
701 ldbm_datum_free( nldbm, nlast );
705 if ( nlast.dptr != NULL ) {
706 ldbm_datum_free( nldbm, nlast );
710 * if the underlying database is not ordered, we have to
711 * generate list of values to add by stepping through all new
712 * values and looking them up in the old centroid (not there => add),
713 * then stepping through all old values and looking them up in the
714 * new centroid (not there => delete). this yields O(Nf(N)) performance,
715 * where f(N) is the order to retrieve a single item.
718 /* generate list of values to add */
720 for ( key = ldbm_firstkey( nldbm, &ncursorp ); key.dptr != NULL;
721 key = ldbm_nextkey( nldbm, lastkey, ncursorp ) )
723 /* see if it's in the old one */
724 data = ldbm_fetch( oldbm, key );
726 /* not there - add it */
727 if ( data.dptr == NULL ) {
728 if ( charray_add_dup( &avals, &acur, &amax, key.dptr ) == NULL ) {
729 ldap_mods_free( mods, 1 );
733 ldbm_datum_free( oldbm, data );
735 if ( lastkey.dptr != NULL ) {
736 ldbm_datum_free( nldbm, lastkey );
740 if ( lastkey.dptr != NULL ) {
741 ldbm_datum_free( nldbm, lastkey );
744 /* generate list of values to delete */
746 for ( key = ldbm_firstkey( oldbm, &ocursorp ); key.dptr != NULL;
747 key = ldbm_nextkey( oldbm, lastkey, ocursorp ) )
749 /* see if it's in the new one */
750 data = ldbm_fetch( nldbm, key );
752 /* not there - delete it */
753 if ( data.dptr == NULL ) {
754 if ( charray_add_dup( &dvals, &dcur, &dmax, key.dptr ) == NULL ) {
755 ldap_mods_free( mods, 1 );
759 ldbm_datum_free( nldbm, data );
761 if ( lastkey.dptr != NULL ) {
762 ldbm_datum_free( oldbm, lastkey );
766 if ( lastkey.dptr != NULL ) {
767 ldbm_datum_free( oldbm, lastkey );
/* attach the collected value lists to the add and delete mods */
771 mods[0]->mod_values = avals;
772 mods[1]->mod_values = dvals;
/* a mod without values is released; how the remaining entries are rearranged is on lines not visible here */
779 if ( mods[1]->mod_values == NULL ) {
780 free( (char *) mods[1] );
783 if ( mods[0]->mod_values == NULL ) {
784 free( (char *) mods[0] );
788 if ( mods[0] == NULL ) {
789 free( (char *) mods );
/*
 * Body of full_centroid (the definition line is not visible in this
 * view; the name comes from the prototype and from main()'s call).
 *
 * Builds a list of modifications that replaces the whole centroid:
 *   mods[0]  LDAP_MOD_REPLACE  attr, with every key found in ldbm
 *   mods[1]  LDAP_MOD_REPLACE  "nentries" set to the decimal nentries
 * An allocation failure while setting up the mods ends the program with
 * exit(EXIT_FAILURE). main() treats a NULL return as failure.
 *
 * NOTE(review): vals first holds the nentries value array and is then
 * passed to charray_add_dup() to collect the keys. Presumably it is
 * reset on a line not visible here - confirm, otherwise both mods
 * would end up sharing one value array.
 */
811 printf( "Generating mods for full %s centroid...", attr );
815 ldbm_datum_init( key );
816 ldbm_datum_init( lastkey );
818 if ( (mods = (LDAPMod **) malloc( sizeof(LDAPMod *) * 3 )) == NULL ||
819 (mods[0] = (LDAPMod *) malloc( sizeof(LDAPMod) )) == NULL ||
820 (mods[1] = (LDAPMod *) malloc( sizeof(LDAPMod) )) == NULL ||
821 (vals = (char **) malloc( 2 * sizeof(char *) )) == NULL ||
822 (vals[0] = (char *) malloc( 20 )) == NULL )
825 exit( EXIT_FAILURE );
827 mods[0]->mod_op = LDAP_MOD_REPLACE;
828 mods[0]->mod_type = attr;
829 mods[0]->mod_values = NULL;
/* the 20-byte buffer allocated above is ample for the decimal text of a 32-bit int */
830 sprintf( vals[0], "%d", nentries );
832 mods[1]->mod_op = LDAP_MOD_REPLACE;
833 mods[1]->mod_type = "nentries";
834 mods[1]->mod_values = vals;
/* walk every key of the new centroid and copy it into the value list */
841 for ( key = ldbm_firstkey( ldbm, &cursorp ); key.dptr != NULL;
842 key = ldbm_nextkey( ldbm, lastkey, cursorp ) )
844 if ( charray_add_dup( &vals, &vcur, &vmax, key.dptr ) == NULL ) {
845 ldap_mods_free( mods, 1 );
849 if ( lastkey.dptr != NULL ) {
850 ldbm_datum_free( ldbm, lastkey );
854 if ( lastkey.dptr != NULL ) {
855 ldbm_datum_free( ldbm, lastkey );
857 mods[0]->mod_values = vals;
/* no keys collected: mods[0] and the mods array are released */
864 if ( mods[0]->mod_values == NULL ) {
865 free( (char *) mods[0] );
866 free( (char *) mods );
874 * extract the destination ldap host, port, and base object for the
875 * server to receive the index information. then, open a connection,
876 * bind, and see if the entry exists. if not, create it and set things
877 * up so the centroid full and diff routines can modify it to contain
878 * the new centroid information.
/*
 * bind_to_destination_ldap: connect to the server that receives the
 * index information and make sure the centroid entry exists.
 *
 * Splits ldapdesturl into ldaphost and ldapbase in the same way as the
 * source URL, then builds the entry name in buf: the literal "ref=",
 * then ldapsrcurl, then ldapbase (any text added between those pieces
 * is on lines not visible here). The result replaces ldapbase. After
 * ldap_init() and ldap_bind_s() with the destination credentials, a
 * base-scope search checks for the entry. LDAP_NO_SUCH_OBJECT leads to
 * an ldap_add_s() of an entry with ref = ldapsrcurl and
 * objectclass = indexentry, skipped when `not` is set; any other
 * non-success result is reported with ldap_perror().
 * main() treats a NULL return as failure.
 *
 * NOTE(review): strcpy()/strcat() do not bound the writes into buf,
 * whose declaration is not visible here - confirm its size against the
 * combined length of the source URL and the destination base.
 */
882 bind_to_destination_ldap(
891 char *attrs[2], *refvalues[2], *ocvalues[2];
897 printf( "Binding to destination LDAP server..." );
901 /* first, pick out the destination ldap server info */
902 if ( ldapbase != NULL ) {
/* NOTE(review): the usage text shows "ldap://" as optional, but a URL without it reaches the message below */
906 if ( strncmp( ldapdesturl, "ldap://", 7 ) != 0 ) {
907 fputs( "Not an LDAP URL", stderr ); /* Should be smarter? */
911 if ( (s2 = strchr( s, '/' )) == NULL ) {
912 ldapbase = strdup( s );
916 if ( ldaphost != NULL )
918 ldaphost = strdup( s );
921 ldapbase = strdup( s2 + 1 );
923 strcpy( buf, "ref=" );
/* a source URL containing a space, comma or semicolon gets special handling on lines not visible here */
924 if ( strpbrk( ldapsrcurl, " ,;" ) != NULL ) {
927 for ( s = d = ldapsrcurl; *s; s++ ) {
933 strcat( buf, ldapsrcurl );
934 if ( strpbrk( ldapsrcurl, " ,;" ) != NULL ) {
938 strcat( buf, ldapbase );
940 ldapbase = strdup( buf );
942 if ( (ld = ldap_init( ldaphost, 0 )) == NULL ) {
943 perror( "ldap_init" );
947 if ( ldap_bind_s( ld, destldapbinddn, destldappasswd, destldapauthmethod )
949 ldap_perror( ld, "ldap_bind_s" );
959 rc = ldap_search_s( ld, ldapbase, LDAP_SCOPE_BASE, "(objectclass=*)",
963 if ( rc == LDAP_NO_SUCH_OBJECT ) {
965 printf( "%sCreating centroid entry...", not ? "Not " : "" );
969 /* create the centroid index entry */
971 m[0].mod_type = "ref";
972 refvalues[0] = ldapsrcurl;
974 m[0].mod_values = refvalues;
976 m[1].mod_type = "objectclass";
977 ocvalues[0] = "indexentry";
979 m[1].mod_values = ocvalues;
/* when `not` is non-zero the add is skipped */
984 if ( !not && ldap_add_s( ld, ldapbase, mp ) != LDAP_SUCCESS ) {
985 ldap_perror( ld, ldapbase );
994 } else if ( rc != LDAP_SUCCESS ) {
995 ldap_perror( ld, "ldap_search_s" );
/*
 * Body of charray_add_dup (the definition line is not visible in this
 * view; the name comes from the prototype and from its callers).
 *
 * Appends a strdup() copy of s to the pointer array *a at index *cur
 * and advances *cur. The first allocation has room for BUFSIZ + 1
 * pointers; once *cur reaches *max the array is resized to *max + 1
 * pointers (the update of *max is on a line not visible here).
 * Callers treat a NULL return as failure.
 *
 * NOTE(review): the realloc() result is stored straight into *a, so
 * the old array is lost if it fails; the strdup() result is stored
 * unchecked in the visible line.
 */
1012 *a = (char **) malloc( (BUFSIZ + 1) * sizeof(char *) );
1015 } else if ( *cur >= *max ) {
1017 *a = (char **) realloc( *a, (*max + 1) * sizeof(char *) );
1023 (*a)[(*cur)++] = strdup( s );