]> git.sur5r.net Git - glabels/blob - src/merge-text.c
Imported Upstream version 2.2.8
[glabels] / src / merge-text.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
2
3 /*
4  *  (GLABELS) Label and Business Card Creation program for GNOME
5  *
6  *  merge_text.c:  text-file merge backend module
7  *
8  *  Copyright (C) 2001  Jim Evins <evins@snaught.com>.
9  *
10  *  This program is free software; you can redistribute it and/or modify
11  *  it under the terms of the GNU General Public License as published by
12  *  the Free Software Foundation; either version 2 of the License, or
13  *  (at your option) any later version.
14  *
15  *  This program is distributed in the hope that it will be useful,
16  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *  GNU General Public License for more details.
19  *
20  *  You should have received a copy of the GNU General Public License
21  *  along with this program; if not, write to the Free Software
22  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
23  */
24
25 #include <config.h>
26
27 #include "merge-text.h"
28
29 #include <stdio.h>
30
31 #include "debug.h"
32
33 #define LINE_BUF_LEN 1024
34
35 /*===========================================*/
36 /* Private types                             */
37 /*===========================================*/
38
39 struct _glMergeTextPrivate {
40         gchar             delim;
41         FILE             *fp;
42 };
43
/* Signal identifiers: this class defines none, only the terminator. */
enum {
        LAST_SIGNAL = 0
};

/* Property identifiers passed to the set/get property handlers. */
enum {
        ARG_0     = 0,
        ARG_DELIM = 1
};
52
53 /*===========================================*/
54 /* Private globals                           */
55 /*===========================================*/
56
57
58 /*===========================================*/
59 /* Local function prototypes                 */
60 /*===========================================*/
61
62 static void           gl_merge_text_finalize        (GObject          *object);
63
64 static void           gl_merge_text_set_property    (GObject          *object,
65                                                      guint             param_id,
66                                                      const GValue     *value,
67                                                      GParamSpec       *pspec);
68
69 static void           gl_merge_text_get_property    (GObject          *object,
70                                                      guint             param_id,
71                                                      GValue           *value,
72                                                      GParamSpec       *pspec);
73
74 static GList         *gl_merge_text_get_key_list    (glMerge          *merge);
75 static gchar         *gl_merge_text_get_primary_key (glMerge          *merge);
76 static void           gl_merge_text_open            (glMerge          *merge);
77 static void           gl_merge_text_close           (glMerge          *merge);
78 static glMergeRecord *gl_merge_text_get_record      (glMerge          *merge);
79 static void           gl_merge_text_copy            (glMerge          *dst_merge,
80                                                      glMerge          *src_merge);
81
82 static GList         *parse_line                    (FILE             *fp,
83                                                      gchar             delim);
84 static gchar         *parse_field                   (gchar            *raw_field);
85 static void           free_fields                   (GList           **fields);
86
87 \f
88 /*****************************************************************************/
89 /* Boilerplate object stuff.                                                 */
90 /*****************************************************************************/
91 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE);
92
93 static void
94 gl_merge_text_class_init (glMergeTextClass *class)
95 {
96         GObjectClass *object_class = G_OBJECT_CLASS (class);
97         glMergeClass *merge_class  = GL_MERGE_CLASS (class);
98
99         gl_debug (DEBUG_MERGE, "START");
100
101         gl_merge_text_parent_class = g_type_class_peek_parent (class);
102
103         object_class->set_property = gl_merge_text_set_property;
104         object_class->get_property = gl_merge_text_get_property;
105
106         g_object_class_install_property
107                 (object_class,
108                  ARG_DELIM,
109                  g_param_spec_char ("delim", NULL, NULL,
110                                     0, 0x7F, ',',
111                                     (G_PARAM_READABLE | G_PARAM_WRITABLE)));
112
113         object_class->finalize = gl_merge_text_finalize;
114
115         merge_class->get_key_list    = gl_merge_text_get_key_list;
116         merge_class->get_primary_key = gl_merge_text_get_primary_key;
117         merge_class->open            = gl_merge_text_open;
118         merge_class->close           = gl_merge_text_close;
119         merge_class->get_record      = gl_merge_text_get_record;
120         merge_class->copy            = gl_merge_text_copy;
121
122         gl_debug (DEBUG_MERGE, "END");
123 }
124
125 static void
126 gl_merge_text_init (glMergeText *merge_text)
127 {
128         gl_debug (DEBUG_MERGE, "START");
129
130         merge_text->priv = g_new0 (glMergeTextPrivate, 1);
131
132         gl_debug (DEBUG_MERGE, "END");
133 }
134
135 static void
136 gl_merge_text_finalize (GObject *object)
137 {
138         glMergeText *merge_text = GL_MERGE_TEXT (object);
139
140         gl_debug (DEBUG_MERGE, "START");
141
142         g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
143
144         g_free (merge_text->priv);
145
146         G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
147
148         gl_debug (DEBUG_MERGE, "END");
149 }
150
151 /*--------------------------------------------------------------------------*/
152 /* Set argument.                                                            */
153 /*--------------------------------------------------------------------------*/
154 static void
155 gl_merge_text_set_property (GObject      *object,
156                             guint         param_id,
157                             const GValue *value,
158                             GParamSpec   *pspec)
159 {
160         glMergeText *merge_text;
161
162         merge_text = GL_MERGE_TEXT (object);
163
164         switch (param_id) {
165
166         case ARG_DELIM:
167                 merge_text->priv->delim = g_value_get_char (value);
168                 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
169                           merge_text->priv->delim);
170                 break;
171
172         default:
173                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
174                 break;
175
176         }
177
178 }
179
180 /*--------------------------------------------------------------------------*/
181 /* Get argument.                                                            */
182 /*--------------------------------------------------------------------------*/
183 static void
184 gl_merge_text_get_property (GObject     *object,
185                             guint        param_id,
186                             GValue      *value,
187                             GParamSpec  *pspec)
188 {
189         glMergeText *merge_text;
190
191         merge_text = GL_MERGE_TEXT (object);
192
193         switch (param_id) {
194
195         case ARG_DELIM:
196                 g_value_set_char (value, merge_text->priv->delim);
197                 break;
198
199         default:
200                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
201                 break;
202
203         }
204
205 }
206
207 /*--------------------------------------------------------------------------*/
208 /* Get key list.                                                            */
209 /*--------------------------------------------------------------------------*/
210 static GList *
211 gl_merge_text_get_key_list (glMerge *merge)
212 {
213         glMergeText   *merge_text;
214         GList         *record_list, *p_rec;
215         glMergeRecord *record;
216         GList         *p_field;
217         gint           i_field, n_fields, n_fields_max = 0;
218         GList         *key_list;
219         
220         /* Field keys are simply column numbers. */
221
222         gl_debug (DEBUG_MERGE, "BEGIN");
223
224         merge_text = GL_MERGE_TEXT (merge);
225
226         record_list = (GList *)gl_merge_get_record_list (merge);
227
228         for ( p_rec=record_list; p_rec!=NULL; p_rec=p_rec->next ) {
229                 record = (glMergeRecord *)p_rec->data;
230
231                 n_fields = 0;
232                 for ( p_field=record->field_list; p_field!=NULL; p_field=p_field->next ) {
233                         n_fields++;
234                 }
235                 if ( n_fields > n_fields_max ) n_fields_max = n_fields;
236         }
237
238         key_list = NULL;
239         for (i_field=1; i_field <= n_fields_max; i_field++) {
240                 key_list = g_list_append (key_list, g_strdup_printf ("%d", i_field));
241         }
242
243
244         gl_debug (DEBUG_MERGE, "END");
245
246         return key_list;
247 }
248
249 /*--------------------------------------------------------------------------*/
250 /* Get "primary" key.                                                       */
251 /*--------------------------------------------------------------------------*/
252 static gchar *
253 gl_merge_text_get_primary_key (glMerge *merge)
254 {
255         /* For now, let's always assume the first column is the primary key. */
256         return g_strdup ("1");
257 }
258
259 /*--------------------------------------------------------------------------*/
260 /* Open merge source.                                                       */
261 /*--------------------------------------------------------------------------*/
262 static void
263 gl_merge_text_open (glMerge *merge)
264 {
265         glMergeText *merge_text;
266         gchar       *src;
267
268         merge_text = GL_MERGE_TEXT (merge);
269
270         src = gl_merge_get_src (merge);
271
272         if (src != NULL) {
273                 merge_text->priv->fp = fopen (src, "r");
274         }
275
276         g_free (src);
277 }
278
279 /*--------------------------------------------------------------------------*/
280 /* Close merge source.                                                      */
281 /*--------------------------------------------------------------------------*/
282 static void
283 gl_merge_text_close (glMerge *merge)
284 {
285         glMergeText *merge_text;
286
287         merge_text = GL_MERGE_TEXT (merge);
288
289         if (merge_text->priv->fp != NULL) {
290
291                 fclose (merge_text->priv->fp);
292                 merge_text->priv->fp = NULL;
293
294         }
295 }
296
297 /*--------------------------------------------------------------------------*/
298 /* Get next record from merge source, NULL if no records left (i.e EOF)     */
299 /*--------------------------------------------------------------------------*/
300 static glMergeRecord *
301 gl_merge_text_get_record (glMerge *merge)
302 {
303         glMergeText   *merge_text;
304         gchar          delim;
305         FILE          *fp;
306         glMergeRecord *record;
307         GList         *fields, *p;
308         gint           i_field;
309         glMergeField  *field;
310
311         merge_text = GL_MERGE_TEXT (merge);
312
313         delim = merge_text->priv->delim;
314         fp    = merge_text->priv->fp;
315
316         if (fp == NULL) {
317                 return NULL;
318         }
319                
320         fields = parse_line (fp, delim);
321         if ( fields == NULL ) {
322                 return NULL;
323         }
324
325         record = g_new0 (glMergeRecord, 1);
326         record->select_flag = TRUE;
327         i_field = 1;
328         for (p=fields; p != NULL; p=p->next) {
329
330                 field = g_new0 (glMergeField, 1);
331                 field->key = g_strdup_printf ("%d", i_field++);
332 #ifndef CSV_ALWAYS_UTF8
333                 field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
334 #else
335                 field->value = g_strdup (p->data);
336 #endif
337
338                 record->field_list = g_list_append (record->field_list, field);
339         }
340         free_fields (&fields);
341
342         return record;
343 }
344
345 /*---------------------------------------------------------------------------*/
346 /* Copy merge_text specific fields.                                          */
347 /*---------------------------------------------------------------------------*/
348 static void
349 gl_merge_text_copy (glMerge *dst_merge,
350                     glMerge *src_merge)
351 {
352         glMergeText *dst_merge_text;
353         glMergeText *src_merge_text;
354
355         dst_merge_text = GL_MERGE_TEXT (dst_merge);
356         src_merge_text = GL_MERGE_TEXT (src_merge);
357
358         dst_merge_text->priv->delim = src_merge_text->priv->delim;
359 }
360
361 /*---------------------------------------------------------------------------*/
362 /* PRIVATE.  Parse line.                                                     */
363 /*                                                                           */
364 /* Attempt to be a robust parser of various CSV (and similar) formats.       */
365 /*                                                                           */
366 /* Split into fields, accounting for:                                        */
367 /*   - delimeters may be embedded in quoted text (")                         */
368 /*   - delimeters may be "escaped" by a leading backslash (\)                */
369 /*   - quotes may be embedded in quoted text as two adjacent quotes ("")     */
370 /*   - quotes may be "escaped" either within or outside of quoted text.      */
371 /*   - newlines may be embedded in quoted text, allowing a field to span     */
372 /*     more than one line.                                                   */
373 /*                                                                           */
374 /* This function does not do any parsing of the individual fields, other     */
375 /* than to correctly interpet delimeters.  Actual parsing of the individual  */
376 /* fields is done in parse_field().                                          */
377 /*                                                                           */
378 /* Returns a list of fields.  A blank line is considered a line with one     */
379 /* empty field.  Returns empty (NULL) when done.                             */
380 /*---------------------------------------------------------------------------*/
381 static GList *
382 parse_line (FILE  *fp,
383             gchar  delim )
384 {
385         GList *list = NULL;
386         GString *string;
387         gint c;
388         enum { BEGIN, NORMAL, QUOTED, QUOTED_QUOTE1,
389                NORMAL_ESCAPED, QUOTED_ESCAPED, DONE } state;
390
391         state = BEGIN;
392         string = g_string_new( "" );
393         while ( state != DONE ) {
394                 c=getc (fp);
395
396                 switch (state) {
397
398                 case BEGIN:
399                         if ( c == delim )
400                         {
401                                 /* first field is empty. */
402                                 list = g_list_append (list, g_strdup (""));
403                                 state = NORMAL;
404                                 break;
405                         }
406                         switch (c) {
407                         case '"':
408                                 string = g_string_append_c (string, c);
409                                 state = QUOTED;
410                                 break;
411                         case '\\':
412                                 string = g_string_append_c (string, c);
413                                 state = NORMAL_ESCAPED;
414                                 break;
415                         case '\n':
416                                 /* treat as one empty field. */
417                                 list = g_list_append (list, g_strdup (""));
418                                 state = DONE;
419                                 break;
420                         case EOF:
421                                 /* end of file, no more lines. */
422                                 state = DONE;
423                                 break;
424                         default:
425                                 string = g_string_append_c (string, c);
426                                 state = NORMAL;
427                                 break;
428                         }
429                         break;
430
431                 case NORMAL:
432                         if ( c == delim )
433                         {
434                                 list = g_list_append (list, parse_field (string->str));
435                                 string = g_string_assign( string, "" );
436                                 state = NORMAL;
437                                 break;
438                         }
439                         switch (c) {
440                         case '"':
441                                 string = g_string_append_c (string, c);
442                                 state = QUOTED;
443                                 break;
444                         case '\\':
445                                 string = g_string_append_c (string, c);
446                                 state = NORMAL_ESCAPED;
447                                 break;
448                         case '\n':
449                         case EOF:
450                                 list = g_list_append (list, parse_field (string->str));
451                                 state = DONE;
452                                 break;
453                         default:
454                                 string = g_string_append_c (string, c);
455                                 state = NORMAL;
456                                 break;
457                         }
458                         break;
459
460                 case QUOTED:
461                         switch (c) {
462                         case '"':
463                                 string = g_string_append_c (string, c);
464                                 state = QUOTED_QUOTE1;
465                                 break;
466                         case '\\':
467                                 string = g_string_append_c (string, c);
468                                 state = QUOTED_ESCAPED;
469                                 break;
470                         case EOF:
471                                 /* File ended mid way through quoted item */
472                                 list = g_list_append (list, parse_field (string->str));
473                                 state = DONE;
474                                 break;
475                         default:
476                                 string = g_string_append_c (string, c);
477                                 break;
478                         }
479                         break;
480
481                 case QUOTED_QUOTE1:
482                         if ( c == delim )
483                         {
484                                 list = g_list_append (list, parse_field (string->str));
485                                 string = g_string_assign( string, "" );
486                                 state = NORMAL;
487                                 break;
488                         }
489                         switch (c) {
490                         case '"':
491                                 /* insert quotes in string, stay quoted. */
492                                 string = g_string_append_c (string, c);
493                                 state = QUOTED;
494                                 break;
495                         case '\n':
496                         case EOF:
497                                 /* line or file ended after quoted item */
498                                 list = g_list_append (list, parse_field (string->str));
499                                 state = DONE;
500                                 break;
501                         default:
502                                 string = g_string_append_c (string, c);
503                                 state = NORMAL;
504                                 break;
505                         }
506                         break;
507
508                 case NORMAL_ESCAPED:
509                         switch (c) {
510                         case EOF:
511                                 /* File ended mid way through quoted item */
512                                 list = g_list_append (list, parse_field (string->str));
513                                 state = DONE;
514                                 break;
515                         default:
516                                 string = g_string_append_c (string, c);
517                                 state = NORMAL;
518                                 break;
519                         }
520                         break;
521
522                 case QUOTED_ESCAPED:
523                         switch (c) {
524                         case EOF:
525                                 /* File ended mid way through quoted item */
526                                 list = g_list_append (list, parse_field (string->str));
527                                 state = DONE;
528                                 break;
529                         default:
530                                 string = g_string_append_c (string, c);
531                                 state = QUOTED;
532                                 break;
533                         }
534                         break;
535
536                 default:
537                         g_assert_not_reached();
538                         break;
539                 }
540
541         }
542         g_string_free( string, TRUE );
543
544         return list;
545 }
546
547 /*---------------------------------------------------------------------------*/
548 /* PRIVATE.  Parse field.                                                    */
549 /*                                                                           */
550 /*  - Strip leading and trailing white space, unless quoted.                 */
551 /*  - Strip CR, unless escaped.                                              */
552 /*  - Expand '\n' and '\t' into newline and tab characters.                  */
553 /*  - Remove quotes, unless escaped (\" anywhere or "" within quotes)        */
554 /*---------------------------------------------------------------------------*/
555 static gchar *
556 parse_field (gchar  *raw_field)
557 {
558         GString *string;
559         gchar   *pass1_field, *c, *field;
560         enum { NORMAL, NORMAL_ESCAPED, QUOTED, QUOTED_ESCAPED, QUOTED_QUOTE1} state;
561
562
563         /*
564          * Pass 1: remove leading and trailing spaces.
565          */
566         pass1_field = g_strdup (raw_field);
567         g_strstrip (pass1_field);
568
569         /*
570          * Pass 2: resolve quoting and escaping.
571          */
572         state = NORMAL;
573         string = g_string_new( "" );
574         for ( c=pass1_field; *c != 0; c++ )
575         {
576                 switch (state) {
577
578                 case NORMAL:
579                         switch (*c) {
580                         case '\\':
581                                 state = NORMAL_ESCAPED;
582                                 break;
583                         case '"':
584                                 state = QUOTED;
585                                 break;
586                         case '\r':
587                                 /* Strip CR. */
588                                 break;
589                         default:
590                                 string = g_string_append_c (string, *c);
591                                 break;
592                         }
593                         break;
594
595                 case NORMAL_ESCAPED:
596                         switch (*c) {
597                         case 'n':
598                                 string = g_string_append_c (string, '\n');
599                                 state = NORMAL;
600                                 break;
601                         case 't':
602                                 string = g_string_append_c (string, '\t');
603                                 state = NORMAL;
604                                 break;
605                         default:
606                                 string = g_string_append_c (string, *c);
607                                 state = NORMAL;
608                                 break;
609                         }
610                         break;
611
612                 case QUOTED:
613                         switch (*c) {
614                         case '\\':
615                                 state = QUOTED_ESCAPED;
616                                 break;
617                         case '"':
618                                 state = QUOTED_QUOTE1;
619                                 break;
620                         case '\r':
621                                 /* Strip CR. */
622                                 break;
623                         default:
624                                 string = g_string_append_c (string, *c);
625                                 break;
626                         }
627                         break;
628
629                 case QUOTED_ESCAPED:
630                         switch (*c) {
631                         case 'n':
632                                 string = g_string_append_c (string, '\n');
633                                 state = QUOTED;
634                                 break;
635                         case 't':
636                                 string = g_string_append_c (string, '\t');
637                                 state = QUOTED;
638                                 break;
639                         default:
640                                 string = g_string_append_c (string, *c);
641                                 state = QUOTED;
642                                 break;
643                         }
644                         break;
645
646                 case QUOTED_QUOTE1:
647                         switch (*c) {
648                         case '"':
649                                 /* insert quotes in string, stay quoted. */
650                                 string = g_string_append_c (string, *c);
651                                 state = QUOTED;
652                                 break;
653                         case '\r':
654                                 /* Strip CR, return to QUOTED. */
655                                 state = QUOTED;
656                                 break;
657                         default:
658                                 string = g_string_append_c (string, *c);
659                                 state = NORMAL;
660                                 break;
661                         }
662                         break;
663
664                 default:
665                         g_assert_not_reached();
666                         break;
667                 }
668
669         }
670
671         field = g_strdup (string->str);
672         g_string_free( string, TRUE );
673         g_free (pass1_field);
674
675         return field;
676 }
677
678 /*---------------------------------------------------------------------------*/
679 /* Free list of fields.                                                      */
680 /*---------------------------------------------------------------------------*/
681 void
682 free_fields (GList ** list)
683 {
684         GList *p;
685
686         for (p = *list; p != NULL; p = p->next) {
687                 g_free (p->data);
688                 p->data = NULL;
689         }
690
691         g_list_free (*list);
692         *list = NULL;
693 }
694