]> git.sur5r.net Git - glabels/blob - glabels2/src/merge-text.c
079eb5b9dd9af17e0408817a5bddf78715af1aca
[glabels] / glabels2 / src / merge-text.c
1 /*
2  *  merge-text.c
3  *  Copyright (C) 2001-2009  Jim Evins <evins@snaught.com>.
4  *
5  *  This file is part of gLabels.
6  *
7  *  gLabels is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  gLabels is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with gLabels.  If not, see <http://www.gnu.org/licenses/>.
19  */
20
21 #include <config.h>
22
23 #include "merge-text.h"
24
25 #include <stdio.h>
26
27 #include "debug.h"
28
29 #define LINE_BUF_LEN 1024
30
31
32 /*===========================================*/
33 /* Private types                             */
34 /*===========================================*/
35
36 struct _glMergeTextPrivate {            /* Per-instance private state of a glMergeText. */
37         gchar             delim;        /* Field delimiter; backs the "delim" property. */
38         FILE             *fp;           /* Input stream; set by fopen() in open, reset to NULL in close. */
39 };
40
41 enum {                                  /* Signal indices; only the terminator is defined here. */
42         LAST_SIGNAL
43 };
44
45 enum {                                  /* Property ids for set_property/get_property. */
46         ARG_0,                          /* Placeholder first entry; not referenced in this file. */
47         ARG_DELIM,                      /* Id under which the "delim" property is installed. */
48 };
49
50
51 /*===========================================*/
52 /* Private globals                           */
53 /*===========================================*/
54
55
56 /*===========================================*/
57 /* Local function prototypes                 */
58 /*===========================================*/
59
/* GObject overrides, assigned to the GObjectClass in gl_merge_text_class_init(). */
60 static void           gl_merge_text_finalize        (GObject          *object);
61
62 static void           gl_merge_text_set_property    (GObject          *object,
63                                                      guint             param_id,
64                                                      const GValue     *value,
65                                                      GParamSpec       *pspec);
66
67 static void           gl_merge_text_get_property    (GObject          *object,
68                                                      guint             param_id,
69                                                      GValue           *value,
70                                                      GParamSpec       *pspec);
71
/* glMerge virtual methods, assigned to the glMergeClass in gl_merge_text_class_init(). */
72 static GList         *gl_merge_text_get_key_list    (glMerge          *merge);
73 static gchar         *gl_merge_text_get_primary_key (glMerge          *merge);
74 static void           gl_merge_text_open            (glMerge          *merge);
75 static void           gl_merge_text_close           (glMerge          *merge);
76 static glMergeRecord *gl_merge_text_get_record      (glMerge          *merge);
77 static void           gl_merge_text_copy            (glMerge          *dst_merge,
78                                                      glMerge          *src_merge);
79
/* File-local parsing helpers: split a line into raw fields, clean one field, free a field list. */
80 static GList         *parse_line                    (FILE             *fp,
81                                                      gchar             delim);
82 static gchar         *parse_field                   (gchar            *raw_field);
83 static void           free_fields                   (GList           **fields);
84
85
86 /*****************************************************************************/
87 /* Boilerplate object stuff.                                                 */
88 /*****************************************************************************/
89 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE); /* Defines the glMergeText type with GL_TYPE_MERGE as its parent type. */
90
91
92 static void
93 gl_merge_text_class_init (glMergeTextClass *class)
94 {
95         GObjectClass *object_class = G_OBJECT_CLASS (class);
96         glMergeClass *merge_class  = GL_MERGE_CLASS (class);
97
98         gl_debug (DEBUG_MERGE, "START");
99
100         gl_merge_text_parent_class = g_type_class_peek_parent (class);
101
102         object_class->set_property = gl_merge_text_set_property;
103         object_class->get_property = gl_merge_text_get_property;
104
105         g_object_class_install_property
106                 (object_class,
107                  ARG_DELIM,
108                  g_param_spec_char ("delim", NULL, NULL,
109                                     0, 0x7F, ',',
110                                     (G_PARAM_READABLE | G_PARAM_WRITABLE)));
111
112         object_class->finalize = gl_merge_text_finalize;
113
114         merge_class->get_key_list    = gl_merge_text_get_key_list;
115         merge_class->get_primary_key = gl_merge_text_get_primary_key;
116         merge_class->open            = gl_merge_text_open;
117         merge_class->close           = gl_merge_text_close;
118         merge_class->get_record      = gl_merge_text_get_record;
119         merge_class->copy            = gl_merge_text_copy;
120
121         gl_debug (DEBUG_MERGE, "END");
122 }
123
124
125 static void
126 gl_merge_text_init (glMergeText *merge_text)
127 {
128         gl_debug (DEBUG_MERGE, "START");
129
130         merge_text->priv = g_new0 (glMergeTextPrivate, 1);
131
132         gl_debug (DEBUG_MERGE, "END");
133 }
134
135
136 static void
137 gl_merge_text_finalize (GObject *object)
138 {
139         glMergeText *merge_text = GL_MERGE_TEXT (object);
140
141         gl_debug (DEBUG_MERGE, "START");
142
143         g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
144
145         g_free (merge_text->priv);
146
147         G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
148
149         gl_debug (DEBUG_MERGE, "END");
150 }
151
152
153 /*--------------------------------------------------------------------------*/
154 /* Set argument.                                                            */
155 /*--------------------------------------------------------------------------*/
156 static void
157 gl_merge_text_set_property (GObject      *object,
158                             guint         param_id,
159                             const GValue *value,
160                             GParamSpec   *pspec)
161 {
162         glMergeText *merge_text;
163
164         merge_text = GL_MERGE_TEXT (object);
165
166         switch (param_id) {
167
168         case ARG_DELIM:
169                 merge_text->priv->delim = g_value_get_char (value);
170                 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
171                           merge_text->priv->delim);
172                 break;
173
174         default:
175                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
176                 break;
177
178         }
179
180 }
181
182
183 /*--------------------------------------------------------------------------*/
184 /* Get argument.                                                            */
185 /*--------------------------------------------------------------------------*/
186 static void
187 gl_merge_text_get_property (GObject     *object,
188                             guint        param_id,
189                             GValue      *value,
190                             GParamSpec  *pspec)
191 {
192         glMergeText *merge_text;
193
194         merge_text = GL_MERGE_TEXT (object);
195
196         switch (param_id) {
197
198         case ARG_DELIM:
199                 g_value_set_char (value, merge_text->priv->delim);
200                 break;
201
202         default:
203                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
204                 break;
205
206         }
207
208 }
209
210
211 /*--------------------------------------------------------------------------*/
212 /* Get key list.                                                            */
213 /*--------------------------------------------------------------------------*/
214 static GList *
215 gl_merge_text_get_key_list (glMerge *merge)
216 {
217         glMergeText   *merge_text;
218         GList         *record_list, *p_rec;
219         glMergeRecord *record;
220         GList         *p_field;
221         gint           i_field, n_fields, n_fields_max = 0;
222         GList         *key_list;
223         
224         /* Field keys are simply column numbers. */
225
226         gl_debug (DEBUG_MERGE, "BEGIN");
227
228         merge_text = GL_MERGE_TEXT (merge);
229
230         record_list = (GList *)gl_merge_get_record_list (merge);
231
232         for ( p_rec=record_list; p_rec!=NULL; p_rec=p_rec->next ) {
233                 record = (glMergeRecord *)p_rec->data;
234
235                 n_fields = 0;
236                 for ( p_field=record->field_list; p_field!=NULL; p_field=p_field->next ) {
237                         n_fields++;
238                 }
239                 if ( n_fields > n_fields_max ) n_fields_max = n_fields;
240         }
241
242         key_list = NULL;
243         for (i_field=1; i_field <= n_fields_max; i_field++) {
244                 key_list = g_list_append (key_list, g_strdup_printf ("%d", i_field));
245         }
246
247
248         gl_debug (DEBUG_MERGE, "END");
249
250         return key_list;
251 }
252
253
254 /*--------------------------------------------------------------------------*/
255 /* Get "primary" key.                                                       */
256 /*--------------------------------------------------------------------------*/
257 static gchar *
258 gl_merge_text_get_primary_key (glMerge *merge)
259 {
260         /* For now, let's always assume the first column is the primary key. */
261         return g_strdup ("1");
262 }
263
264
265 /*--------------------------------------------------------------------------*/
266 /* Open merge source.                                                       */
267 /*--------------------------------------------------------------------------*/
268 static void
269 gl_merge_text_open (glMerge *merge)
270 {
271         glMergeText *merge_text;
272         gchar       *src;
273
274         merge_text = GL_MERGE_TEXT (merge);
275
276         src = gl_merge_get_src (merge);
277
278         if (src != NULL) {
279                 merge_text->priv->fp = fopen (src, "r");
280         }
281
282         g_free (src);
283 }
284
285
286 /*--------------------------------------------------------------------------*/
287 /* Close merge source.                                                      */
288 /*--------------------------------------------------------------------------*/
289 static void
290 gl_merge_text_close (glMerge *merge)
291 {
292         glMergeText *merge_text;
293
294         merge_text = GL_MERGE_TEXT (merge);
295
296         if (merge_text->priv->fp != NULL) {
297
298                 fclose (merge_text->priv->fp);
299                 merge_text->priv->fp = NULL;
300
301         }
302 }
303
304
305 /*--------------------------------------------------------------------------*/
306 /* Get next record from merge source, NULL if no records left (i.e EOF)     */
307 /*--------------------------------------------------------------------------*/
308 static glMergeRecord *
309 gl_merge_text_get_record (glMerge *merge)
310 {
311         glMergeText   *merge_text;
312         gchar          delim;
313         FILE          *fp;
314         glMergeRecord *record;
315         GList         *fields, *p;
316         gint           i_field;
317         glMergeField  *field;
318
319         merge_text = GL_MERGE_TEXT (merge);
320
321         delim = merge_text->priv->delim;
322         fp    = merge_text->priv->fp;
323
324         if (fp == NULL) {
325                 return NULL;
326         }
327                
328         fields = parse_line (fp, delim);
329         if ( fields == NULL ) {
330                 return NULL;
331         }
332
333         record = g_new0 (glMergeRecord, 1);
334         record->select_flag = TRUE;
335         i_field = 1;
336         for (p=fields; p != NULL; p=p->next) {
337
338                 field = g_new0 (glMergeField, 1);
339                 field->key = g_strdup_printf ("%d", i_field++);
340 #ifndef CSV_ALWAYS_UTF8
341                 field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
342 #else
343                 field->value = g_strdup (p->data);
344 #endif
345
346                 record->field_list = g_list_append (record->field_list, field);
347         }
348         free_fields (&fields);
349
350         return record;
351 }
352
353
354 /*---------------------------------------------------------------------------*/
355 /* Copy merge_text specific fields.                                          */
356 /*---------------------------------------------------------------------------*/
357 static void
358 gl_merge_text_copy (glMerge *dst_merge,
359                     glMerge *src_merge)
360 {
361         glMergeText *dst_merge_text;
362         glMergeText *src_merge_text;
363
364         dst_merge_text = GL_MERGE_TEXT (dst_merge);
365         src_merge_text = GL_MERGE_TEXT (src_merge);
366
367         dst_merge_text->priv->delim = src_merge_text->priv->delim;
368 }
369
370
371 /*---------------------------------------------------------------------------*/
372 /* PRIVATE.  Parse line.                                                     */
373 /*                                                                           */
374 /* Attempt to be a robust parser of various CSV (and similar) formats.       */
375 /*                                                                           */
376 /* Split into fields, accounting for:                                        */
377 /*   - delimeters may be embedded in quoted text (")                         */
378 /*   - delimeters may be "escaped" by a leading backslash (\)                */
379 /*   - quotes may be embedded in quoted text as two adjacent quotes ("")     */
380 /*   - quotes may be "escaped" either within or outside of quoted text.      */
381 /*   - newlines may be embedded in quoted text, allowing a field to span     */
382 /*     more than one line.                                                   */
383 /*                                                                           */
384 /* This function does not do any parsing of the individual fields, other     */
385 /* than to correctly interpet delimeters.  Actual parsing of the individual  */
386 /* fields is done in parse_field().                                          */
387 /*                                                                           */
388 /* Returns a list of fields.  A blank line is considered a line with one     */
389 /* empty field.  Returns empty (NULL) when done.                             */
390 /*---------------------------------------------------------------------------*/
391 static GList *
392 parse_line (FILE  *fp,
393             gchar  delim )
394 {
395         GList *list = NULL;
396         GString *string;
397         gint c;
398         enum { BEGIN, NORMAL, QUOTED, QUOTED_QUOTE1,
399                NORMAL_ESCAPED, QUOTED_ESCAPED, DONE } state;
400
401         state = BEGIN;
402         string = g_string_new( "" );
403         while ( state != DONE ) {
404                 c=getc (fp);
405
406                 switch (state) {
407
408                 case BEGIN:
409                         if ( c == delim )
410                         {
411                                 /* first field is empty. */
412                                 list = g_list_append (list, g_strdup (""));
413                                 state = NORMAL;
414                                 break;
415                         }
416                         switch (c) {
417                         case '"':
418                                 string = g_string_append_c (string, c);
419                                 state = QUOTED;
420                                 break;
421                         case '\\':
422                                 string = g_string_append_c (string, c);
423                                 state = NORMAL_ESCAPED;
424                                 break;
425                         case '\n':
426                                 /* treat as one empty field. */
427                                 list = g_list_append (list, g_strdup (""));
428                                 state = DONE;
429                                 break;
430                         case EOF:
431                                 /* end of file, no more lines. */
432                                 state = DONE;
433                                 break;
434                         default:
435                                 string = g_string_append_c (string, c);
436                                 state = NORMAL;
437                                 break;
438                         }
439                         break;
440
441                 case NORMAL:
442                         if ( c == delim )
443                         {
444                                 list = g_list_append (list, parse_field (string->str));
445                                 string = g_string_assign( string, "" );
446                                 state = NORMAL;
447                                 break;
448                         }
449                         switch (c) {
450                         case '"':
451                                 string = g_string_append_c (string, c);
452                                 state = QUOTED;
453                                 break;
454                         case '\\':
455                                 string = g_string_append_c (string, c);
456                                 state = NORMAL_ESCAPED;
457                                 break;
458                         case '\n':
459                         case EOF:
460                                 list = g_list_append (list, parse_field (string->str));
461                                 state = DONE;
462                                 break;
463                         default:
464                                 string = g_string_append_c (string, c);
465                                 state = NORMAL;
466                                 break;
467                         }
468                         break;
469
470                 case QUOTED:
471                         switch (c) {
472                         case '"':
473                                 string = g_string_append_c (string, c);
474                                 state = QUOTED_QUOTE1;
475                                 break;
476                         case '\\':
477                                 string = g_string_append_c (string, c);
478                                 state = QUOTED_ESCAPED;
479                                 break;
480                         case EOF:
481                                 /* File ended mid way through quoted item */
482                                 list = g_list_append (list, parse_field (string->str));
483                                 state = DONE;
484                                 break;
485                         default:
486                                 string = g_string_append_c (string, c);
487                                 break;
488                         }
489                         break;
490
491                 case QUOTED_QUOTE1:
492                         if ( c == delim )
493                         {
494                                 list = g_list_append (list, parse_field (string->str));
495                                 string = g_string_assign( string, "" );
496                                 state = NORMAL;
497                                 break;
498                         }
499                         switch (c) {
500                         case '"':
501                                 /* insert quotes in string, stay quoted. */
502                                 string = g_string_append_c (string, c);
503                                 state = QUOTED;
504                                 break;
505                         case '\n':
506                         case EOF:
507                                 /* line or file ended after quoted item */
508                                 list = g_list_append (list, parse_field (string->str));
509                                 state = DONE;
510                                 break;
511                         default:
512                                 string = g_string_append_c (string, c);
513                                 state = NORMAL;
514                                 break;
515                         }
516                         break;
517
518                 case NORMAL_ESCAPED:
519                         switch (c) {
520                         case EOF:
521                                 /* File ended mid way through quoted item */
522                                 list = g_list_append (list, parse_field (string->str));
523                                 state = DONE;
524                                 break;
525                         default:
526                                 string = g_string_append_c (string, c);
527                                 state = NORMAL;
528                                 break;
529                         }
530                         break;
531
532                 case QUOTED_ESCAPED:
533                         switch (c) {
534                         case EOF:
535                                 /* File ended mid way through quoted item */
536                                 list = g_list_append (list, parse_field (string->str));
537                                 state = DONE;
538                                 break;
539                         default:
540                                 string = g_string_append_c (string, c);
541                                 state = QUOTED;
542                                 break;
543                         }
544                         break;
545
546                 default:
547                         g_assert_not_reached();
548                         break;
549                 }
550
551         }
552         g_string_free( string, TRUE );
553
554         return list;
555 }
556
557
558 /*---------------------------------------------------------------------------*/
559 /* PRIVATE.  Parse field.                                                    */
560 /*                                                                           */
561 /*  - Strip leading and trailing white space, unless quoted.                 */
562 /*  - Strip CR, unless escaped.                                              */
563 /*  - Expand '\n' and '\t' into newline and tab characters.                  */
564 /*  - Remove quotes, unless escaped (\" anywhere or "" within quotes)        */
565 /*---------------------------------------------------------------------------*/
566 static gchar *
567 parse_field (gchar  *raw_field)
568 {
569         GString *string;
570         gchar   *pass1_field, *c, *field;
571         enum { NORMAL, NORMAL_ESCAPED, QUOTED, QUOTED_ESCAPED, QUOTED_QUOTE1} state;
572
573
574         /*
575          * Pass 1: remove leading and trailing spaces.
576          */
577         pass1_field = g_strdup (raw_field);
578         g_strstrip (pass1_field);
579
580         /*
581          * Pass 2: resolve quoting and escaping.
582          */
583         state = NORMAL;
584         string = g_string_new( "" );
585         for ( c=pass1_field; *c != 0; c++ )
586         {
587                 switch (state) {
588
589                 case NORMAL:
590                         switch (*c) {
591                         case '\\':
592                                 state = NORMAL_ESCAPED;
593                                 break;
594                         case '"':
595                                 state = QUOTED;
596                                 break;
597                         case '\r':
598                                 /* Strip CR. */
599                                 break;
600                         default:
601                                 string = g_string_append_c (string, *c);
602                                 break;
603                         }
604                         break;
605
606                 case NORMAL_ESCAPED:
607                         switch (*c) {
608                         case 'n':
609                                 string = g_string_append_c (string, '\n');
610                                 state = NORMAL;
611                                 break;
612                         case 't':
613                                 string = g_string_append_c (string, '\t');
614                                 state = NORMAL;
615                                 break;
616                         default:
617                                 string = g_string_append_c (string, *c);
618                                 state = NORMAL;
619                                 break;
620                         }
621                         break;
622
623                 case QUOTED:
624                         switch (*c) {
625                         case '\\':
626                                 state = QUOTED_ESCAPED;
627                                 break;
628                         case '"':
629                                 state = QUOTED_QUOTE1;
630                                 break;
631                         case '\r':
632                                 /* Strip CR. */
633                                 break;
634                         default:
635                                 string = g_string_append_c (string, *c);
636                                 break;
637                         }
638                         break;
639
640                 case QUOTED_ESCAPED:
641                         switch (*c) {
642                         case 'n':
643                                 string = g_string_append_c (string, '\n');
644                                 state = QUOTED;
645                                 break;
646                         case 't':
647                                 string = g_string_append_c (string, '\t');
648                                 state = QUOTED;
649                                 break;
650                         default:
651                                 string = g_string_append_c (string, *c);
652                                 state = QUOTED;
653                                 break;
654                         }
655                         break;
656
657                 case QUOTED_QUOTE1:
658                         switch (*c) {
659                         case '"':
660                                 /* insert quotes in string, stay quoted. */
661                                 string = g_string_append_c (string, *c);
662                                 state = QUOTED;
663                                 break;
664                         case '\r':
665                                 /* Strip CR, return to QUOTED. */
666                                 state = QUOTED;
667                                 break;
668                         default:
669                                 string = g_string_append_c (string, *c);
670                                 state = NORMAL;
671                                 break;
672                         }
673                         break;
674
675                 default:
676                         g_assert_not_reached();
677                         break;
678                 }
679
680         }
681
682         field = g_strdup (string->str);
683         g_string_free( string, TRUE );
684         g_free (pass1_field);
685
686         return field;
687 }
688
689
690 /*---------------------------------------------------------------------------*/
691 /* Free list of fields.                                                      */
692 /*---------------------------------------------------------------------------*/
693 void
694 free_fields (GList ** list)
695 {
696         GList *p;
697
698         for (p = *list; p != NULL; p = p->next) {
699                 g_free (p->data);
700                 p->data = NULL;
701         }
702
703         g_list_free (*list);
704         *list = NULL;
705 }
706
707
708
709 /*
710  * Local Variables:       -- emacs
711  * mode: C                -- emacs
712  * c-basic-offset: 8      -- emacs
713  * tab-width: 8           -- emacs
714  * indent-tabs-mode: nil  -- emacs
715  * End:                   -- emacs
716  */