]> git.sur5r.net Git - glabels/blob - src/merge-text.c
Imported Upstream version 3.2.0
[glabels] / src / merge-text.c
1 /*
2  *  merge-text.c
3  *  Copyright (C) 2001-2009  Jim Evins <evins@snaught.com>.
4  *
5  *  This file is part of gLabels.
6  *
7  *  gLabels is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  gLabels is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with gLabels.  If not, see <http://www.gnu.org/licenses/>.
19  */
20
21 #include <config.h>
22
23 #include "merge-text.h"
24
25 #include <stdio.h>
26
27 #include "debug.h"
28
29 #define LINE_BUF_LEN 1024
30
31
32 /*===========================================*/
33 /* Private types                             */
34 /*===========================================*/
35
/* Instance-private state of a glMergeText object. */
struct _glMergeTextPrivate {

        gchar             delim;           /* field delimiter ("delim" property) */
        gboolean          line1_has_keys;  /* TRUE: first line holds key names ("line1_has_keys" property) */

        FILE             *fp;              /* input stream set by open(); NULL after close() */

        GPtrArray        *keys;            /* key names read from line 1 (heap strings) */
        gint              n_fields_max;    /* largest field count of any record read so far */
};
46
/* Signal ids.  No signals are listed here; LAST_SIGNAL is the only enumerator. */
enum {
        LAST_SIGNAL
};
50
/* Property ids used by class_init and by the set/get property handlers. */
enum {
        ARG_0,                  /* never installed as a property in this file */
        ARG_DELIM,              /* "delim" */
        ARG_LINE1_HAS_KEYS      /* "line1_has_keys" */
};
56
57
58 /*===========================================*/
59 /* Private globals                           */
60 /*===========================================*/
61
62
63 /*===========================================*/
64 /* Local function prototypes                 */
65 /*===========================================*/
66
67 static void           gl_merge_text_finalize        (GObject          *object);
68
69 static void           gl_merge_text_set_property    (GObject          *object,
70                                                      guint             param_id,
71                                                      const GValue     *value,
72                                                      GParamSpec       *pspec);
73
74 static void           gl_merge_text_get_property    (GObject          *object,
75                                                      guint             param_id,
76                                                      GValue           *value,
77                                                      GParamSpec       *pspec);
78
79 static gchar         *key_from_index                (glMergeText      *merge_text,
80                                                      gint              i_field);
81 static void           clear_keys                    (glMergeText      *merge_text);
82
83 static GList         *gl_merge_text_get_key_list    (const glMerge    *merge);
84 static gchar         *gl_merge_text_get_primary_key (const glMerge    *merge);
85 static void           gl_merge_text_open            (glMerge          *merge);
86 static void           gl_merge_text_close           (glMerge          *merge);
87 static glMergeRecord *gl_merge_text_get_record      (glMerge          *merge);
88 static void           gl_merge_text_copy            (glMerge          *dst_merge,
89                                                      const glMerge    *src_merge);
90
91 static GList         *parse_line                    (FILE             *fp,
92                                                      gchar             delim);
93 static void           free_fields                   (GList           **fields);
94
95
96
97 /*****************************************************************************/
98 /* Boilerplate object stuff.                                                 */
99 /*****************************************************************************/
100 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE)
101
102
103 static void
104 gl_merge_text_class_init (glMergeTextClass *class)
105 {
106         GObjectClass *object_class = G_OBJECT_CLASS (class);
107         glMergeClass *merge_class  = GL_MERGE_CLASS (class);
108
109         gl_debug (DEBUG_MERGE, "START");
110
111         gl_merge_text_parent_class = g_type_class_peek_parent (class);
112
113         object_class->set_property = gl_merge_text_set_property;
114         object_class->get_property = gl_merge_text_get_property;
115
116         g_object_class_install_property
117                 (object_class,
118                  ARG_DELIM,
119                  g_param_spec_char ("delim", NULL, NULL,
120                                     0, 0x7F, ',',
121                                     (G_PARAM_READABLE | G_PARAM_WRITABLE)));
122
123         g_object_class_install_property
124                 (object_class,
125                  ARG_LINE1_HAS_KEYS,
126                  g_param_spec_boolean ("line1_has_keys", NULL, NULL,
127                                        FALSE,
128                                        (G_PARAM_READABLE | G_PARAM_WRITABLE)));
129
130         object_class->finalize = gl_merge_text_finalize;
131
132         merge_class->get_key_list    = gl_merge_text_get_key_list;
133         merge_class->get_primary_key = gl_merge_text_get_primary_key;
134         merge_class->open            = gl_merge_text_open;
135         merge_class->close           = gl_merge_text_close;
136         merge_class->get_record      = gl_merge_text_get_record;
137         merge_class->copy            = gl_merge_text_copy;
138
139         gl_debug (DEBUG_MERGE, "END");
140 }
141
142
143 static void
144 gl_merge_text_init (glMergeText *merge_text)
145 {
146         gl_debug (DEBUG_MERGE, "START");
147
148         merge_text->priv = g_new0 (glMergeTextPrivate, 1);
149
150         merge_text->priv->keys = g_ptr_array_new ();
151
152         gl_debug (DEBUG_MERGE, "END");
153 }
154
155
156 static void
157 gl_merge_text_finalize (GObject *object)
158 {
159         glMergeText *merge_text = GL_MERGE_TEXT (object);
160
161         gl_debug (DEBUG_MERGE, "START");
162
163         g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
164
165         clear_keys (merge_text);
166         g_ptr_array_free (merge_text->priv->keys, TRUE);
167         g_free (merge_text->priv);
168
169         G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
170
171         gl_debug (DEBUG_MERGE, "END");
172 }
173
174
175 /*--------------------------------------------------------------------------*/
176 /* Set argument.                                                            */
177 /*--------------------------------------------------------------------------*/
178 static void
179 gl_merge_text_set_property (GObject      *object,
180                             guint         param_id,
181                             const GValue *value,
182                             GParamSpec   *pspec)
183 {
184         glMergeText *merge_text;
185
186         merge_text = GL_MERGE_TEXT (object);
187
188         switch (param_id) {
189
190         case ARG_DELIM:
191                 merge_text->priv->delim = g_value_get_schar (value);
192                 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
193                           merge_text->priv->delim);
194                 break;
195
196         case ARG_LINE1_HAS_KEYS:
197                 merge_text->priv->line1_has_keys = g_value_get_boolean (value);
198                 gl_debug (DEBUG_MERGE, "ARG \"line1_has_keys\" = \"%d\"",
199                           merge_text->priv->line1_has_keys);
200                 break;
201
202         default:
203                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
204                 break;
205
206         }
207
208 }
209
210
211 /*--------------------------------------------------------------------------*/
212 /* Get argument.                                                            */
213 /*--------------------------------------------------------------------------*/
214 static void
215 gl_merge_text_get_property (GObject     *object,
216                             guint        param_id,
217                             GValue      *value,
218                             GParamSpec  *pspec)
219 {
220         glMergeText *merge_text;
221
222         merge_text = GL_MERGE_TEXT (object);
223
224         switch (param_id) {
225
226         case ARG_DELIM:
227                 g_value_set_schar (value, merge_text->priv->delim);
228                 break;
229
230         case ARG_LINE1_HAS_KEYS:
231                 g_value_set_boolean (value, merge_text->priv->line1_has_keys);
232                 break;
233
234         default:
235                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
236                 break;
237
238         }
239
240 }
241
242
243 /*---------------------------------------------------------------------------*/
244 /* Lookup key name from zero based index.                                    */
245 /*---------------------------------------------------------------------------*/
246 static gchar *
247 key_from_index (glMergeText  *merge_text,
248                 gint          i_field)
249 {
250         if ( merge_text->priv->line1_has_keys &&
251              (i_field < merge_text->priv->keys->len) )
252         {
253                 return g_strdup (g_ptr_array_index (merge_text->priv->keys, i_field));
254         }
255         else
256         {
257                 return g_strdup_printf ("%d", i_field+1);
258         }
259 }
260
261
262 /*---------------------------------------------------------------------------*/
263 /* Clear stored keys.                                                        */
264 /*---------------------------------------------------------------------------*/
265 static void
266 clear_keys (glMergeText      *merge_text)
267 {
268         gint i;
269
270         for ( i = 0; i < merge_text->priv->keys->len; i++ )
271         {
272                 g_free (g_ptr_array_index (merge_text->priv->keys, i));
273         }
274         merge_text->priv->keys->len = 0;
275 }
276
277
278 /*--------------------------------------------------------------------------*/
279 /* Get key list.                                                            */
280 /*--------------------------------------------------------------------------*/
281 static GList *
282 gl_merge_text_get_key_list (const glMerge *merge)
283 {
284         glMergeText   *merge_text;
285         gint           i_field, n_fields;
286         GList         *key_list;
287         
288         gl_debug (DEBUG_MERGE, "BEGIN");
289
290         merge_text = GL_MERGE_TEXT (merge);
291
292         if ( merge_text->priv->line1_has_keys )
293         {
294                 n_fields = merge_text->priv->keys->len;
295         }
296         else
297         {
298                 n_fields = merge_text->priv->n_fields_max;
299         }
300
301         key_list = NULL;
302         for ( i_field=0; i_field < n_fields; i_field++ )
303         {
304                 key_list = g_list_append (key_list, key_from_index(merge_text, i_field));
305         }
306
307         gl_debug (DEBUG_MERGE, "END");
308
309         return key_list;
310 }
311
312
313 /*--------------------------------------------------------------------------*/
314 /* Get "primary" key.                                                       */
315 /*--------------------------------------------------------------------------*/
316 static gchar *
317 gl_merge_text_get_primary_key (const glMerge *merge)
318 {
319         /* For now, let's always assume the first column is the primary key. */
320         return key_from_index (GL_MERGE_TEXT (merge), 0);
321 }
322
323
324 /*--------------------------------------------------------------------------*/
325 /* Open merge source.                                                       */
326 /*--------------------------------------------------------------------------*/
327 static void
328 gl_merge_text_open (glMerge *merge)
329 {
330         glMergeText *merge_text;
331         gchar       *src;
332
333         GList       *line1_fields;
334         GList       *p;
335
336         merge_text = GL_MERGE_TEXT (merge);
337
338         src = gl_merge_get_src (merge);
339
340         if (src != NULL)
341         {
342                 if (g_utf8_strlen(src, -1) == 1 && src[0] == '-')
343                         merge_text->priv->fp = stdin;
344                 else
345                         merge_text->priv->fp = fopen (src, "r");
346
347                 g_free (src);
348
349                 clear_keys (merge_text);
350                 merge_text->priv->n_fields_max = 0;
351
352                 if ( merge_text->priv->line1_has_keys )
353                 {
354                         /*
355                          * Extract keys from first line and discard line
356                          */
357
358                         line1_fields = parse_line (merge_text->priv->fp, merge_text->priv->delim);
359                         for ( p = line1_fields; p != NULL; p = p->next )
360                         {
361                                 g_ptr_array_add (merge_text->priv->keys, g_strdup (p->data));
362                         }
363                         free_fields (&line1_fields);
364                 }
365
366         }
367
368
369 }
370
371
372 /*--------------------------------------------------------------------------*/
373 /* Close merge source.                                                      */
374 /*--------------------------------------------------------------------------*/
375 static void
376 gl_merge_text_close (glMerge *merge)
377 {
378         glMergeText *merge_text;
379
380         merge_text = GL_MERGE_TEXT (merge);
381
382         if (merge_text->priv->fp != NULL) {
383
384                 fclose (merge_text->priv->fp);
385                 merge_text->priv->fp = NULL;
386
387         }
388 }
389
390
391 /*--------------------------------------------------------------------------*/
392 /* Get next record from merge source, NULL if no records left (i.e EOF)     */
393 /*--------------------------------------------------------------------------*/
394 static glMergeRecord *
395 gl_merge_text_get_record (glMerge *merge)
396 {
397         glMergeText   *merge_text;
398         gchar          delim;
399         FILE          *fp;
400         glMergeRecord *record;
401         GList         *fields, *p;
402         gint           i_field;
403         glMergeField  *field;
404
405         merge_text = GL_MERGE_TEXT (merge);
406
407         delim = merge_text->priv->delim;
408         fp    = merge_text->priv->fp;
409
410         fields = parse_line (fp, delim);
411         if ( fields == NULL ) {
412                 return NULL;
413         }
414
415         record = g_new0 (glMergeRecord, 1);
416         record->select_flag = TRUE;
417         for (p=fields, i_field=0; p != NULL; p=p->next, i_field++) {
418
419                 field = g_new0 (glMergeField, 1);
420                 field->key = key_from_index (merge_text, i_field);
421 #ifndef CSV_ALWAYS_UTF8
422                 field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
423 #else
424                 field->value = g_strdup (p->data);
425 #endif
426
427                 record->field_list = g_list_append (record->field_list, field);
428         }
429         free_fields (&fields);
430
431         if ( i_field > merge_text->priv->n_fields_max )
432         {
433                 merge_text->priv->n_fields_max = i_field;
434         }
435
436         return record;
437 }
438
439
440 /*---------------------------------------------------------------------------*/
441 /* Copy merge_text specific fields.                                          */
442 /*---------------------------------------------------------------------------*/
443 static void
444 gl_merge_text_copy (glMerge       *dst_merge,
445                     const glMerge *src_merge)
446 {
447         glMergeText *dst_merge_text;
448         glMergeText *src_merge_text;
449         gint         i;
450
451         dst_merge_text = GL_MERGE_TEXT (dst_merge);
452         src_merge_text = GL_MERGE_TEXT (src_merge);
453
454         dst_merge_text->priv->delim          = src_merge_text->priv->delim;
455         dst_merge_text->priv->line1_has_keys = src_merge_text->priv->line1_has_keys;
456
457         for ( i=0; i < src_merge_text->priv->keys->len; i++ )
458         {
459                 g_ptr_array_add (dst_merge_text->priv->keys,
460                                  g_strdup ((gchar *)g_ptr_array_index (src_merge_text->priv->keys, i)));
461         }
462
463         dst_merge_text->priv->n_fields_max   = src_merge_text->priv->n_fields_max;
464 }
465
466
467 /*---------------------------------------------------------------------------*/
468 /* PRIVATE.  Parse line.                                                     */
469 /*                                                                           */
470 /* Attempt to be a robust parser of various CSV (and similar) formats.       */
471 /*                                                                           */
472 /* Based on CSV format described in RFC 4180 section 2.                      */
473 /*                                                                           */
474 /* Additions to RFC 4180 rules:                                              */
475 /*   - delimeters and other special characters may be "escaped" by a leading */
476 /*     backslash (\)                                                         */
477 /*   - C escape sequences for newline (\n) and tab (\t) are also translated. */
478 /*   - if quoted text is not followed by a delimeter, any additional text is */
479 /*     concatenated with quoted portion.                                     */
480 /*                                                                           */
481 /* Returns a list of fields.  A blank line is considered a line with one     */
482 /* empty field.  Returns empty (NULL) when done.                             */
483 /*---------------------------------------------------------------------------*/
484 static GList *
485 parse_line (FILE  *fp,
486             gchar  delim )
487 {
488         GList   *list;
489         GString *field;
490         gint     c;
491         enum { DELIM,
492                QUOTED, QUOTED_QUOTE1, QUOTED_ESCAPED,
493                SIMPLE, SIMPLE_ESCAPED,
494                DONE } state;
495
496         if (fp == NULL) {
497                 return NULL;
498         }
499                
500         state = DELIM;
501         list  = NULL;
502         field = g_string_new( "" );
503         while ( state != DONE ) {
504                 c=getc (fp);
505
506                 switch (state) {
507
508                 case DELIM:
509                         switch (c) {
510                         case '\n':
511                                 /* last field is empty. */
512                                 list = g_list_append (list, g_strdup (""));
513                                 state = DONE;
514                                 break;
515                         case '\r':
516                                 /* ignore */
517                                 state = DELIM;
518                                 break;
519                         case EOF:
520                                 /* end of file, no more lines. */
521                                 state = DONE;
522                                 break;
523                         case '"':
524                                 /* start a quoted field. */
525                                 state = QUOTED;
526                                 break;
527                         case '\\':
528                                 /* simple field, but 1st character is an escape. */
529                                 state = SIMPLE_ESCAPED;
530                                 break;
531                         default:
532                                 if ( c == delim )
533                                 {
534                                         /* field is empty. */
535                                         list = g_list_append (list, g_strdup (""));
536                                         state = DELIM;
537                                 }
538                                 else
539                                 {
540                                         /* begining of a simple field. */
541                                         field = g_string_append_c (field, c);
542                                         state = SIMPLE;
543                                 }
544                                 break;
545                         }
546                         break;
547
548                 case QUOTED:
549                         switch (c) {
550                         case EOF:
551                                 /* File ended mid way through quoted item, truncate field. */
552                                 list = g_list_append (list, g_strdup (field->str));
553                                 state = DONE;
554                                 break;
555                         case '"':
556                                 /* Possible end of field, but could be 1st of a pair. */
557                                 state = QUOTED_QUOTE1;
558                                 break;
559                         case '\\':
560                                 /* Escape next character, or special escape, e.g. \n. */
561                                 state = QUOTED_ESCAPED;
562                                 break;
563                         default:
564                                 /* Use character literally. */
565                                 field = g_string_append_c (field, c);
566                                 break;
567                         }
568                         break;
569
570                 case QUOTED_QUOTE1:
571                         switch (c) {
572                         case '\n':
573                         case EOF:
574                                 /* line or file ended after quoted item */
575                                 list = g_list_append (list, g_strdup (field->str));
576                                 state = DONE;
577                                 break;
578                         case '"':
579                                 /* second quote, insert and stay quoted. */
580                                 field = g_string_append_c (field, c);
581                                 state = QUOTED;
582                                 break;
583                         case '\r':
584                                 /* ignore and go to fallback */
585                                 state = SIMPLE;
586                                 break;
587                         default:
588                                 if ( c == delim )
589                                 {
590                                         /* end of field. */
591                                         list = g_list_append (list, g_strdup (field->str));
592                                         field = g_string_assign( field, "" );
593                                         state = DELIM;
594                                 }
595                                 else
596                                 {
597                                         /* fallback if not a delim or another quote. */
598                                         field = g_string_append_c (field, c);
599                                         state = SIMPLE;
600                                 }
601                                 break;
602                         }
603                         break;
604
605                 case QUOTED_ESCAPED:
606                         switch (c) {
607                         case EOF:
608                                 /* File ended mid way through quoted item */
609                                 list = g_list_append (list, g_strdup (field->str));
610                                 state = DONE;
611                                 break;
612                         case 'n':
613                                 /* Decode "\n" as newline. */
614                                 field = g_string_append_c (field, '\n');
615                                 state = QUOTED;
616                                 break;
617                         case 't':
618                                 /* Decode "\t" as tab. */
619                                 field = g_string_append_c (field, '\t');
620                                 state = QUOTED;
621                                 break;
622                         default:
623                                 /* Use character literally. */
624                                 field = g_string_append_c (field, c);
625                                 state = QUOTED;
626                                 break;
627                         }
628                         break;
629
630                 case SIMPLE:
631                         switch (c) {
632                         case '\n':
633                         case EOF:
634                                 /* line or file ended */
635                                 list = g_list_append (list, g_strdup (field->str));
636                                 state = DONE;
637                                 break;
638                         case '\r':
639                                 /* ignore */
640                                 state = SIMPLE;
641                                 break;
642                         case '\\':
643                                 /* Escape next character, or special escape, e.g. \n. */
644                                 state = SIMPLE_ESCAPED;
645                                 break;
646                         default:
647                                 if ( c == delim )
648                                 {
649                                         /* end of field. */
650                                         list = g_list_append (list, g_strdup (field->str));
651                                         field = g_string_assign( field, "" );
652                                         state = DELIM;
653                                 }
654                                 else
655                                 {
656                                         /* Use character literally. */
657                                         field = g_string_append_c (field, c);
658                                         state = SIMPLE;
659                                 }
660                                 break;
661                         }
662                         break;
663
664                 case SIMPLE_ESCAPED:
665                         switch (c) {
666                         case EOF:
667                                 /* File ended mid way through quoted item */
668                                 list = g_list_append (list, g_strdup (field->str));
669                                 state = DONE;
670                                 break;
671                         case 'n':
672                                 /* Decode "\n" as newline. */
673                                 field = g_string_append_c (field, '\n');
674                                 state = SIMPLE;
675                                 break;
676                         case 't':
677                                 /* Decode "\t" as tab. */
678                                 field = g_string_append_c (field, '\t');
679                                 state = SIMPLE;
680                                 break;
681                         default:
682                                 /* Use character literally. */
683                                 field = g_string_append_c (field, c);
684                                 state = SIMPLE;
685                                 break;
686                         }
687                         break;
688
689                 default:
690                         g_assert_not_reached();
691                         break;
692                 }
693
694         }
695         g_string_free( field, TRUE );
696
697         return list;
698 }
699
700
701 /*---------------------------------------------------------------------------*/
702 /* Free list of fields.                                                      */
703 /*---------------------------------------------------------------------------*/
704 void
705 free_fields (GList ** list)
706 {
707         GList *p;
708
709         for (p = *list; p != NULL; p = p->next) {
710                 g_free (p->data);
711                 p->data = NULL;
712         }
713
714         g_list_free (*list);
715         *list = NULL;
716 }
717
718
719
720 /*
721  * Local Variables:       -- emacs
722  * mode: C                -- emacs
723  * c-basic-offset: 8      -- emacs
724  * tab-width: 8           -- emacs
725  * indent-tabs-mode: nil  -- emacs
726  * End:                   -- emacs
727  */