]> git.sur5r.net Git - glabels/blob - src/merge-text.c
Imported Upstream version 3.0.0
[glabels] / src / merge-text.c
1 /*
2  *  merge-text.c
3  *  Copyright (C) 2001-2009  Jim Evins <evins@snaught.com>.
4  *
5  *  This file is part of gLabels.
6  *
7  *  gLabels is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  gLabels is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with gLabels.  If not, see <http://www.gnu.org/licenses/>.
19  */
20
21 #include <config.h>
22
23 #include "merge-text.h"
24
25 #include <stdio.h>
26
27 #include "debug.h"
28
29 #define LINE_BUF_LEN 1024
30
31
32 /*===========================================*/
33 /* Private types                             */
34 /*===========================================*/
35
36 struct _glMergeTextPrivate {
37
38         gchar             delim;
39         gboolean          line1_has_keys;
40
41         FILE             *fp;
42
43         GPtrArray        *keys;
44         gint              n_fields_max;
45 };
46
/* Signal ids.  No signals are listed here, so LAST_SIGNAL is the only entry. */
enum {
        LAST_SIGNAL = 0
};
50
/* Property ids used by the set_property/get_property handlers. */
enum {
        ARG_0              = 0,   /* placeholder; not installed as a property */
        ARG_DELIM          = 1,   /* "delim" */
        ARG_LINE1_HAS_KEYS = 2    /* "line1_has_keys" */
};
56
57
58 /*===========================================*/
59 /* Private globals                           */
60 /*===========================================*/
61
62
63 /*===========================================*/
64 /* Local function prototypes                 */
65 /*===========================================*/
66
67 static void           gl_merge_text_finalize        (GObject          *object);
68
69 static void           gl_merge_text_set_property    (GObject          *object,
70                                                      guint             param_id,
71                                                      const GValue     *value,
72                                                      GParamSpec       *pspec);
73
74 static void           gl_merge_text_get_property    (GObject          *object,
75                                                      guint             param_id,
76                                                      GValue           *value,
77                                                      GParamSpec       *pspec);
78
79 static gchar         *key_from_index                (glMergeText      *merge_text,
80                                                      gint              i_field);
81 static void           clear_keys                    (glMergeText      *merge_text);
82
83 static GList         *gl_merge_text_get_key_list    (const glMerge    *merge);
84 static gchar         *gl_merge_text_get_primary_key (const glMerge    *merge);
85 static void           gl_merge_text_open            (glMerge          *merge);
86 static void           gl_merge_text_close           (glMerge          *merge);
87 static glMergeRecord *gl_merge_text_get_record      (glMerge          *merge);
88 static void           gl_merge_text_copy            (glMerge          *dst_merge,
89                                                      const glMerge    *src_merge);
90
91 static GList         *parse_line                    (FILE             *fp,
92                                                      gchar             delim);
93 static gchar         *parse_field                   (gchar            *raw_field);
94 static void           free_fields                   (GList           **fields);
95
96
97
98 /*****************************************************************************/
99 /* Boilerplate object stuff.                                                 */
100 /*****************************************************************************/
101 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE)
102
103
104 static void
105 gl_merge_text_class_init (glMergeTextClass *class)
106 {
107         GObjectClass *object_class = G_OBJECT_CLASS (class);
108         glMergeClass *merge_class  = GL_MERGE_CLASS (class);
109
110         gl_debug (DEBUG_MERGE, "START");
111
112         gl_merge_text_parent_class = g_type_class_peek_parent (class);
113
114         object_class->set_property = gl_merge_text_set_property;
115         object_class->get_property = gl_merge_text_get_property;
116
117         g_object_class_install_property
118                 (object_class,
119                  ARG_DELIM,
120                  g_param_spec_char ("delim", NULL, NULL,
121                                     0, 0x7F, ',',
122                                     (G_PARAM_READABLE | G_PARAM_WRITABLE)));
123
124         g_object_class_install_property
125                 (object_class,
126                  ARG_LINE1_HAS_KEYS,
127                  g_param_spec_boolean ("line1_has_keys", NULL, NULL,
128                                        FALSE,
129                                        (G_PARAM_READABLE | G_PARAM_WRITABLE)));
130
131         object_class->finalize = gl_merge_text_finalize;
132
133         merge_class->get_key_list    = gl_merge_text_get_key_list;
134         merge_class->get_primary_key = gl_merge_text_get_primary_key;
135         merge_class->open            = gl_merge_text_open;
136         merge_class->close           = gl_merge_text_close;
137         merge_class->get_record      = gl_merge_text_get_record;
138         merge_class->copy            = gl_merge_text_copy;
139
140         gl_debug (DEBUG_MERGE, "END");
141 }
142
143
144 static void
145 gl_merge_text_init (glMergeText *merge_text)
146 {
147         gl_debug (DEBUG_MERGE, "START");
148
149         merge_text->priv = g_new0 (glMergeTextPrivate, 1);
150
151         merge_text->priv->keys = g_ptr_array_new ();
152
153         gl_debug (DEBUG_MERGE, "END");
154 }
155
156
157 static void
158 gl_merge_text_finalize (GObject *object)
159 {
160         glMergeText *merge_text = GL_MERGE_TEXT (object);
161
162         gl_debug (DEBUG_MERGE, "START");
163
164         g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
165
166         clear_keys (merge_text);
167         g_ptr_array_free (merge_text->priv->keys, TRUE);
168         g_free (merge_text->priv);
169
170         G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
171
172         gl_debug (DEBUG_MERGE, "END");
173 }
174
175
176 /*--------------------------------------------------------------------------*/
177 /* Set argument.                                                            */
178 /*--------------------------------------------------------------------------*/
179 static void
180 gl_merge_text_set_property (GObject      *object,
181                             guint         param_id,
182                             const GValue *value,
183                             GParamSpec   *pspec)
184 {
185         glMergeText *merge_text;
186
187         merge_text = GL_MERGE_TEXT (object);
188
189         switch (param_id) {
190
191         case ARG_DELIM:
192                 merge_text->priv->delim = g_value_get_char (value);
193                 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
194                           merge_text->priv->delim);
195                 break;
196
197         case ARG_LINE1_HAS_KEYS:
198                 merge_text->priv->line1_has_keys = g_value_get_boolean (value);
199                 gl_debug (DEBUG_MERGE, "ARG \"line1_has_keys\" = \"%d\"",
200                           merge_text->priv->line1_has_keys);
201                 break;
202
203         default:
204                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
205                 break;
206
207         }
208
209 }
210
211
212 /*--------------------------------------------------------------------------*/
213 /* Get argument.                                                            */
214 /*--------------------------------------------------------------------------*/
215 static void
216 gl_merge_text_get_property (GObject     *object,
217                             guint        param_id,
218                             GValue      *value,
219                             GParamSpec  *pspec)
220 {
221         glMergeText *merge_text;
222
223         merge_text = GL_MERGE_TEXT (object);
224
225         switch (param_id) {
226
227         case ARG_DELIM:
228                 g_value_set_char (value, merge_text->priv->delim);
229                 break;
230
231         case ARG_LINE1_HAS_KEYS:
232                 g_value_set_boolean (value, merge_text->priv->line1_has_keys);
233                 break;
234
235         default:
236                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
237                 break;
238
239         }
240
241 }
242
243
244 /*---------------------------------------------------------------------------*/
245 /* Lookup key name from zero based index.                                    */
246 /*---------------------------------------------------------------------------*/
247 static gchar *
248 key_from_index (glMergeText  *merge_text,
249                 gint          i_field)
250 {
251         if ( merge_text->priv->line1_has_keys &&
252              (i_field < merge_text->priv->keys->len) )
253         {
254                 return g_strdup (g_ptr_array_index (merge_text->priv->keys, i_field));
255         }
256         else
257         {
258                 return g_strdup_printf ("%d", i_field+1);
259         }
260 }
261
262
263 /*---------------------------------------------------------------------------*/
264 /* Clear stored keys.                                                        */
265 /*---------------------------------------------------------------------------*/
266 static void
267 clear_keys (glMergeText      *merge_text)
268 {
269         gint i;
270
271         for ( i = 0; i < merge_text->priv->keys->len; i++ )
272         {
273                 g_free (g_ptr_array_index (merge_text->priv->keys, i));
274         }
275         merge_text->priv->keys->len = 0;
276 }
277
278
279 /*--------------------------------------------------------------------------*/
280 /* Get key list.                                                            */
281 /*--------------------------------------------------------------------------*/
282 static GList *
283 gl_merge_text_get_key_list (const glMerge *merge)
284 {
285         glMergeText   *merge_text;
286         gint           i_field, n_fields;
287         GList         *key_list;
288         
289         gl_debug (DEBUG_MERGE, "BEGIN");
290
291         merge_text = GL_MERGE_TEXT (merge);
292
293         if ( merge_text->priv->line1_has_keys )
294         {
295                 n_fields = merge_text->priv->keys->len;
296         }
297         else
298         {
299                 n_fields = merge_text->priv->n_fields_max;
300         }
301
302         key_list = NULL;
303         for ( i_field=0; i_field < n_fields; i_field++ )
304         {
305                 key_list = g_list_append (key_list, key_from_index(merge_text, i_field));
306         }
307
308         gl_debug (DEBUG_MERGE, "END");
309
310         return key_list;
311 }
312
313
314 /*--------------------------------------------------------------------------*/
315 /* Get "primary" key.                                                       */
316 /*--------------------------------------------------------------------------*/
317 static gchar *
318 gl_merge_text_get_primary_key (const glMerge *merge)
319 {
320         /* For now, let's always assume the first column is the primary key. */
321         return key_from_index (GL_MERGE_TEXT (merge), 0);
322 }
323
324
325 /*--------------------------------------------------------------------------*/
326 /* Open merge source.                                                       */
327 /*--------------------------------------------------------------------------*/
328 static void
329 gl_merge_text_open (glMerge *merge)
330 {
331         glMergeText *merge_text;
332         gchar       *src;
333
334         GList       *line1_fields;
335         GList       *p;
336
337         merge_text = GL_MERGE_TEXT (merge);
338
339         src = gl_merge_get_src (merge);
340
341         if (src != NULL)
342         {
343                 if (g_utf8_strlen(src, -1) == 1 && src[0] == '-')
344                         merge_text->priv->fp = stdin;
345                 else
346                         merge_text->priv->fp = fopen (src, "r");
347
348                 g_free (src);
349
350                 clear_keys (merge_text);
351                 merge_text->priv->n_fields_max = 0;
352
353                 if ( merge_text->priv->line1_has_keys )
354                 {
355                         /*
356                          * Extract keys from first line and discard line
357                          */
358
359                         line1_fields = parse_line (merge_text->priv->fp, merge_text->priv->delim);
360                         for ( p = line1_fields; p != NULL; p = p->next )
361                         {
362                                 g_ptr_array_add (merge_text->priv->keys, g_strdup (p->data));
363                         }
364                         free_fields (&line1_fields);
365                 }
366
367         }
368
369
370 }
371
372
373 /*--------------------------------------------------------------------------*/
374 /* Close merge source.                                                      */
375 /*--------------------------------------------------------------------------*/
376 static void
377 gl_merge_text_close (glMerge *merge)
378 {
379         glMergeText *merge_text;
380
381         merge_text = GL_MERGE_TEXT (merge);
382
383         if (merge_text->priv->fp != NULL) {
384
385                 fclose (merge_text->priv->fp);
386                 merge_text->priv->fp = NULL;
387
388         }
389 }
390
391
392 /*--------------------------------------------------------------------------*/
393 /* Get next record from merge source, NULL if no records left (i.e EOF)     */
394 /*--------------------------------------------------------------------------*/
395 static glMergeRecord *
396 gl_merge_text_get_record (glMerge *merge)
397 {
398         glMergeText   *merge_text;
399         gchar          delim;
400         FILE          *fp;
401         glMergeRecord *record;
402         GList         *fields, *p;
403         gint           i_field;
404         glMergeField  *field;
405
406         merge_text = GL_MERGE_TEXT (merge);
407
408         delim = merge_text->priv->delim;
409         fp    = merge_text->priv->fp;
410
411         fields = parse_line (fp, delim);
412         if ( fields == NULL ) {
413                 return NULL;
414         }
415
416         record = g_new0 (glMergeRecord, 1);
417         record->select_flag = TRUE;
418         for (p=fields, i_field=0; p != NULL; p=p->next, i_field++) {
419
420                 field = g_new0 (glMergeField, 1);
421                 field->key = key_from_index (merge_text, i_field);
422 #ifndef CSV_ALWAYS_UTF8
423                 field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
424 #else
425                 field->value = g_strdup (p->data);
426 #endif
427
428                 record->field_list = g_list_append (record->field_list, field);
429         }
430         free_fields (&fields);
431
432         if ( i_field > merge_text->priv->n_fields_max )
433         {
434                 merge_text->priv->n_fields_max = i_field;
435         }
436
437         return record;
438 }
439
440
441 /*---------------------------------------------------------------------------*/
442 /* Copy merge_text specific fields.                                          */
443 /*---------------------------------------------------------------------------*/
444 static void
445 gl_merge_text_copy (glMerge       *dst_merge,
446                     const glMerge *src_merge)
447 {
448         glMergeText *dst_merge_text;
449         glMergeText *src_merge_text;
450         gint         i;
451
452         dst_merge_text = GL_MERGE_TEXT (dst_merge);
453         src_merge_text = GL_MERGE_TEXT (src_merge);
454
455         dst_merge_text->priv->delim          = src_merge_text->priv->delim;
456         dst_merge_text->priv->line1_has_keys = src_merge_text->priv->line1_has_keys;
457
458         for ( i=0; i < src_merge_text->priv->keys->len; i++ )
459         {
460                 g_ptr_array_add (dst_merge_text->priv->keys,
461                                  g_strdup ((gchar *)g_ptr_array_index (src_merge_text->priv->keys, i)));
462         }
463
464         dst_merge_text->priv->n_fields_max   = src_merge_text->priv->n_fields_max;
465 }
466
467
468 /*---------------------------------------------------------------------------*/
469 /* PRIVATE.  Parse line.                                                     */
470 /*                                                                           */
471 /* Attempt to be a robust parser of various CSV (and similar) formats.       */
472 /*                                                                           */
473 /* Based on CSV format described in RFC 4180 section 2.                      */
474 /*                                                                           */
475 /* Additions to RFC 4180 rules:                                              */
476 /*   - delimeters and other special characters may be "escaped" by a leading */
477 /*     backslash (\)                                                         */
478 /*   - C escape sequences for newline (\n) and tab (\t) are also translated. */
479 /*   - if quoted text is not followed by a delimeter, any additional text is */
480 /*     concatenated with quoted portion.                                     */
481 /*                                                                           */
482 /* Returns a list of fields.  A blank line is considered a line with one     */
483 /* empty field.  Returns empty (NULL) when done.                             */
484 /*---------------------------------------------------------------------------*/
485 static GList *
486 parse_line (FILE  *fp,
487             gchar  delim )
488 {
489         GList   *list;
490         GString *field;
491         gint     c;
492         enum { DELIM,
493                QUOTED, QUOTED_QUOTE1, QUOTED_ESCAPED,
494                SIMPLE, SIMPLE_ESCAPED,
495                DONE } state;
496
497         if (fp == NULL) {
498                 return NULL;
499         }
500                
501         state = DELIM;
502         list  = NULL;
503         field = g_string_new( "" );
504         while ( state != DONE ) {
505                 c=getc (fp);
506
507                 switch (state) {
508
509                 case DELIM:
510                         switch (c) {
511                         case '\n':
512                                 /* last field is empty. */
513                                 list = g_list_append (list, g_strdup (""));
514                                 state = DONE;
515                                 break;
516                         case '\r':
517                                 /* ignore */
518                                 state = DELIM;
519                                 break;
520                         case EOF:
521                                 /* end of file, no more lines. */
522                                 state = DONE;
523                                 break;
524                         case '"':
525                                 /* start a quoted field. */
526                                 state = QUOTED;
527                                 break;
528                         case '\\':
529                                 /* simple field, but 1st character is an escape. */
530                                 state = SIMPLE_ESCAPED;
531                                 break;
532                         default:
533                                 if ( c == delim )
534                                 {
535                                         /* field is empty. */
536                                         list = g_list_append (list, g_strdup (""));
537                                         state = DELIM;
538                                 }
539                                 else
540                                 {
541                                         /* begining of a simple field. */
542                                         field = g_string_append_c (field, c);
543                                         state = SIMPLE;
544                                 }
545                                 break;
546                         }
547                         break;
548
549                 case QUOTED:
550                         switch (c) {
551                         case EOF:
552                                 /* File ended mid way through quoted item, truncate field. */
553                                 list = g_list_append (list, g_strdup (field->str));
554                                 state = DONE;
555                                 break;
556                         case '"':
557                                 /* Possible end of field, but could be 1st of a pair. */
558                                 state = QUOTED_QUOTE1;
559                                 break;
560                         case '\\':
561                                 /* Escape next character, or special escape, e.g. \n. */
562                                 state = QUOTED_ESCAPED;
563                                 break;
564                         default:
565                                 /* Use character literally. */
566                                 field = g_string_append_c (field, c);
567                                 break;
568                         }
569                         break;
570
571                 case QUOTED_QUOTE1:
572                         switch (c) {
573                         case '\n':
574                         case EOF:
575                                 /* line or file ended after quoted item */
576                                 list = g_list_append (list, g_strdup (field->str));
577                                 state = DONE;
578                                 break;
579                         case '"':
580                                 /* second quote, insert and stay quoted. */
581                                 field = g_string_append_c (field, c);
582                                 state = QUOTED;
583                                 break;
584                         case '\r':
585                                 /* ignore and go to fallback */
586                                 state = SIMPLE;
587                                 break;
588                         default:
589                                 if ( c == delim )
590                                 {
591                                         /* end of field. */
592                                         list = g_list_append (list, g_strdup (field->str));
593                                         field = g_string_assign( field, "" );
594                                         state = DELIM;
595                                 }
596                                 else
597                                 {
598                                         /* fallback if not a delim or another quote. */
599                                         field = g_string_append_c (field, c);
600                                         state = SIMPLE;
601                                 }
602                                 break;
603                         }
604                         break;
605
606                 case QUOTED_ESCAPED:
607                         switch (c) {
608                         case EOF:
609                                 /* File ended mid way through quoted item */
610                                 list = g_list_append (list, g_strdup (field->str));
611                                 state = DONE;
612                                 break;
613                         case 'n':
614                                 /* Decode "\n" as newline. */
615                                 field = g_string_append_c (field, '\n');
616                                 state = QUOTED;
617                                 break;
618                         case 't':
619                                 /* Decode "\t" as tab. */
620                                 field = g_string_append_c (field, '\t');
621                                 state = QUOTED;
622                                 break;
623                         default:
624                                 /* Use character literally. */
625                                 field = g_string_append_c (field, c);
626                                 state = QUOTED;
627                                 break;
628                         }
629                         break;
630
631                 case SIMPLE:
632                         switch (c) {
633                         case '\n':
634                         case EOF:
635                                 /* line or file ended */
636                                 list = g_list_append (list, g_strdup (field->str));
637                                 state = DONE;
638                                 break;
639                         case '\r':
640                                 /* ignore */
641                                 state = SIMPLE;
642                                 break;
643                         case '\\':
644                                 /* Escape next character, or special escape, e.g. \n. */
645                                 state = SIMPLE_ESCAPED;
646                                 break;
647                         default:
648                                 if ( c == delim )
649                                 {
650                                         /* end of field. */
651                                         list = g_list_append (list, g_strdup (field->str));
652                                         field = g_string_assign( field, "" );
653                                         state = DELIM;
654                                 }
655                                 else
656                                 {
657                                         /* Use character literally. */
658                                         field = g_string_append_c (field, c);
659                                         state = SIMPLE;
660                                 }
661                                 break;
662                         }
663                         break;
664
665                 case SIMPLE_ESCAPED:
666                         switch (c) {
667                         case EOF:
668                                 /* File ended mid way through quoted item */
669                                 list = g_list_append (list, g_strdup (field->str));
670                                 state = DONE;
671                                 break;
672                         case 'n':
673                                 /* Decode "\n" as newline. */
674                                 field = g_string_append_c (field, '\n');
675                                 state = SIMPLE;
676                                 break;
677                         case 't':
678                                 /* Decode "\t" as tab. */
679                                 field = g_string_append_c (field, '\t');
680                                 state = SIMPLE;
681                                 break;
682                         default:
683                                 /* Use character literally. */
684                                 field = g_string_append_c (field, c);
685                                 state = SIMPLE;
686                                 break;
687                         }
688                         break;
689
690                 default:
691                         g_assert_not_reached();
692                         break;
693                 }
694
695         }
696         g_string_free( field, TRUE );
697
698         return list;
699 }
700
701
702 /*---------------------------------------------------------------------------*/
703 /* Free list of fields.                                                      */
704 /*---------------------------------------------------------------------------*/
705 void
706 free_fields (GList ** list)
707 {
708         GList *p;
709
710         for (p = *list; p != NULL; p = p->next) {
711                 g_free (p->data);
712                 p->data = NULL;
713         }
714
715         g_list_free (*list);
716         *list = NULL;
717 }
718
719
720
721 /*
722  * Local Variables:       -- emacs
723  * mode: C                -- emacs
724  * c-basic-offset: 8      -- emacs
725  * tab-width: 8           -- emacs
726  * indent-tabs-mode: nil  -- emacs
727  * End:                   -- emacs
728  */