3 * Copyright (C) 2001-2009 Jim Evins <evins@snaught.com>.
5 * This file is part of gLabels.
7 * gLabels is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * gLabels is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with gLabels. If not, see <http://www.gnu.org/licenses/>.
23 #include "merge-text.h"
29 #define LINE_BUF_LEN 1024
32 /*===========================================*/
34 /*===========================================*/
36 struct _glMergeTextPrivate {
39 gboolean line1_has_keys;
58 /*===========================================*/
60 /*===========================================*/
63 /*===========================================*/
64 /* Local function prototypes */
65 /*===========================================*/
67 static void gl_merge_text_finalize (GObject *object);
69 static void gl_merge_text_set_property (GObject *object,
74 static void gl_merge_text_get_property (GObject *object,
79 static gchar *key_from_index (glMergeText *merge_text,
81 static void clear_keys (glMergeText *merge_text);
83 static GList *gl_merge_text_get_key_list (const glMerge *merge);
84 static gchar *gl_merge_text_get_primary_key (const glMerge *merge);
85 static void gl_merge_text_open (glMerge *merge);
86 static void gl_merge_text_close (glMerge *merge);
87 static glMergeRecord *gl_merge_text_get_record (glMerge *merge);
88 static void gl_merge_text_copy (glMerge *dst_merge,
89 const glMerge *src_merge);
91 static GList *parse_line (FILE *fp,
93 static gchar *parse_field (gchar *raw_field);
94 static void free_fields (GList **fields);
98 /*****************************************************************************/
99 /* Boilerplate object stuff. */
100 /*****************************************************************************/
101 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE);
/*
 * Class initializer for glMergeText.
 *
 * Hooks up the GObject property accessors and finalizer, registers the
 * "delim" and "line1_has_keys" properties, and points the glMerge
 * virtual-method slots at the implementations in this file.
 */
105 gl_merge_text_class_init (glMergeTextClass *class)
107 GObjectClass *object_class = G_OBJECT_CLASS (class);
108 glMergeClass *merge_class = GL_MERGE_CLASS (class);
110 gl_debug (DEBUG_MERGE, "START");
/* NOTE(review): G_DEFINE_TYPE normally initializes this pointer already, */
/* so this assignment looks redundant -- confirm before removing.         */
112 gl_merge_text_parent_class = g_type_class_peek_parent (class);
114 object_class->set_property = gl_merge_text_set_property;
115 object_class->get_property = gl_merge_text_get_property;
/* "delim": char-valued property, readable and writable. */
117 g_object_class_install_property
120 g_param_spec_char ("delim", NULL, NULL,
122 (G_PARAM_READABLE | G_PARAM_WRITABLE)));
/* "line1_has_keys": boolean property, readable and writable. */
124 g_object_class_install_property
127 g_param_spec_boolean ("line1_has_keys", NULL, NULL,
129 (G_PARAM_READABLE | G_PARAM_WRITABLE)));
131 object_class->finalize = gl_merge_text_finalize;
/* glMerge virtual methods implemented by the text backend. */
133 merge_class->get_key_list = gl_merge_text_get_key_list;
134 merge_class->get_primary_key = gl_merge_text_get_primary_key;
135 merge_class->open = gl_merge_text_open;
136 merge_class->close = gl_merge_text_close;
137 merge_class->get_record = gl_merge_text_get_record;
138 merge_class->copy = gl_merge_text_copy;
140 gl_debug (DEBUG_MERGE, "END");
/*
 * Instance initializer: allocates the zero-filled private structure and
 * creates the (initially empty) pointer array that holds the key names.
 */
145 gl_merge_text_init (glMergeText *merge_text)
147 gl_debug (DEBUG_MERGE, "START");
/* g_new0 zero-fills, so every private field starts out as 0 / NULL / FALSE. */
149 merge_text->priv = g_new0 (glMergeTextPrivate, 1);
151 merge_text->priv->keys = g_ptr_array_new ();
153 gl_debug (DEBUG_MERGE, "END");
/*
 * GObject finalizer: releases the stored key strings, the key array and the
 * private structure, then chains up to the parent class finalizer.
 */
158 gl_merge_text_finalize (GObject *object)
/* Note: this cast is evaluated before the validity check further down. */
160 glMergeText *merge_text = GL_MERGE_TEXT (object);
162 gl_debug (DEBUG_MERGE, "START");
164 g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
/* Free the key strings first, then the array that held them. */
166 clear_keys (merge_text);
167 g_ptr_array_free (merge_text->priv->keys, TRUE);
168 g_free (merge_text->priv);
/* Chain up so the parent class can release its own resources. */
170 G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
172 gl_debug (DEBUG_MERGE, "END");
176 /*--------------------------------------------------------------------------*/
178 /*--------------------------------------------------------------------------*/
/*
 * GObject set_property handler: copies the incoming GValue into the matching
 * private field and logs the new value.
 */
180 gl_merge_text_set_property (GObject *object,
185 glMergeText *merge_text;
187 merge_text = GL_MERGE_TEXT (object);
/* "delim" is stored as a single char.                                      */
/* NOTE(review): g_value_get_char() is deprecated in newer GLib in favour   */
/* of g_value_get_schar() -- confirm the minimum GLib version first.        */
192 merge_text->priv->delim = g_value_get_char (value);
193 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
194 merge_text->priv->delim);
197 case ARG_LINE1_HAS_KEYS:
198 merge_text->priv->line1_has_keys = g_value_get_boolean (value);
199 gl_debug (DEBUG_MERGE, "ARG \"line1_has_keys\" = \"%d\"",
200 merge_text->priv->line1_has_keys);
/* Report an unrecognized property id through the standard GObject warning. */
204 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
212 /*--------------------------------------------------------------------------*/
214 /*--------------------------------------------------------------------------*/
/*
 * GObject get_property handler: copies the requested private field into the
 * caller-supplied GValue.
 */
216 gl_merge_text_get_property (GObject *object,
221 glMergeText *merge_text;
223 merge_text = GL_MERGE_TEXT (object);
/* "delim" is exposed as a char value. */
228 g_value_set_char (value, merge_text->priv->delim);
231 case ARG_LINE1_HAS_KEYS:
232 g_value_set_boolean (value, merge_text->priv->line1_has_keys);
/* Report an unrecognized property id through the standard GObject warning. */
236 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
244 /*---------------------------------------------------------------------------*/
245 /* Lookup key name from zero based index. */
246 /*---------------------------------------------------------------------------*/
/*
 * Returns a newly allocated key name for column i_field (zero based).
 *
 * When keys were taken from line 1 and i_field is inside the stored key
 * array, the stored name is duplicated; otherwise the key is the 1-based
 * column number rendered as decimal text.
 */
248 key_from_index (glMergeText *merge_text,
251 if ( merge_text->priv->line1_has_keys &&
252 (i_field < merge_text->priv->keys->len) )
254 return g_strdup (g_ptr_array_index (merge_text->priv->keys, i_field));
/* Fallback: columns without a stored name are called "1", "2", ... */
258 return g_strdup_printf ("%d", i_field+1);
263 /*---------------------------------------------------------------------------*/
264 /* Clear stored keys. */
265 /*---------------------------------------------------------------------------*/
/*
 * Frees every key string held in the key array and marks the array empty.
 * The array object itself is kept for reuse.
 */
267 clear_keys (glMergeText *merge_text)
271 for ( i = 0; i < merge_text->priv->keys->len; i++ )
273 g_free (g_ptr_array_index (merge_text->priv->keys, i));
/* The length field is reset directly rather than through a GPtrArray call. */
275 merge_text->priv->keys->len = 0;
279 /*--------------------------------------------------------------------------*/
281 /*--------------------------------------------------------------------------*/
/*
 * Builds a list of key names, one per column, using key_from_index() for
 * each entry (so every element is a separately allocated string).
 */
283 gl_merge_text_get_key_list (const glMerge *merge)
285 glMergeText *merge_text;
286 gint i_field, n_fields;
289 gl_debug (DEBUG_MERGE, "BEGIN");
291 merge_text = GL_MERGE_TEXT (merge);
/* Column count: number of stored keys when line 1 supplies them, */
/* otherwise the widest record recorded in n_fields_max.          */
293 if ( merge_text->priv->line1_has_keys )
295 n_fields = merge_text->priv->keys->len;
299 n_fields = merge_text->priv->n_fields_max;
303 for ( i_field=0; i_field < n_fields; i_field++ )
305 key_list = g_list_append (key_list, key_from_index(merge_text, i_field));
308 gl_debug (DEBUG_MERGE, "END");
314 /*--------------------------------------------------------------------------*/
315 /* Get "primary" key. */
316 /*--------------------------------------------------------------------------*/
/* Returns the key name of column 0, as produced by key_from_index(). */
318 gl_merge_text_get_primary_key (const glMerge *merge)
320 /* For now, let's always assume the first column is the primary key. */
321 return key_from_index (GL_MERGE_TEXT (merge), 0);
325 /*--------------------------------------------------------------------------*/
326 /* Open merge source. */
327 /*--------------------------------------------------------------------------*/
/*
 * Opens the merge source and resets per-source state.
 *
 * The source name comes from gl_merge_get_src(); a name of exactly "-"
 * selects stdin, anything else is opened for reading with fopen().  Stored
 * keys and the maximum field count are then cleared, and when
 * line1_has_keys is set the first line is consumed and its fields become
 * the key names.
 */
329 gl_merge_text_open (glMerge *merge)
331 glMergeText *merge_text;
337 merge_text = GL_MERGE_TEXT (merge);
339 src = gl_merge_get_src (merge);
/* A source of exactly "-" means "read from standard input". */
343 if (g_utf8_strlen(src, -1) == 1 && src[0] == '-')
344 merge_text->priv->fp = stdin;
/* NOTE(review): no check of the fopen() result is visible here; the      */
/* stream is later handed to parse_line() as-is -- confirm NULL handling. */
346 merge_text->priv->fp = fopen (src, "r");
/* Discard state left over from any previously opened source. */
350 clear_keys (merge_text);
351 merge_text->priv->n_fields_max = 0;
353 if ( merge_text->priv->line1_has_keys )
356 * Extract keys from first line and discard line
359 line1_fields = parse_line (merge_text->priv->fp, merge_text->priv->delim);
360 for ( p = line1_fields; p != NULL; p = p->next )
/* Keys are stored as private copies, so the parsed list can be freed below. */
362 g_ptr_array_add (merge_text->priv->keys, g_strdup (p->data));
364 free_fields (&line1_fields);
373 /*--------------------------------------------------------------------------*/
374 /* Close merge source. */
375 /*--------------------------------------------------------------------------*/
/*
 * Closes the stream if one is open and clears the stored pointer, so a
 * second call finds nothing to close.
 */
377 gl_merge_text_close (glMerge *merge)
379 glMergeText *merge_text;
381 merge_text = GL_MERGE_TEXT (merge);
383 if (merge_text->priv->fp != NULL) {
/* NOTE(review): when the source was "-", fp is stdin, so this fclose() */
/* closes the process's standard input -- confirm that is intended.     */
385 fclose (merge_text->priv->fp);
386 merge_text->priv->fp = NULL;
392 /*--------------------------------------------------------------------------*/
393 /* Get next record from merge source, NULL if no records left (i.e. EOF) */
394 /*--------------------------------------------------------------------------*/
/*
 * Reads one line from the open stream with parse_line() and turns it into a
 * glMergeRecord: one glMergeField per parsed field, keyed via
 * key_from_index().  Also tracks the largest field count seen so far in
 * n_fields_max.
 */
395 static glMergeRecord *
396 gl_merge_text_get_record (glMerge *merge)
398 glMergeText *merge_text;
401 glMergeRecord *record;
406 merge_text = GL_MERGE_TEXT (merge);
408 delim = merge_text->priv->delim;
409 fp = merge_text->priv->fp;
411 fields = parse_line (fp, delim);
412 if ( fields == NULL ) {
/* New records start out selected. */
416 record = g_new0 (glMergeRecord, 1);
417 record->select_flag = TRUE;
418 for (p=fields, i_field=0; p != NULL; p=p->next, i_field++) {
420 field = g_new0 (glMergeField, 1);
421 field->key = key_from_index (merge_text, i_field);
/* Unless CSV_ALWAYS_UTF8 is defined, field text is converted from the    */
/* locale encoding to UTF-8; otherwise it is copied unchanged.            */
/* NOTE(review): g_locale_to_utf8() returns NULL when conversion fails,   */
/* which would leave value NULL -- confirm consumers tolerate that.       */
422 #ifndef CSV_ALWAYS_UTF8
423 field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
425 field->value = g_strdup (p->data);
428 record->field_list = g_list_append (record->field_list, field);
/* The record holds its own copies, so the parsed field list can go. */
430 free_fields (&fields);
/* i_field now equals the number of fields in this record. */
432 if ( i_field > merge_text->priv->n_fields_max )
434 merge_text->priv->n_fields_max = i_field;
441 /*---------------------------------------------------------------------------*/
442 /* Copy merge_text specific fields. */
443 /*---------------------------------------------------------------------------*/
/*
 * Copies the text-specific settings (delimiter, line1_has_keys flag, key
 * names, maximum field count) from src_merge into dst_merge.  Key strings
 * are duplicated, so the two objects do not share them.
 */
445 gl_merge_text_copy (glMerge *dst_merge,
446 const glMerge *src_merge)
448 glMergeText *dst_merge_text;
449 glMergeText *src_merge_text;
452 dst_merge_text = GL_MERGE_TEXT (dst_merge);
453 src_merge_text = GL_MERGE_TEXT (src_merge);
455 dst_merge_text->priv->delim = src_merge_text->priv->delim;
456 dst_merge_text->priv->line1_has_keys = src_merge_text->priv->line1_has_keys;
/* NOTE(review): keys are appended without clearing the destination array */
/* first; if dst already holds keys they would accumulate -- confirm the  */
/* destination is always freshly created.                                 */
458 for ( i=0; i < src_merge_text->priv->keys->len; i++ )
460 g_ptr_array_add (dst_merge_text->priv->keys,
461 g_strdup ((gchar *)g_ptr_array_index (src_merge_text->priv->keys, i)));
464 dst_merge_text->priv->n_fields_max = src_merge_text->priv->n_fields_max;
468 /*---------------------------------------------------------------------------*/
469 /* PRIVATE. Parse line. */
471 /* Attempt to be a robust parser of various CSV (and similar) formats. */
473 /* Split into fields, accounting for: */
474 /* - delimiters may be embedded in quoted text (") */
475 /* - delimiters may be "escaped" by a leading backslash (\) */
476 /* - quotes may be embedded in quoted text as two adjacent quotes ("") */
477 /* - quotes may be "escaped" either within or outside of quoted text. */
478 /* - newlines may be embedded in quoted text, allowing a field to span */
479 /* more than one line. */
481 /* This function does not do any parsing of the individual fields, other */
482 /* than to correctly interpret delimiters. Actual parsing of the individual */
483 /* fields is done in parse_field(). */
485 /* Returns a list of fields. A blank line is considered a line with one */
486 /* empty field. Returns empty (NULL) when done. */
487 /*---------------------------------------------------------------------------*/
/*
 * Splits one logical line into a list of fields.
 *
 * Characters are accumulated in a GString while a state variable (BEGIN,
 * NORMAL, QUOTED, QUOTED_QUOTE1, NORMAL_ESCAPED, QUOTED_ESCAPED, DONE)
 * tracks quoting and escaping.  Each completed field is passed through
 * parse_field() and appended to the result list; the loop ends when the
 * state becomes DONE.
 */
489 parse_line (FILE *fp,
495 enum { BEGIN, NORMAL, QUOTED, QUOTED_QUOTE1,
496 NORMAL_ESCAPED, QUOTED_ESCAPED, DONE } state;
/* Accumulator for the raw text of the field currently being read. */
503 string = g_string_new( "" );
504 while ( state != DONE ) {
512 /* first field is empty. */
513 list = g_list_append (list, g_strdup (""));
519 string = g_string_append_c (string, c);
523 string = g_string_append_c (string, c);
524 state = NORMAL_ESCAPED;
527 /* treat as one empty field. */
528 list = g_list_append (list, g_strdup (""));
532 /* end of file, no more lines. */
536 string = g_string_append_c (string, c);
/* Field complete: store its parsed form and restart the accumulator. */
545 list = g_list_append (list, parse_field (string->str));
546 string = g_string_assign( string, "" );
552 string = g_string_append_c (string, c);
556 string = g_string_append_c (string, c);
557 state = NORMAL_ESCAPED;
561 list = g_list_append (list, parse_field (string->str));
565 string = g_string_append_c (string, c);
574 string = g_string_append_c (string, c);
575 state = QUOTED_QUOTE1;
578 string = g_string_append_c (string, c);
579 state = QUOTED_ESCAPED;
582 /* File ended mid way through quoted item */
583 list = g_list_append (list, parse_field (string->str));
587 string = g_string_append_c (string, c);
/* Field complete: store its parsed form and restart the accumulator. */
595 list = g_list_append (list, parse_field (string->str));
596 string = g_string_assign( string, "" );
602 /* insert quotes in string, stay quoted. */
603 string = g_string_append_c (string, c);
608 /* line or file ended after quoted item */
609 list = g_list_append (list, parse_field (string->str));
613 string = g_string_append_c (string, c);
622 /* File ended mid way through quoted item */
623 list = g_list_append (list, parse_field (string->str));
627 string = g_string_append_c (string, c);
636 /* File ended mid way through quoted item */
637 list = g_list_append (list, parse_field (string->str));
641 string = g_string_append_c (string, c);
/* Marks a path that is never expected to execute. */
648 g_assert_not_reached();
/* Release the accumulator together with its character buffer. */
653 g_string_free( string, TRUE );
659 /*---------------------------------------------------------------------------*/
660 /* PRIVATE. Parse field. */
662 /* - Strip leading and trailing white space, unless quoted. */
663 /* - Strip CR, unless escaped. */
664 /* - Expand '\n' and '\t' into newline and tab characters. */
665 /* - Remove quotes, unless escaped (\" anywhere or "" within quotes) */
666 /*---------------------------------------------------------------------------*/
/*
 * Converts the raw text of one field into its final value in two passes:
 * first a private copy is stripped of leading and trailing whitespace, then
 * the copy is walked character by character with a small state machine
 * (NORMAL, NORMAL_ESCAPED, QUOTED, QUOTED_ESCAPED, QUOTED_QUOTE1) that
 * builds the output in a GString.  raw_field itself is not modified.
 */
668 parse_field (gchar *raw_field)
671 gchar *pass1_field, *c, *field;
672 enum { NORMAL, NORMAL_ESCAPED, QUOTED, QUOTED_ESCAPED, QUOTED_QUOTE1} state;
676 * Pass 1: remove leading and trailing spaces.
/* Work on a duplicate because g_strstrip() edits its argument in place. */
678 pass1_field = g_strdup (raw_field);
679 g_strstrip (pass1_field);
682 * Pass 2: resolve quoting and escaping.
685 string = g_string_new( "" );
686 for ( c=pass1_field; *c != 0; c++ )
693 state = NORMAL_ESCAPED;
702 string = g_string_append_c (string, *c);
710 string = g_string_append_c (string, '\n');
714 string = g_string_append_c (string, '\t');
718 string = g_string_append_c (string, *c);
727 state = QUOTED_ESCAPED;
730 state = QUOTED_QUOTE1;
736 string = g_string_append_c (string, *c);
744 string = g_string_append_c (string, '\n');
748 string = g_string_append_c (string, '\t');
752 string = g_string_append_c (string, *c);
761 /* insert quotes in string, stay quoted. */
762 string = g_string_append_c (string, *c);
766 /* Strip CR, return to QUOTED. */
770 string = g_string_append_c (string, *c);
/* Marks a path that is never expected to execute. */
777 g_assert_not_reached();
/* Copy the result out so it outlives the temporaries freed below. */
783 field = g_strdup (string->str);
784 g_string_free( string, TRUE );
785 g_free (pass1_field);
791 /*---------------------------------------------------------------------------*/
792 /* Free list of fields. */
793 /*---------------------------------------------------------------------------*/
/* Takes the address of the list pointer and walks the list starting at *list. */
795 free_fields (GList ** list)
799 for (p = *list; p != NULL; p = p->next) {
811 * Local Variables: -- emacs
813 * c-basic-offset: 8 -- emacs
814 * tab-width: 8 -- emacs
815 * indent-tabs-mode: nil -- emacs