3 * Copyright (C) 2001-2009 Jim Evins <evins@snaught.com>.
5 * This file is part of gLabels.
7 * gLabels is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * gLabels is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with gLabels. If not, see <http://www.gnu.org/licenses/>.
23 #include "merge-text.h"
29 #define LINE_BUF_LEN 1024
32 /*===========================================*/
34 /*===========================================*/
36 struct _glMergeTextPrivate {
51 /*===========================================*/
53 /*===========================================*/
56 /*===========================================*/
57 /* Local function prototypes */
58 /*===========================================*/
60 static void gl_merge_text_finalize (GObject *object);
62 static void gl_merge_text_set_property (GObject *object,
67 static void gl_merge_text_get_property (GObject *object,
72 static GList *gl_merge_text_get_key_list (glMerge *merge);
73 static gchar *gl_merge_text_get_primary_key (glMerge *merge);
74 static void gl_merge_text_open (glMerge *merge);
75 static void gl_merge_text_close (glMerge *merge);
76 static glMergeRecord *gl_merge_text_get_record (glMerge *merge);
77 static void gl_merge_text_copy (glMerge *dst_merge,
80 static GList *parse_line (FILE *fp,
82 static gchar *parse_field (gchar *raw_field);
83 static void free_fields (GList **fields);
86 /*****************************************************************************/
87 /* Boilerplate object stuff. */
88 /*****************************************************************************/
89 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE);
93 gl_merge_text_class_init (glMergeTextClass *class)
95 GObjectClass *object_class = G_OBJECT_CLASS (class);
96 glMergeClass *merge_class = GL_MERGE_CLASS (class);
98 gl_debug (DEBUG_MERGE, "START");
100 gl_merge_text_parent_class = g_type_class_peek_parent (class);
102 object_class->set_property = gl_merge_text_set_property;
103 object_class->get_property = gl_merge_text_get_property;
105 g_object_class_install_property
108 g_param_spec_char ("delim", NULL, NULL,
110 (G_PARAM_READABLE | G_PARAM_WRITABLE)));
112 object_class->finalize = gl_merge_text_finalize;
114 merge_class->get_key_list = gl_merge_text_get_key_list;
115 merge_class->get_primary_key = gl_merge_text_get_primary_key;
116 merge_class->open = gl_merge_text_open;
117 merge_class->close = gl_merge_text_close;
118 merge_class->get_record = gl_merge_text_get_record;
119 merge_class->copy = gl_merge_text_copy;
121 gl_debug (DEBUG_MERGE, "END");
126 gl_merge_text_init (glMergeText *merge_text)
128 gl_debug (DEBUG_MERGE, "START");
130 merge_text->priv = g_new0 (glMergeTextPrivate, 1);
132 gl_debug (DEBUG_MERGE, "END");
137 gl_merge_text_finalize (GObject *object)
139 glMergeText *merge_text = GL_MERGE_TEXT (object);
141 gl_debug (DEBUG_MERGE, "START");
143 g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
145 g_free (merge_text->priv);
147 G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
149 gl_debug (DEBUG_MERGE, "END");
153 /*--------------------------------------------------------------------------*/
155 /*--------------------------------------------------------------------------*/
157 gl_merge_text_set_property (GObject *object,
162 glMergeText *merge_text;
164 merge_text = GL_MERGE_TEXT (object);
169 merge_text->priv->delim = g_value_get_char (value);
170 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
171 merge_text->priv->delim);
175 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
183 /*--------------------------------------------------------------------------*/
185 /*--------------------------------------------------------------------------*/
187 gl_merge_text_get_property (GObject *object,
192 glMergeText *merge_text;
194 merge_text = GL_MERGE_TEXT (object);
199 g_value_set_char (value, merge_text->priv->delim);
203 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
211 /*--------------------------------------------------------------------------*/
213 /*--------------------------------------------------------------------------*/
215 gl_merge_text_get_key_list (glMerge *merge)
217 glMergeText *merge_text;
218 GList *record_list, *p_rec;
219 glMergeRecord *record;
221 gint i_field, n_fields, n_fields_max = 0;
224 /* Field keys are simply column numbers. */
226 gl_debug (DEBUG_MERGE, "BEGIN");
228 merge_text = GL_MERGE_TEXT (merge);
230 record_list = (GList *)gl_merge_get_record_list (merge);
232 for ( p_rec=record_list; p_rec!=NULL; p_rec=p_rec->next ) {
233 record = (glMergeRecord *)p_rec->data;
236 for ( p_field=record->field_list; p_field!=NULL; p_field=p_field->next ) {
239 if ( n_fields > n_fields_max ) n_fields_max = n_fields;
243 for (i_field=1; i_field <= n_fields_max; i_field++) {
244 key_list = g_list_append (key_list, g_strdup_printf ("%d", i_field));
248 gl_debug (DEBUG_MERGE, "END");
254 /*--------------------------------------------------------------------------*/
255 /* Get "primary" key. */
256 /*--------------------------------------------------------------------------*/
258 gl_merge_text_get_primary_key (glMerge *merge)
260 /* For now, let's always assume the first column is the primary key. */
261 return g_strdup ("1");
265 /*--------------------------------------------------------------------------*/
266 /* Open merge source. */
267 /*--------------------------------------------------------------------------*/
269 gl_merge_text_open (glMerge *merge)
271 glMergeText *merge_text;
274 merge_text = GL_MERGE_TEXT (merge);
276 src = gl_merge_get_src (merge);
279 merge_text->priv->fp = fopen (src, "r");
286 /*--------------------------------------------------------------------------*/
287 /* Close merge source. */
288 /*--------------------------------------------------------------------------*/
290 gl_merge_text_close (glMerge *merge)
292 glMergeText *merge_text;
294 merge_text = GL_MERGE_TEXT (merge);
296 if (merge_text->priv->fp != NULL) {
298 fclose (merge_text->priv->fp);
299 merge_text->priv->fp = NULL;
305 /*--------------------------------------------------------------------------*/
306 /* Get next record from merge source, NULL if no records left (i.e. EOF) */
307 /*--------------------------------------------------------------------------*/
308 static glMergeRecord *
309 gl_merge_text_get_record (glMerge *merge)
311 glMergeText *merge_text;
314 glMergeRecord *record;
319 merge_text = GL_MERGE_TEXT (merge);
321 delim = merge_text->priv->delim;
322 fp = merge_text->priv->fp;
328 fields = parse_line (fp, delim);
329 if ( fields == NULL ) {
333 record = g_new0 (glMergeRecord, 1);
334 record->select_flag = TRUE;
336 for (p=fields; p != NULL; p=p->next) {
338 field = g_new0 (glMergeField, 1);
339 field->key = g_strdup_printf ("%d", i_field++);
340 #ifndef CSV_ALWAYS_UTF8
341 field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
343 field->value = g_strdup (p->data);
346 record->field_list = g_list_append (record->field_list, field);
348 free_fields (&fields);
354 /*---------------------------------------------------------------------------*/
355 /* Copy merge_text specific fields. */
356 /*---------------------------------------------------------------------------*/
358 gl_merge_text_copy (glMerge *dst_merge,
361 glMergeText *dst_merge_text;
362 glMergeText *src_merge_text;
364 dst_merge_text = GL_MERGE_TEXT (dst_merge);
365 src_merge_text = GL_MERGE_TEXT (src_merge);
367 dst_merge_text->priv->delim = src_merge_text->priv->delim;
371 /*---------------------------------------------------------------------------*/
372 /* PRIVATE. Parse line. */
374 /* Attempt to be a robust parser of various CSV (and similar) formats. */
376 /* Split into fields, accounting for: */
377 /* - delimiters may be embedded in quoted text (") */
378 /* - delimiters may be "escaped" by a leading backslash (\) */
379 /* - quotes may be embedded in quoted text as two adjacent quotes ("") */
380 /* - quotes may be "escaped" either within or outside of quoted text. */
381 /* - newlines may be embedded in quoted text, allowing a field to span */
382 /* more than one line. */
384 /* This function does not do any parsing of the individual fields, other */
385 /* than to correctly interpret delimiters. Actual parsing of the individual */
386 /* fields is done in parse_field(). */
388 /* Returns a list of fields. A blank line is considered a line with one */
389 /* empty field. Returns empty (NULL) when done. */
390 /*---------------------------------------------------------------------------*/
392 parse_line (FILE *fp,
398 enum { BEGIN, NORMAL, QUOTED, QUOTED_QUOTE1,
399 NORMAL_ESCAPED, QUOTED_ESCAPED, DONE } state;
402 string = g_string_new( "" );
403 while ( state != DONE ) {
411 /* first field is empty. */
412 list = g_list_append (list, g_strdup (""));
418 string = g_string_append_c (string, c);
422 string = g_string_append_c (string, c);
423 state = NORMAL_ESCAPED;
426 /* treat as one empty field. */
427 list = g_list_append (list, g_strdup (""));
431 /* end of file, no more lines. */
435 string = g_string_append_c (string, c);
444 list = g_list_append (list, parse_field (string->str));
445 string = g_string_assign( string, "" );
451 string = g_string_append_c (string, c);
455 string = g_string_append_c (string, c);
456 state = NORMAL_ESCAPED;
460 list = g_list_append (list, parse_field (string->str));
464 string = g_string_append_c (string, c);
473 string = g_string_append_c (string, c);
474 state = QUOTED_QUOTE1;
477 string = g_string_append_c (string, c);
478 state = QUOTED_ESCAPED;
481 /* File ended mid way through quoted item */
482 list = g_list_append (list, parse_field (string->str));
486 string = g_string_append_c (string, c);
494 list = g_list_append (list, parse_field (string->str));
495 string = g_string_assign( string, "" );
501 /* insert quotes in string, stay quoted. */
502 string = g_string_append_c (string, c);
507 /* line or file ended after quoted item */
508 list = g_list_append (list, parse_field (string->str));
512 string = g_string_append_c (string, c);
521 /* File ended mid way through quoted item */
522 list = g_list_append (list, parse_field (string->str));
526 string = g_string_append_c (string, c);
535 /* File ended mid way through quoted item */
536 list = g_list_append (list, parse_field (string->str));
540 string = g_string_append_c (string, c);
547 g_assert_not_reached();
552 g_string_free( string, TRUE );
558 /*---------------------------------------------------------------------------*/
559 /* PRIVATE. Parse field. */
561 /* - Strip leading and trailing white space, unless quoted. */
562 /* - Strip CR, unless escaped. */
563 /* - Expand '\n' and '\t' into newline and tab characters. */
564 /* - Remove quotes, unless escaped (\" anywhere or "" within quotes) */
565 /*---------------------------------------------------------------------------*/
567 parse_field (gchar *raw_field)
570 gchar *pass1_field, *c, *field;
571 enum { NORMAL, NORMAL_ESCAPED, QUOTED, QUOTED_ESCAPED, QUOTED_QUOTE1} state;
575 * Pass 1: remove leading and trailing spaces.
577 pass1_field = g_strdup (raw_field);
578 g_strstrip (pass1_field);
581 * Pass 2: resolve quoting and escaping.
584 string = g_string_new( "" );
585 for ( c=pass1_field; *c != 0; c++ )
592 state = NORMAL_ESCAPED;
601 string = g_string_append_c (string, *c);
609 string = g_string_append_c (string, '\n');
613 string = g_string_append_c (string, '\t');
617 string = g_string_append_c (string, *c);
626 state = QUOTED_ESCAPED;
629 state = QUOTED_QUOTE1;
635 string = g_string_append_c (string, *c);
643 string = g_string_append_c (string, '\n');
647 string = g_string_append_c (string, '\t');
651 string = g_string_append_c (string, *c);
660 /* insert quotes in string, stay quoted. */
661 string = g_string_append_c (string, *c);
665 /* Strip CR, return to QUOTED. */
669 string = g_string_append_c (string, *c);
676 g_assert_not_reached();
682 field = g_strdup (string->str);
683 g_string_free( string, TRUE );
684 g_free (pass1_field);
690 /*---------------------------------------------------------------------------*/
691 /* Free list of fields. */
692 /*---------------------------------------------------------------------------*/
694 free_fields (GList ** list)
698 for (p = *list; p != NULL; p = p->next) {
710 * Local Variables: -- emacs
712 * c-basic-offset: 8 -- emacs
713 * tab-width: 8 -- emacs
714 * indent-tabs-mode: nil -- emacs