1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 8 -*- */
4 * (GLABELS) Label and Business Card Creation program for GNOME
6 * merge_text.c: text-file merge backend module
8 * Copyright (C) 2001 Jim Evins <evins@snaught.com>.
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or
13 * (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
27 #include "merge-text.h"
33 #define LINE_BUF_LEN 1024
35 /*===========================================*/
37 /*===========================================*/
/* Private instance data for glMergeText.  The member list is not visible in
 * this excerpt; code elsewhere in this file accesses priv->delim (the field
 * delimiter character) and priv->fp (the stream opened with fopen()). */
39 struct _glMergeTextPrivate {
53 /*===========================================*/
55 /*===========================================*/
58 /*===========================================*/
59 /* Local function prototypes */
60 /*===========================================*/
/* File-local prototypes, in three groups: the GObject finalize/property
 * handlers, the glMerge virtual-method implementations that class_init
 * assigns to merge_class, and the parsing helpers (parse_line, parse_field,
 * free_fields). */
62 static void gl_merge_text_finalize (GObject *object);
64 static void gl_merge_text_set_property (GObject *object,
69 static void gl_merge_text_get_property (GObject *object,
74 static GList *gl_merge_text_get_key_list (glMerge *merge);
75 static gchar *gl_merge_text_get_primary_key (glMerge *merge);
76 static void gl_merge_text_open (glMerge *merge);
77 static void gl_merge_text_close (glMerge *merge);
78 static glMergeRecord *gl_merge_text_get_record (glMerge *merge);
79 static void gl_merge_text_copy (glMerge *dst_merge,
82 static GList *parse_line (FILE *fp,
84 static gchar *parse_field (gchar *raw_field);
85 static void free_fields (GList **fields);
88 /*****************************************************************************/
89 /* Boilerplate object stuff. */
90 /*****************************************************************************/
/* Registers glMergeText as a type derived from GL_TYPE_MERGE.  The macro
 * generates gl_merge_text_get_type() and the static
 * gl_merge_text_parent_class pointer, and uses gl_merge_text_class_init()
 * and gl_merge_text_init() as the class and instance initializers. */
91 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE);
/* Class initializer: hooks up the GObject property and finalize handlers,
 * installs the "delim" property, and fills in the glMerge virtual methods
 * with this backend's implementations. */
94 gl_merge_text_class_init (glMergeTextClass *class)
96 GObjectClass *object_class = G_OBJECT_CLASS (class);
97 glMergeClass *merge_class = GL_MERGE_CLASS (class);
99 gl_debug (DEBUG_MERGE, "START");
/* NOTE(review): G_DEFINE_TYPE already sets gl_merge_text_parent_class before
 * class_init runs, so this assignment looks redundant -- confirm. */
101 gl_merge_text_parent_class = g_type_class_peek_parent (class);
103 object_class->set_property = gl_merge_text_set_property;
104 object_class->get_property = gl_merge_text_get_property;
/* Install the readable and writable char property "delim".  The property id
 * and the min/max/default arguments are on lines not visible here. */
106 g_object_class_install_property
109 g_param_spec_char ("delim", NULL, NULL,
111 (G_PARAM_READABLE | G_PARAM_WRITABLE)));
113 object_class->finalize = gl_merge_text_finalize;
/* Backend entry points exposed through the glMerge class structure. */
115 merge_class->get_key_list = gl_merge_text_get_key_list;
116 merge_class->get_primary_key = gl_merge_text_get_primary_key;
117 merge_class->open = gl_merge_text_open;
118 merge_class->close = gl_merge_text_close;
119 merge_class->get_record = gl_merge_text_get_record;
120 merge_class->copy = gl_merge_text_copy;
122 gl_debug (DEBUG_MERGE, "END");
/* Instance initializer: allocates the private structure.  g_new0() returns
 * zero-filled memory, so every private member starts out as 0/NULL. */
126 gl_merge_text_init (glMergeText *merge_text)
128 gl_debug (DEBUG_MERGE, "START");
130 merge_text->priv = g_new0 (glMergeTextPrivate, 1);
132 gl_debug (DEBUG_MERGE, "END");
/* Finalizer: frees the private structure allocated in gl_merge_text_init(),
 * then chains up to the parent class's finalize. */
136 gl_merge_text_finalize (GObject *object)
138 glMergeText *merge_text = GL_MERGE_TEXT (object);
140 gl_debug (DEBUG_MERGE, "START");
/* NOTE(review): the GL_MERGE_TEXT() cast above runs before this validity
 * check.  Also, priv->fp is not closed in the visible lines -- presumably
 * callers always pair open with close; verify. */
142 g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
144 g_free (merge_text->priv);
146 G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
148 gl_debug (DEBUG_MERGE, "END");
151 /*--------------------------------------------------------------------------*/
153 /*--------------------------------------------------------------------------*/
/* GObject set_property handler.  Stores the char carried by `value` as the
 * delimiter in the private data and logs it; an unrecognised property id is
 * reported with G_OBJECT_WARN_INVALID_PROPERTY_ID.  The switch/case lines
 * that select between the two are not visible in this excerpt. */
155 gl_merge_text_set_property (GObject *object,
160 glMergeText *merge_text;
162 merge_text = GL_MERGE_TEXT (object);
167 merge_text->priv->delim = g_value_get_char (value);
168 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
169 merge_text->priv->delim);
173 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
180 /*--------------------------------------------------------------------------*/
182 /*--------------------------------------------------------------------------*/
/* GObject get_property handler.  Copies the stored delimiter character into
 * `value`; an unrecognised property id is reported with
 * G_OBJECT_WARN_INVALID_PROPERTY_ID.  The switch/case lines that select
 * between the two are not visible in this excerpt. */
184 gl_merge_text_get_property (GObject *object,
189 glMergeText *merge_text;
191 merge_text = GL_MERGE_TEXT (object);
196 g_value_set_char (value, merge_text->priv->delim);
200 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
207 /*--------------------------------------------------------------------------*/
209 /*--------------------------------------------------------------------------*/
/* Builds the list of field keys for this merge source.  The keys are the
 * decimal strings "1" .. "N", where N is the largest n_fields value seen
 * across the record list.  Each key is newly allocated by g_strdup_printf();
 * presumably the caller owns the returned list -- the return statement is
 * not visible in this excerpt. */
211 gl_merge_text_get_key_list (glMerge *merge)
213 glMergeText *merge_text;
214 GList *record_list, *p_rec;
215 glMergeRecord *record;
217 gint i_field, n_fields, n_fields_max = 0;
220 /* Field keys are simply column numbers. */
222 gl_debug (DEBUG_MERGE, "BEGIN");
224 merge_text = GL_MERGE_TEXT (merge);
226 record_list = (GList *)gl_merge_get_record_list (merge);
/* Walk every record and every field in it, tracking the maximum field count.
 * The lines that reset and increment n_fields are not visible here; it is
 * presumably the number of fields in the current record. */
228 for ( p_rec=record_list; p_rec!=NULL; p_rec=p_rec->next ) {
229 record = (glMergeRecord *)p_rec->data;
232 for ( p_field=record->field_list; p_field!=NULL; p_field=p_field->next ) {
235 if ( n_fields > n_fields_max ) n_fields_max = n_fields;
/* Emit one key per column, numbered from 1. */
239 for (i_field=1; i_field <= n_fields_max; i_field++) {
240 key_list = g_list_append (key_list, g_strdup_printf ("%d", i_field));
244 gl_debug (DEBUG_MERGE, "END");
249 /*--------------------------------------------------------------------------*/
250 /* Get "primary" key. */
251 /*--------------------------------------------------------------------------*/
/* Returns a newly allocated copy of the key "1", i.e. the first column.
 * `merge` is not consulted in the visible lines. */
253 gl_merge_text_get_primary_key (glMerge *merge)
255 /* For now, let's always assume the first column is the primary key. */
256 return g_strdup ("1");
259 /*--------------------------------------------------------------------------*/
260 /* Open merge source. */
261 /*--------------------------------------------------------------------------*/
/* Opens the merge source for reading: fetches the source name with
 * gl_merge_get_src() and stores the stream returned by fopen(src, "r") in
 * priv->fp. */
263 gl_merge_text_open (glMerge *merge)
265 glMergeText *merge_text;
268 merge_text = GL_MERGE_TEXT (merge);
270 src = gl_merge_get_src (merge);
/* NOTE(review): fopen() returns NULL on failure and no check of the result is
 * visible here; gl_merge_text_close() tolerates a NULL fp, but confirm that
 * the record reader does as well. */
273 merge_text->priv->fp = fopen (src, "r");
279 /*--------------------------------------------------------------------------*/
280 /* Close merge source. */
281 /*--------------------------------------------------------------------------*/
/* Closes the merge source if it is open, then resets priv->fp to NULL so
 * that a repeated close is a no-op. */
283 gl_merge_text_close (glMerge *merge)
285 glMergeText *merge_text;
287 merge_text = GL_MERGE_TEXT (merge);
289 if (merge_text->priv->fp != NULL) {
291 fclose (merge_text->priv->fp);
292 merge_text->priv->fp = NULL;
297 /*--------------------------------------------------------------------------*/
298 /* Get next record from merge source, NULL if no records left (i.e. EOF) */
299 /*--------------------------------------------------------------------------*/
/* Reads one line from the open source with parse_line() and converts it into
 * a newly allocated glMergeRecord, with one glMergeField per parsed field.
 * The handling of a NULL result from parse_line() and the final return are on
 * lines not visible in this excerpt. */
300 static glMergeRecord *
301 gl_merge_text_get_record (glMerge *merge)
303 glMergeText *merge_text;
306 glMergeRecord *record;
311 merge_text = GL_MERGE_TEXT (merge);
313 delim = merge_text->priv->delim;
314 fp = merge_text->priv->fp;
320 fields = parse_line (fp, delim);
321 if ( fields == NULL ) {
/* New records start out selected. */
325 record = g_new0 (glMergeRecord, 1);
326 record->select_flag = TRUE;
328 for (p=fields; p != NULL; p=p->next) {
330 field = g_new0 (glMergeField, 1);
/* The key is the running column index formatted as decimal.  Its initial
 * value is not visible here; presumably 1, matching the key list. */
331 field->key = g_strdup_printf ("%d", i_field++);
/* Unless CSV_ALWAYS_UTF8 is defined, the value is converted from the current
 * locale's encoding to UTF-8; otherwise it is copied verbatim.
 * NOTE(review): g_locale_to_utf8() returns NULL when the conversion fails, so
 * field->value may be NULL -- confirm that consumers handle that. */
332 #ifndef CSV_ALWAYS_UTF8
333 field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
335 field->value = g_strdup (p->data);
338 record->field_list = g_list_append (record->field_list, field);
/* The record holds its own copies of the values, so release the parsed list. */
340 free_fields (&fields);
345 /*---------------------------------------------------------------------------*/
346 /* Copy merge_text specific fields. */
347 /*---------------------------------------------------------------------------*/
/* Copies the text-backend state from src_merge to dst_merge.  In the visible
 * lines only the delimiter is copied; the open stream (priv->fp) is not. */
349 gl_merge_text_copy (glMerge *dst_merge,
352 glMergeText *dst_merge_text;
353 glMergeText *src_merge_text;
355 dst_merge_text = GL_MERGE_TEXT (dst_merge);
356 src_merge_text = GL_MERGE_TEXT (src_merge);
358 dst_merge_text->priv->delim = src_merge_text->priv->delim;
361 /*---------------------------------------------------------------------------*/
362 /* PRIVATE. Parse line. */
364 /* Attempt to be a robust parser of various CSV (and similar) formats. */
366 /* Split into fields, accounting for: */
367 /* - delimiters may be embedded in quoted text (") */
368 /* - delimiters may be "escaped" by a leading backslash (\) */
369 /* - quotes may be embedded in quoted text as two adjacent quotes ("") */
370 /* - quotes may be "escaped" either within or outside of quoted text. */
371 /* - newlines may be embedded in quoted text, allowing a field to span */
372 /* more than one line. */
374 /* This function does not do any parsing of the individual fields, other */
375 /* than to correctly interpret delimiters. Actual parsing of the individual */
376 /* fields is done in parse_field(). */
378 /* Returns a list of fields. A blank line is considered a line with one */
379 /* empty field. Returns empty (NULL) when done. */
380 /*---------------------------------------------------------------------------*/
/* State-machine line splitter.  Characters are accumulated in a GString;
 * each finished field is passed through parse_field() and appended to the
 * result list.  The loop runs until the state becomes DONE.  The switch/case
 * lines that drive the transitions, the character read, and the return
 * statement are not visible in this excerpt. */
382 parse_line (FILE *fp,
388 enum { BEGIN, NORMAL, QUOTED, QUOTED_QUOTE1,
389 NORMAL_ESCAPED, QUOTED_ESCAPED, DONE } state;
/* Accumulator for the raw text of the field currently being scanned. */
392 string = g_string_new( "" );
393 while ( state != DONE ) {
401 /* first field is empty. */
402 list = g_list_append (list, g_strdup (""));
408 string = g_string_append_c (string, c);
412 string = g_string_append_c (string, c);
413 state = NORMAL_ESCAPED;
416 /* treat as one empty field. */
417 list = g_list_append (list, g_strdup (""));
421 /* end of file, no more lines. */
425 string = g_string_append_c (string, c);
/* Append the parsed field to the result list and clear the accumulator for
 * the next field. */
434 list = g_list_append (list, parse_field (string->str));
435 string = g_string_assign( string, "" );
441 string = g_string_append_c (string, c);
442 string = g_string_append_c (string, c);
446 state = NORMAL_ESCAPED;
450 list = g_list_append (list, parse_field (string->str));
454 string = g_string_append_c (string, c);
463 string = g_string_append_c (string, c);
464 state = QUOTED_QUOTE1;
467 string = g_string_append_c (string, c);
468 state = QUOTED_ESCAPED;
471 /* File ended mid way through quoted item */
472 list = g_list_append (list, parse_field (string->str));
476 string = g_string_append_c (string, c);
484 list = g_list_append (list, parse_field (string->str));
485 string = g_string_assign( string, "" );
491 /* insert quotes in string, stay quoted. */
492 string = g_string_append_c (string, c);
497 /* line or file ended after quoted item */
498 list = g_list_append (list, parse_field (string->str));
502 string = g_string_append_c (string, c);
511 /* File ended mid way through quoted item */
512 list = g_list_append (list, parse_field (string->str));
516 string = g_string_append_c (string, c);
525 /* File ended mid way through quoted item */
526 list = g_list_append (list, parse_field (string->str));
530 string = g_string_append_c (string, c);
/* Presumably the switch's default case: a state outside the enum is a
 * programming error. */
537 g_assert_not_reached();
/* Free the accumulator together with its character data. */
542 g_string_free( string, TRUE );
547 /*---------------------------------------------------------------------------*/
548 /* PRIVATE. Parse field. */
550 /* - Strip leading and trailing white space, unless quoted. */
551 /* - Strip CR, unless escaped. */
552 /* - Expand '\n' and '\t' into newline and tab characters. */
553 /* - Remove quotes, unless escaped (\" anywhere or "" within quotes) */
554 /*---------------------------------------------------------------------------*/
/* Turns one raw field into its final text.  Works on a private copy of
 * raw_field: first g_strstrip() removes leading and trailing whitespace in
 * place, then the copy is walked character by character through a small
 * state machine that builds the result in a GString.  The result is
 * duplicated into `field`; the switch/case lines and the return statement
 * are not visible in this excerpt. */
556 parse_field (gchar *raw_field)
559 gchar *pass1_field, *c, *field;
560 enum { NORMAL, NORMAL_ESCAPED, QUOTED, QUOTED_ESCAPED, QUOTED_QUOTE1} state;
564 * Pass 1: remove leading and trailing spaces.
566 pass1_field = g_strdup (raw_field);
567 g_strstrip (pass1_field);
570 * Pass 2: resolve quoting and escaping.
573 string = g_string_new( "" );
574 for ( c=pass1_field; *c != 0; c++ )
581 state = NORMAL_ESCAPED;
590 string = g_string_append_c (string, *c);
598 string = g_string_append_c (string, '\n');
602 string = g_string_append_c (string, '\t');
606 string = g_string_append_c (string, *c);
615 state = QUOTED_ESCAPED;
618 state = QUOTED_QUOTE1;
624 string = g_string_append_c (string, *c);
632 string = g_string_append_c (string, '\n');
636 string = g_string_append_c (string, '\t');
640 string = g_string_append_c (string, *c);
649 /* insert quotes in string, stay quoted. */
650 string = g_string_append_c (string, *c);
654 /* Strip CR, return to QUOTED. */
658 string = g_string_append_c (string, *c);
665 g_assert_not_reached();
/* Duplicate the result so that both the GString and the stripped working copy
 * can be released. */
671 field = g_strdup (string->str);
672 g_string_free( string, TRUE );
673 g_free (pass1_field);
678 /*---------------------------------------------------------------------------*/
679 /* Free list of fields. */
680 /*---------------------------------------------------------------------------*/
682 free_fields (GList ** list)
686 for (p = *list; p != NULL; p = p->next) {