3 * Copyright (C) 2001-2009 Jim Evins <evins@snaught.com>.
5 * This file is part of gLabels.
7 * gLabels is free software: you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation, either version 3 of the License, or
10 * (at your option) any later version.
12 * gLabels is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License
18 * along with gLabels. If not, see <http://www.gnu.org/licenses/>.
23 #include "merge-text.h"
/* Line buffer length constant. It is not referenced anywhere in the visible */
/* part of this file. NOTE(review): possibly unused - confirm before removing. */
29 #define LINE_BUF_LEN 1024
32 /*===========================================*/
34 /*===========================================*/
/* Private per-instance state, reached through merge_text->priv. */
/* Only one member declaration is visible here; the rest of this file also */
/* accesses priv->delim, priv->fp, priv->keys and priv->n_fields_max. */
36 struct _glMergeTextPrivate {
/* TRUE when the first line of the source supplies the key names; consulted */
/* by key_from_index, gl_merge_text_get_key_list and gl_merge_text_open. */
39 gboolean line1_has_keys;
58 /*===========================================*/
60 /*===========================================*/
63 /*===========================================*/
64 /* Local function prototypes */
65 /*===========================================*/
/* GObject overrides, installed on object_class in gl_merge_text_class_init. */
67 static void gl_merge_text_finalize (GObject *object);
69 static void gl_merge_text_set_property (GObject *object,
74 static void gl_merge_text_get_property (GObject *object,
/* Helpers for the stored key names (priv->keys). */
79 static gchar *key_from_index (glMergeText *merge_text,
81 static void clear_keys (glMergeText *merge_text);
/* glMerge virtual method implementations, installed on merge_class in */
/* gl_merge_text_class_init. */
83 static GList *gl_merge_text_get_key_list (const glMerge *merge);
84 static gchar *gl_merge_text_get_primary_key (const glMerge *merge);
85 static void gl_merge_text_open (glMerge *merge);
86 static void gl_merge_text_close (glMerge *merge);
87 static glMergeRecord *gl_merge_text_get_record (glMerge *merge);
88 static void gl_merge_text_copy (glMerge *dst_merge,
89 const glMerge *src_merge);
/* Line parser and the matching release routine for the list it produces. */
91 static GList *parse_line (FILE *fp,
93 static void free_fields (GList **fields);
97 /*****************************************************************************/
98 /* Boilerplate object stuff. */
99 /*****************************************************************************/
/* Registers the glMergeText type as a subclass of GL_TYPE_MERGE. Per the */
/* GObject documentation this macro also declares the static */
/* gl_merge_text_parent_class pointer and expects the gl_merge_text_class_init */
/* and gl_merge_text_init functions defined in this file. */
100 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE)
/* Class initializer. Hooks up the GObject property accessors and finalizer, */
/* installs the "delim" and "line1_has_keys" properties (both readable and */
/* writable), and fills in the glMerge virtual method table with the */
/* text-backend implementations from this file. */
104 gl_merge_text_class_init (glMergeTextClass *class)
106 GObjectClass *object_class = G_OBJECT_CLASS (class);
107 glMergeClass *merge_class = GL_MERGE_CLASS (class);
109 gl_debug (DEBUG_MERGE, "START");
/* NOTE(review): G_DEFINE_TYPE normally sets gl_merge_text_parent_class */
/* itself before calling this function, so this assignment looks redundant. */
111 gl_merge_text_parent_class = g_type_class_peek_parent (class);
/* Accessors must be set before the properties are installed below. */
113 object_class->set_property = gl_merge_text_set_property;
114 object_class->get_property = gl_merge_text_get_property;
/* "delim": a char property; presumably the field delimiter passed to */
/* parse_line (see gl_merge_text_open / gl_merge_text_get_record). */
116 g_object_class_install_property
119 g_param_spec_char ("delim", NULL, NULL,
121 (G_PARAM_READABLE | G_PARAM_WRITABLE)));
/* "line1_has_keys": boolean property backing priv->line1_has_keys. */
123 g_object_class_install_property
126 g_param_spec_boolean ("line1_has_keys", NULL, NULL,
128 (G_PARAM_READABLE | G_PARAM_WRITABLE)));
130 object_class->finalize = gl_merge_text_finalize;
/* glMerge virtual methods implemented by this backend. */
132 merge_class->get_key_list = gl_merge_text_get_key_list;
133 merge_class->get_primary_key = gl_merge_text_get_primary_key;
134 merge_class->open = gl_merge_text_open;
135 merge_class->close = gl_merge_text_close;
136 merge_class->get_record = gl_merge_text_get_record;
137 merge_class->copy = gl_merge_text_copy;
139 gl_debug (DEBUG_MERGE, "END");
/* Instance initializer. Allocates the zero-filled private struct and an */
/* empty pointer array for the key names; both are released in */
/* gl_merge_text_finalize. */
144 gl_merge_text_init (glMergeText *merge_text)
146 gl_debug (DEBUG_MERGE, "START");
/* g_new0 zero-fills, so every other private member starts as 0/NULL/FALSE. */
148 merge_text->priv = g_new0 (glMergeTextPrivate, 1);
/* Created without an element free function; clear_keys frees the strings. */
150 merge_text->priv->keys = g_ptr_array_new ();
152 gl_debug (DEBUG_MERGE, "END");
/* GObject finalizer. Frees the stored key strings, the key array and the */
/* private struct, then chains up to the parent class finalizer. */
157 gl_merge_text_finalize (GObject *object)
/* NOTE(review): the cast runs before the validity check below. */
159 glMergeText *merge_text = GL_MERGE_TEXT (object);
161 gl_debug (DEBUG_MERGE, "START");
163 g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
/* The array has no element free function, so the key strings must be */
/* freed explicitly before the array itself is released. */
165 clear_keys (merge_text);
166 g_ptr_array_free (merge_text->priv->keys, TRUE);
167 g_free (merge_text->priv);
/* Chain up last, after all instance-owned memory has been released. */
169 G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
171 gl_debug (DEBUG_MERGE, "END");
175 /*--------------------------------------------------------------------------*/
177 /*--------------------------------------------------------------------------*/
/* GObject set_property handler. Copies the incoming GValue into the */
/* matching private member ("delim" or "line1_has_keys") and logs the new */
/* value; unknown property ids are reported with the standard GObject warning. */
/* The switch statement and the case label for "delim" are not visible here. */
179 gl_merge_text_set_property (GObject *object,
184 glMergeText *merge_text;
186 merge_text = GL_MERGE_TEXT (object);
/* "delim" property: stored as a signed char. */
191 merge_text->priv->delim = g_value_get_schar (value);
192 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
193 merge_text->priv->delim);
/* "line1_has_keys" property. */
196 case ARG_LINE1_HAS_KEYS:
197 merge_text->priv->line1_has_keys = g_value_get_boolean (value);
198 gl_debug (DEBUG_MERGE, "ARG \"line1_has_keys\" = \"%d\"",
199 merge_text->priv->line1_has_keys);
/* Presumably the default branch: warn about an unrecognized property id. */
203 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
211 /*--------------------------------------------------------------------------*/
213 /*--------------------------------------------------------------------------*/
/* GObject get_property handler. Writes the current value of the requested */
/* private member ("delim" or "line1_has_keys") into the supplied GValue; */
/* unknown property ids are reported with the standard GObject warning. */
/* The switch statement and the case label for "delim" are not visible here. */
215 gl_merge_text_get_property (GObject *object,
220 glMergeText *merge_text;
222 merge_text = GL_MERGE_TEXT (object);
/* "delim" property. */
227 g_value_set_schar (value, merge_text->priv->delim);
/* "line1_has_keys" property. */
230 case ARG_LINE1_HAS_KEYS:
231 g_value_set_boolean (value, merge_text->priv->line1_has_keys);
/* Presumably the default branch: warn about an unrecognized property id. */
235 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
243 /*---------------------------------------------------------------------------*/
244 /* Lookup key name from zero based index. */
245 /*---------------------------------------------------------------------------*/
/* Returns a newly allocated key name for field number i_field (zero based). */
/* Both return paths allocate (g_strdup / g_strdup_printf), so the caller */
/* receives a string it must g_free. */
247 key_from_index (glMergeText *merge_text,
/* Use the stored name only when line 1 supplied keys AND this column has one. */
250 if ( merge_text->priv->line1_has_keys &&
251 (i_field < merge_text->priv->keys->len) )
253 return g_strdup (g_ptr_array_index (merge_text->priv->keys, i_field));
/* Otherwise fall back to the one-based column number as the key, e.g. "1". */
257 return g_strdup_printf ("%d", i_field+1);
262 /*---------------------------------------------------------------------------*/
263 /* Clear stored keys. */
264 /*---------------------------------------------------------------------------*/
/* Frees every stored key string and empties the key array, leaving the */
/* array object itself allocated for reuse. */
266 clear_keys (glMergeText *merge_text)
270 for ( i = 0; i < merge_text->priv->keys->len; i++ )
272 g_free (g_ptr_array_index (merge_text->priv->keys, i));
/* Length is reset by writing the public len field directly. */
/* NOTE(review): g_ptr_array_set_size (keys, 0) would be the API route. */
274 merge_text->priv->keys->len = 0;
278 /*--------------------------------------------------------------------------*/
280 /*--------------------------------------------------------------------------*/
/* Builds a list of key names, one per field, by calling key_from_index for */
/* each field index. Every element is a newly allocated string. The */
/* declaration of key_list and the return statement are not visible here; */
/* presumably the list is returned and the caller frees it and its strings. */
282 gl_merge_text_get_key_list (const glMerge *merge)
284 glMergeText *merge_text;
285 gint i_field, n_fields;
288 gl_debug (DEBUG_MERGE, "BEGIN");
290 merge_text = GL_MERGE_TEXT (merge);
/* With keys from line 1, the field count is the number of stored keys. */
292 if ( merge_text->priv->line1_has_keys )
294 n_fields = merge_text->priv->keys->len;
/* Otherwise (presumably the else branch) use the widest record seen so */
/* far, as tracked by gl_merge_text_get_record. */
298 n_fields = merge_text->priv->n_fields_max;
302 for ( i_field=0; i_field < n_fields; i_field++ )
304 key_list = g_list_append (key_list, key_from_index(merge_text, i_field));
307 gl_debug (DEBUG_MERGE, "END");
313 /*--------------------------------------------------------------------------*/
314 /* Get "primary" key. */
315 /*--------------------------------------------------------------------------*/
/* Returns the key of the first column as a newly allocated string (the */
/* allocation comes from key_from_index). */
317 gl_merge_text_get_primary_key (const glMerge *merge)
319 /* For now, let's always assume the first column is the primary key. */
320 return key_from_index (GL_MERGE_TEXT (merge), 0);
324 /*--------------------------------------------------------------------------*/
325 /* Open merge source. */
326 /*--------------------------------------------------------------------------*/
/* Opens the merge source named by gl_merge_get_src: the single character */
/* "-" selects stdin, anything else is opened as a file for reading. Resets */
/* the stored keys and the field-count high-water mark, and when */
/* line1_has_keys is set, consumes the first line to populate the keys. */
328 gl_merge_text_open (glMerge *merge)
330 glMergeText *merge_text;
336 merge_text = GL_MERGE_TEXT (merge);
338 src = gl_merge_get_src (merge);
/* A source of exactly "-" means read from standard input. */
342 if (g_utf8_strlen(src, -1) == 1 && src[0] == '-')
343 merge_text->priv->fp = stdin;
/* NOTE(review): no check of the fopen result is visible here; confirm */
/* that parse_line and callers tolerate a NULL fp. */
345 merge_text->priv->fp = fopen (src, "r");
/* Discard state left over from any previous open. */
349 clear_keys (merge_text);
350 merge_text->priv->n_fields_max = 0;
352 if ( merge_text->priv->line1_has_keys )
355 * Extract keys from first line and discard line
358 line1_fields = parse_line (merge_text->priv->fp, merge_text->priv->delim);
359 for ( p = line1_fields; p != NULL; p = p->next )
/* Keys are stored as private copies, so the parsed list can be freed. */
361 g_ptr_array_add (merge_text->priv->keys, g_strdup (p->data));
363 free_fields (&line1_fields);
372 /*--------------------------------------------------------------------------*/
373 /* Close merge source. */
374 /*--------------------------------------------------------------------------*/
/* Closes the merge source if one is open and clears the stored handle so */
/* that a second close is a no-op. */
376 gl_merge_text_close (glMerge *merge)
378 glMergeText *merge_text;
380 merge_text = GL_MERGE_TEXT (merge);
382 if (merge_text->priv->fp != NULL) {
/* NOTE(review): gl_merge_text_open may have stored stdin in fp; no */
/* visible check here excludes stdin from being closed - confirm intent. */
384 fclose (merge_text->priv->fp);
385 merge_text->priv->fp = NULL;
391 /*--------------------------------------------------------------------------*/
392 /* Get next record from merge source, NULL if no records left (i.e., EOF) */
393 /*--------------------------------------------------------------------------*/
/* Parses the next line of the open source into a glMergeRecord. Each parsed */
/* field becomes a glMergeField whose key comes from key_from_index and whose */
/* value is a copy of the field text. Also raises priv->n_fields_max when */
/* this record has more fields than any seen before. The handling of an */
/* empty parse result and the return statement are not visible here. */
394 static glMergeRecord *
395 gl_merge_text_get_record (glMerge *merge)
397 glMergeText *merge_text;
400 glMergeRecord *record;
405 merge_text = GL_MERGE_TEXT (merge);
407 delim = merge_text->priv->delim;
408 fp = merge_text->priv->fp;
410 fields = parse_line (fp, delim);
/* NULL from parse_line means no more lines; body of this branch is not */
/* visible (the header comment says NULL is returned at EOF). */
411 if ( fields == NULL ) {
415 record = g_new0 (glMergeRecord, 1);
/* New records start out selected. */
416 record->select_flag = TRUE;
417 for (p=fields, i_field=0; p != NULL; p=p->next, i_field++) {
419 field = g_new0 (glMergeField, 1);
420 field->key = key_from_index (merge_text, i_field);
/* Unless CSV_ALWAYS_UTF8 is defined, the text is converted from the */
/* locale encoding to UTF-8. NOTE(review): g_locale_to_utf8 returns NULL */
/* on conversion failure and no error argument is passed - confirm that */
/* consumers tolerate a NULL value. */
421 #ifndef CSV_ALWAYS_UTF8
422 field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
/* Presumably the #else branch: take the text as UTF-8 already. */
424 field->value = g_strdup (p->data);
427 record->field_list = g_list_append (record->field_list, field);
/* Keys and values were copied above, so the parsed list can be freed. */
429 free_fields (&fields);
/* After the loop i_field equals the number of fields in this record. */
431 if ( i_field > merge_text->priv->n_fields_max )
433 merge_text->priv->n_fields_max = i_field;
440 /*---------------------------------------------------------------------------*/
441 /* Copy merge_text specific fields. */
442 /*---------------------------------------------------------------------------*/
/* Copies the text-backend state from src_merge into dst_merge: delimiter, */
/* line1_has_keys flag, a deep copy of each key string, and n_fields_max. */
/* The open file handle (priv->fp) is not copied in the visible lines. */
444 gl_merge_text_copy (glMerge *dst_merge,
445 const glMerge *src_merge)
447 glMergeText *dst_merge_text;
448 glMergeText *src_merge_text;
451 dst_merge_text = GL_MERGE_TEXT (dst_merge);
452 src_merge_text = GL_MERGE_TEXT (src_merge);
454 dst_merge_text->priv->delim = src_merge_text->priv->delim;
455 dst_merge_text->priv->line1_has_keys = src_merge_text->priv->line1_has_keys;
/* Keys are appended to the destination array; no clear_keys call on the */
/* destination is visible. NOTE(review): presumably dst is always freshly */
/* constructed with an empty key array - confirm against callers. */
457 for ( i=0; i < src_merge_text->priv->keys->len; i++ )
459 g_ptr_array_add (dst_merge_text->priv->keys,
460 g_strdup ((gchar *)g_ptr_array_index (src_merge_text->priv->keys, i)));
463 dst_merge_text->priv->n_fields_max = src_merge_text->priv->n_fields_max;
467 /*---------------------------------------------------------------------------*/
468 /* PRIVATE. Parse line. */
470 /* Attempt to be a robust parser of various CSV (and similar) formats. */
472 /* Based on CSV format described in RFC 4180 section 2. */
474 /* Additions to RFC 4180 rules: */
475 /* - delimiters and other special characters may be "escaped" by a leading */
477 /* - C escape sequences for newline (\n) and tab (\t) are also translated. */
478 /* - if quoted text is not followed by a delimiter, any additional text is */
479 /* concatenated with quoted portion. */
481 /* Returns a list of fields. A blank line is considered a line with one */
482 /* empty field. Returns empty (NULL) when done. */
483 /*---------------------------------------------------------------------------*/
/* Character-driven state machine that splits one line of input into fields. */
/* It loops until the state becomes DONE. Text for the current field is */
/* accumulated in the GString `field`; whenever a field is complete, a */
/* g_strdup'ed copy is appended to `list`, so the list owns its strings */
/* independently of the accumulator. The switch/case labels, the read of `c` */
/* from fp and the return statement are not visible here; presumably `c` is */
/* the character just read and `list` is the return value. */
485 parse_line (FILE *fp,
/* States visible here: quoted text, a quote seen inside quoted text, an */
/* escape inside quoted text, unquoted ("simple") text, and an escape */
/* inside unquoted text. Further states are declared on lines not shown. */
492 QUOTED, QUOTED_QUOTE1, QUOTED_ESCAPED,
493 SIMPLE, SIMPLE_ESCAPED,
/* Accumulator for the field currently being read. */
502 field = g_string_new( "" );
503 while ( state != DONE ) {
511 /* last field is empty. */
512 list = g_list_append (list, g_strdup (""));
520 /* end of file, no more lines. */
524 /* start a quoted field. */
528 /* simple field, but 1st character is an escape. */
529 state = SIMPLE_ESCAPED;
534 /* field is empty. */
535 list = g_list_append (list, g_strdup (""));
540 /* beginning of a simple field. */
541 field = g_string_append_c (field, c);
551 /* File ended mid way through quoted item, truncate field. */
552 list = g_list_append (list, g_strdup (field->str));
556 /* Possible end of field, but could be 1st of a pair. */
557 state = QUOTED_QUOTE1;
560 /* Escape next character, or special escape, e.g. \n. */
561 state = QUOTED_ESCAPED;
564 /* Use character literally. */
565 field = g_string_append_c (field, c);
574 /* line or file ended after quoted item */
575 list = g_list_append (list, g_strdup (field->str));
579 /* second quote, insert and stay quoted. */
580 field = g_string_append_c (field, c);
584 /* ignore and go to fallback */
/* Field finished: store a copy and reset the accumulator for the next one. */
591 list = g_list_append (list, g_strdup (field->str));
592 field = g_string_assign( field, "" );
597 /* fallback if not a delim or another quote. */
598 field = g_string_append_c (field, c);
608 /* File ended mid way through quoted item */
609 list = g_list_append (list, g_strdup (field->str));
613 /* Decode "\n" as newline. */
614 field = g_string_append_c (field, '\n');
618 /* Decode "\t" as tab. */
619 field = g_string_append_c (field, '\t');
623 /* Use character literally. */
624 field = g_string_append_c (field, c);
634 /* line or file ended */
635 list = g_list_append (list, g_strdup (field->str));
643 /* Escape next character, or special escape, e.g. \n. */
644 state = SIMPLE_ESCAPED;
/* Field finished: store a copy and reset the accumulator for the next one. */
650 list = g_list_append (list, g_strdup (field->str));
651 field = g_string_assign( field, "" );
656 /* Use character literally. */
657 field = g_string_append_c (field, c);
667 /* File ended mid way through quoted item */
668 list = g_list_append (list, g_strdup (field->str));
672 /* Decode "\n" as newline. */
673 field = g_string_append_c (field, '\n');
677 /* Decode "\t" as tab. */
678 field = g_string_append_c (field, '\t');
682 /* Use character literally. */
683 field = g_string_append_c (field, c);
/* Presumably the default branch: an unknown state is a programming error. */
690 g_assert_not_reached();
/* Release the accumulator together with its character buffer; the list */
/* entries are separate copies and are unaffected. */
695 g_string_free( field, TRUE );
701 /*---------------------------------------------------------------------------*/
702 /* Free list of fields. */
703 /*---------------------------------------------------------------------------*/
/* Releases a field list produced by parse_line. Takes the address of the */
/* list pointer and walks every node; the loop body and any release of the */
/* list itself are not visible here. NOTE(review): presumably each node's */
/* string is freed and *list is reset to NULL - confirm in the full source. */
705 free_fields (GList ** list)
709 for (p = *list; p != NULL; p = p->next) {
721 * Local Variables: -- emacs
723 * c-basic-offset: 8 -- emacs
724 * tab-width: 8 -- emacs
725 * indent-tabs-mode: nil -- emacs