]> git.sur5r.net Git - glabels/blob - src/merge-text.c
Imported Upstream version 3.4.0
[glabels] / src / merge-text.c
1 /*
2  *  merge-text.c
3  *  Copyright (C) 2001-2009  Jim Evins <evins@snaught.com>.
4  *
5  *  This file is part of gLabels.
6  *
7  *  gLabels is free software: you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation, either version 3 of the License, or
10  *  (at your option) any later version.
11  *
12  *  gLabels is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with gLabels.  If not, see <http://www.gnu.org/licenses/>.
19  */
20
21 #include <config.h>
22
23 #include "merge-text.h"
24
25 #include <stdio.h>
26 #include <errno.h>
27 #include <string.h>
28
29 #include "debug.h"
30
31 #define LINE_BUF_LEN 1024
32
33 /*
34  * Unicode handling.
35  *  The default encoding assumption is that files are in the system encoding.
36  * However, files are checked for a Unicode BOM (Byte Order Mark), which if found
37  * alters the manner in which files are handled.
38  */
/* Encodings distinguished by gl_read_encoding() from a file's leading bytes. */
enum UnicodeEncoding {
        SYSTEM_ENCODING,  /* no BOM recognized (also used for stdin)      */
        UTF8,             /* BOM bytes EF BB BF                           */
        UTF16_LE,         /* BOM bytes FF FE (not followed by 00 00)      */
        UTF16_BE,         /* BOM bytes FE FF                              */
        UTF32_LE,         /* BOM bytes FF FE 00 00                        */
        UTF32_BE          /* BOM bytes 00 00 FE FF                        */
};
47         
48 /*===========================================*/
49 /* Private types                             */
50 /*===========================================*/
51
/* Per-instance private state of a glMergeText object. */
struct _glMergeTextPrivate {

        gchar             delim;            /* field delimiter ("delim" property)          */
        gboolean          line1_has_keys;   /* first line holds key names, not a record    */

        enum UnicodeEncoding   encoding;    /* set by gl_merge_text_open()                 */
        GIConv             g_iconverter;    /* opened for UTF-16/UTF-32 input; 0 if unused */
        gchar              char_buf[MB_LEN_MAX]; /* output of the last g_iconv() call      */
        gsize              buf_pos;         /* index of next char_buf byte for gl_getc()   */
        gsize              buf_len;         /* number of valid bytes in char_buf           */

        FILE             *fp;               /* input stream; NULL when not open            */

        GPtrArray        *keys;             /* key strings read from the first line        */
        gint              n_fields_max;     /* largest field count seen in any record      */
};
68
/* Signal identifiers; only the LAST_SIGNAL enumerator is declared here. */
enum {
        LAST_SIGNAL
};
72
/* Property identifiers passed to g_object_class_install_property(). */
enum {
        ARG_0,              /* not installed as a property   */
        ARG_DELIM,          /* "delim" property              */
        ARG_LINE1_HAS_KEYS  /* "line1_has_keys" property     */
};
78
79
80 /*===========================================*/
81 /* Private globals                           */
82 /*===========================================*/
83
84
85 /*===========================================*/
86 /* Local function prototypes                 */
87 /*===========================================*/
88
89 static void           gl_merge_text_finalize        (GObject          *object);
90
91 static void           gl_merge_text_set_property    (GObject          *object,
92                                                      guint             param_id,
93                                                      const GValue     *value,
94                                                      GParamSpec       *pspec);
95
96 static void           gl_merge_text_get_property    (GObject          *object,
97                                                      guint             param_id,
98                                                      GValue           *value,
99                                                      GParamSpec       *pspec);
100
101 static gchar         *key_from_index                (glMergeText      *merge_text,
102                                                      gint              i_field);
103 static void           clear_keys                    (glMergeText      *merge_text);
104
105 static GList         *gl_merge_text_get_key_list    (const glMerge    *merge);
106 static gchar         *gl_merge_text_get_primary_key (const glMerge    *merge);
107 static void           gl_merge_text_open            (glMerge          *merge);
108 static void           gl_merge_text_close           (glMerge          *merge);
109 static glMergeRecord *gl_merge_text_get_record      (glMerge          *merge);
110 static void           gl_merge_text_copy            (glMerge          *dst_merge,
111                                                      const glMerge    *src_merge);
112
113 static GList         *parse_line                    (glMergeText       *merge_text,
114                                                      gchar             delim);
115 static void           free_fields                   (GList           **fields);
116
117
118
119 /*****************************************************************************/
120 /* Boilerplate object stuff.                                                 */
121 /*****************************************************************************/
122 G_DEFINE_TYPE (glMergeText, gl_merge_text, GL_TYPE_MERGE)
123
124
125 static void
126 gl_merge_text_class_init (glMergeTextClass *class)
127 {
128         GObjectClass *object_class = G_OBJECT_CLASS (class);
129         glMergeClass *merge_class  = GL_MERGE_CLASS (class);
130
131         gl_debug (DEBUG_MERGE, "START");
132
133         gl_merge_text_parent_class = g_type_class_peek_parent (class);
134
135         object_class->set_property = gl_merge_text_set_property;
136         object_class->get_property = gl_merge_text_get_property;
137
138         g_object_class_install_property
139                 (object_class,
140                  ARG_DELIM,
141                  g_param_spec_char ("delim", NULL, NULL,
142                                     0, 0x7F, ',',
143                                     (G_PARAM_READABLE | G_PARAM_WRITABLE)));
144
145         g_object_class_install_property
146                 (object_class,
147                  ARG_LINE1_HAS_KEYS,
148                  g_param_spec_boolean ("line1_has_keys", NULL, NULL,
149                                        FALSE,
150                                        (G_PARAM_READABLE | G_PARAM_WRITABLE)));
151
152         object_class->finalize = gl_merge_text_finalize;
153
154         merge_class->get_key_list    = gl_merge_text_get_key_list;
155         merge_class->get_primary_key = gl_merge_text_get_primary_key;
156         merge_class->open            = gl_merge_text_open;
157         merge_class->close           = gl_merge_text_close;
158         merge_class->get_record      = gl_merge_text_get_record;
159         merge_class->copy            = gl_merge_text_copy;
160
161         gl_debug (DEBUG_MERGE, "END");
162 }
163
164
165 static void
166 gl_merge_text_init (glMergeText *merge_text)
167 {
168         gl_debug (DEBUG_MERGE, "START");
169
170         merge_text->priv = g_new0 (glMergeTextPrivate, 1);
171
172         merge_text->priv->keys = g_ptr_array_new ();
173
174         gl_debug (DEBUG_MERGE, "END");
175 }
176
177
178 static void
179 gl_merge_text_finalize (GObject *object)
180 {
181         glMergeText *merge_text = GL_MERGE_TEXT (object);
182
183         gl_debug (DEBUG_MERGE, "START");
184
185         g_return_if_fail (object && GL_IS_MERGE_TEXT (object));
186
187         clear_keys (merge_text);
188         g_ptr_array_free (merge_text->priv->keys, TRUE);
189         g_free (merge_text->priv);
190
191         G_OBJECT_CLASS (gl_merge_text_parent_class)->finalize (object);
192
193         gl_debug (DEBUG_MERGE, "END");
194 }
195
196
197 /*--------------------------------------------------------------------------*/
198 /* Set argument.                                                            */
199 /*--------------------------------------------------------------------------*/
200 static void
201 gl_merge_text_set_property (GObject      *object,
202                             guint         param_id,
203                             const GValue *value,
204                             GParamSpec   *pspec)
205 {
206         glMergeText *merge_text;
207
208         merge_text = GL_MERGE_TEXT (object);
209
210         switch (param_id) {
211
212         case ARG_DELIM:
213                 merge_text->priv->delim = g_value_get_schar (value);
214                 gl_debug (DEBUG_MERGE, "ARG \"delim\" = \"%c\"",
215                           merge_text->priv->delim);
216                 break;
217
218         case ARG_LINE1_HAS_KEYS:
219                 merge_text->priv->line1_has_keys = g_value_get_boolean (value);
220                 gl_debug (DEBUG_MERGE, "ARG \"line1_has_keys\" = \"%d\"",
221                           merge_text->priv->line1_has_keys);
222                 break;
223
224         default:
225                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
226                 break;
227
228         }
229
230 }
231
232
233 /*--------------------------------------------------------------------------*/
234 /* Get argument.                                                            */
235 /*--------------------------------------------------------------------------*/
236 static void
237 gl_merge_text_get_property (GObject     *object,
238                             guint        param_id,
239                             GValue      *value,
240                             GParamSpec  *pspec)
241 {
242         glMergeText *merge_text;
243
244         merge_text = GL_MERGE_TEXT (object);
245
246         switch (param_id) {
247
248         case ARG_DELIM:
249                 g_value_set_schar (value, merge_text->priv->delim);
250                 break;
251
252         case ARG_LINE1_HAS_KEYS:
253                 g_value_set_boolean (value, merge_text->priv->line1_has_keys);
254                 break;
255
256         default:
257                 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, param_id, pspec);
258                 break;
259
260         }
261
262 }
263
264
265 /*---------------------------------------------------------------------------*/
266 /* Lookup key name from zero based index.                                    */
267 /*---------------------------------------------------------------------------*/
268 static gchar *
269 key_from_index (glMergeText  *merge_text,
270                 gint          i_field)
271 {
272         if ( merge_text->priv->line1_has_keys &&
273              (i_field < merge_text->priv->keys->len) )
274         {
275                 return g_strdup (g_ptr_array_index (merge_text->priv->keys, i_field));
276         }
277         else
278         {
279                 return g_strdup_printf ("%d", i_field+1);
280         }
281 }
282
283
284 /*---------------------------------------------------------------------------*/
285 /* Clear stored keys.                                                        */
286 /*---------------------------------------------------------------------------*/
287 static void
288 clear_keys (glMergeText      *merge_text)
289 {
290         gint i;
291
292         for ( i = 0; i < merge_text->priv->keys->len; i++ )
293         {
294                 g_free (g_ptr_array_index (merge_text->priv->keys, i));
295         }
296         merge_text->priv->keys->len = 0;
297 }
298
299
300 /*--------------------------------------------------------------------------*/
301 /* Get key list.                                                            */
302 /*--------------------------------------------------------------------------*/
303 static GList *
304 gl_merge_text_get_key_list (const glMerge *merge)
305 {
306         glMergeText   *merge_text;
307         gint           i_field, n_fields;
308         GList         *key_list;
309         
310         gl_debug (DEBUG_MERGE, "BEGIN");
311
312         merge_text = GL_MERGE_TEXT (merge);
313
314         if ( merge_text->priv->line1_has_keys )
315         {
316                 n_fields = merge_text->priv->keys->len;
317         }
318         else
319         {
320                 n_fields = merge_text->priv->n_fields_max;
321         }
322
323         key_list = NULL;
324         for ( i_field=0; i_field < n_fields; i_field++ )
325         {
326                 key_list = g_list_append (key_list, key_from_index(merge_text, i_field));
327         }
328
329         gl_debug (DEBUG_MERGE, "END");
330
331         return key_list;
332 }
333
334
335 /*--------------------------------------------------------------------------*/
336 /* Get "primary" key.                                                       */
337 /*--------------------------------------------------------------------------*/
338 static gchar *
339 gl_merge_text_get_primary_key (const glMerge *merge)
340 {
341         /* For now, let's always assume the first column is the primary key. */
342         return key_from_index (GL_MERGE_TEXT (merge), 0);
343 }
344
345 /*--------------------------------------------------------------------------*/
346 /* Read the byte order marks to determine unicode encoding, if any.         */
347 /* See https://en.wikipedia.org/wiki/Byte_order_mark                        */
348 /*--------------------------------------------------------------------------*/
349 static enum UnicodeEncoding
350 gl_read_encoding(FILE* fp) {
351         enum UnicodeEncoding encoding;
352         gchar ch, ch2, ch3, ch4;
353         ch = getc(fp);
354
355         if (ch == '\xff') {
356                 ch2 = getc(fp);
357
358                 if (ch2 == '\xfe') {
359                         ch3 = getc(fp);
360                         ch4 = getc(fp);
361                         if (ch3 == '\0' && ch4 == '\0') {
362                                 encoding = UTF32_LE;
363                         } else {
364                                 ungetc(ch4, fp);
365                                 ungetc(ch3, fp);
366                                 encoding = UTF16_LE;
367                         }
368                 } else {
369                         ungetc(ch2, fp);
370                         ungetc(ch, fp);
371                         encoding = SYSTEM_ENCODING;
372                 }
373         } else if (ch == '\xfe') {
374                 ch2 = getc(fp);
375                 if (ch2 == '\xff') {
376                         encoding = UTF16_BE;
377                 } else {
378                         ungetc(ch2, fp);
379                         ungetc(ch, fp);
380                         encoding = SYSTEM_ENCODING;
381                 }
382         } else if (ch == '\0') {
383                 ch2 = getc(fp);
384                 ch3 = getc(fp);
385                 ch4 = getc(fp);
386                 if (ch2 == '\0' && ch3 == '\xfe' && ch4 == '\xff') {
387                         encoding = UTF32_BE;
388                 } else {
389                         ungetc(ch4, fp);
390                         ungetc(ch3, fp);
391                         ungetc(ch2, fp);
392                         encoding = SYSTEM_ENCODING;
393                 }
394         } else if (ch == '\xef') {
395                 ch2 = getc(fp);
396                 if (ch2 == '\xbb') {
397                         ch3 = getc(fp);
398                         if (ch3 == '\xbf') {
399                                 encoding = UTF8;
400                         } else {
401                                 ungetc(ch3, fp);
402                                 ungetc(ch2, fp);
403                                 ungetc(ch, fp);
404                                 encoding = SYSTEM_ENCODING;
405                         }
406                 } else {
407                         ungetc(ch2, fp);
408                         ungetc(ch, fp);
409                         encoding = SYSTEM_ENCODING;
410                 }
411         } else {
412                 ungetc(ch, fp);
413                 encoding = SYSTEM_ENCODING;
414         }
415         return encoding;
416 }
417
418 /*
419  * gLabels get-character routine for possibly Unicode text files.
420  * If the source has a byte order mark (BOM) indicating a Unicode file, 
421  * g_iconv is used to convert input characters to GDK-standard UTF8 format.
422  */
423
424 static gchar
425 gl_getc(glMergeText *merge_text) {
426         if (merge_text->priv->buf_pos < merge_text->priv->buf_len) {
427                 return merge_text->priv->char_buf[merge_text->priv->buf_pos++];
428         } else if (merge_text->priv->encoding == SYSTEM_ENCODING || 
429                    merge_text->priv->encoding == UTF8) {
430                 return getc(merge_text->priv->fp);
431         } else {
432                 /*
433                  * a UTF-16 stream might include surrogates, which encode
434                  * characters in successive 16-bit units. If we read a
435                  * leading surrogate, read in the trailing one as well for 
436                  * processing.
437                  */
438                 gchar wcbuf[4];
439                 size_t result;
440                 gchar* outbufp;
441                 int hob_offset;
442                 int unit_len;
443                 switch (merge_text->priv->encoding) {
444                 case UTF16_BE:
445                         hob_offset = 0;
446                         unit_len = 2;
447                         break;
448                 case UTF16_LE:
449                         hob_offset = 1;
450                         unit_len = 2;
451                         break;
452                 case UTF32_BE:
453                 case UTF32_LE:
454                         hob_offset = -1;
455                         unit_len = 4;
456                         break;
457                 }
458                 gsize nBytes = fread(wcbuf, 1, unit_len, merge_text->priv->fp);
459                 if (nBytes == 0)
460                         return EOF;
461                 if (hob_offset >= 0 && (wcbuf[hob_offset] & 0xfd) == 0xd8) {
462                         nBytes += fread(wcbuf+unit_len, 1, unit_len, merge_text->priv->fp);
463                 }
464                 gchar* wcbufp = wcbuf;
465                 outbufp = merge_text->priv->char_buf;
466                 gsize buflen = sizeof(merge_text->priv->char_buf);
467                 result = g_iconv(merge_text->priv->g_iconverter, &wcbufp, &nBytes,
468                                  &outbufp, &buflen);
469                 if (result == EOF) {
470                         g_warning("g_iconv: %s", strerror(errno));
471                 }
472                 merge_text->priv->buf_len = outbufp - merge_text->priv->char_buf;
473                 merge_text->priv->buf_pos = 0;
474                 return merge_text->priv->char_buf[merge_text->priv->buf_pos++];
475         }
476 }
477
478
479 /*--------------------------------------------------------------------------*/
480 /* Open merge source.                                                       */
481 /*--------------------------------------------------------------------------*/
482 static void
483 gl_merge_text_open (glMerge *merge)
484 {
485         glMergeText *merge_text;
486         gchar       *src;
487
488         GList       *line1_fields;
489         GList       *p;
490
491         merge_text = GL_MERGE_TEXT (merge);
492
493         src = gl_merge_get_src (merge);
494
495         if (src != NULL)
496         {
497                 if (g_utf8_strlen(src, -1) == 1 && src[0] == '-') {
498                         merge_text->priv->fp = stdin;
499                         merge_text->priv->encoding = SYSTEM_ENCODING;
500                 } else {
501                         if ((merge_text->priv->fp = fopen (src, "r")) != NULL) {
502                                 merge_text->priv->encoding = gl_read_encoding(merge_text->priv->fp);
503                         } else {
504                                 g_warning("gl_merge_text_open: %s (%s)",
505                                         strerror(errno), src);
506                         }
507                 }
508                 g_free (src);
509
510                 gchar* in_codeset = NULL;
511                 switch (merge_text->priv->encoding) {
512                 case UTF8:
513                 case SYSTEM_ENCODING:
514                         break;
515                 case UTF16_BE:
516                         in_codeset = "UTF-16BE";
517                         break;
518                 case UTF16_LE:
519                         in_codeset = "UTF-16LE";
520                         break;
521                 case UTF32_BE:
522                         in_codeset = "UTF-32BE";
523                         break;
524                 case UTF32_LE:
525                         in_codeset = "UTF-32LE";
526                         break;
527                 }
528                 if (in_codeset != NULL) {
529                         merge_text->priv->g_iconverter = g_iconv_open("UTF8", in_codeset);
530                         /* Since we define both codesets, we should always be able to open the converter */
531                         g_assert(merge_text->priv->g_iconverter != (GIConv)-1);
532                 }
533                 clear_keys (merge_text);
534                 merge_text->priv->n_fields_max = 0;
535
536                 if ( merge_text->priv->line1_has_keys )
537                 {
538                         /*
539                          * Extract keys from first line and discard line
540                          */
541
542                         line1_fields = parse_line (merge_text, merge_text->priv->delim);
543                         for ( p = line1_fields; p != NULL; p = p->next )
544                         {
545                                 g_ptr_array_add (merge_text->priv->keys, g_strdup (p->data));
546                         }
547                         free_fields (&line1_fields);
548                 }
549
550         }
551
552
553 }
554
555
556 /*--------------------------------------------------------------------------*/
557 /* Close merge source.                                                      */
558 /*--------------------------------------------------------------------------*/
559 static void
560 gl_merge_text_close (glMerge *merge)
561 {
562         glMergeText *merge_text;
563
564         merge_text = GL_MERGE_TEXT (merge);
565
566         if (merge_text->priv->fp != NULL) {
567
568                 fclose (merge_text->priv->fp);
569                 merge_text->priv->fp = NULL;
570
571         }
572         if (merge_text->priv->g_iconverter != 0) {
573                 g_iconv_close(merge_text->priv->g_iconverter);
574                 merge_text->priv->g_iconverter = 0;
575         }
576 }
577
578
579 /*--------------------------------------------------------------------------*/
580 /* Get next record from merge source, NULL if no records left (i.e EOF)     */
581 /*--------------------------------------------------------------------------*/
582 static glMergeRecord *
583 gl_merge_text_get_record (glMerge *merge)
584 {
585         glMergeText   *merge_text;
586         gchar          delim;
587         glMergeRecord *record;
588         GList         *fields, *p;
589         gint           i_field;
590         glMergeField  *field;
591
592         merge_text = GL_MERGE_TEXT (merge);
593
594         delim = merge_text->priv->delim;
595
596         fields = parse_line (merge_text, delim);
597         if ( fields == NULL ) {
598                 return NULL;
599         }
600
601         record = g_new0 (glMergeRecord, 1);
602         record->select_flag = TRUE;
603         for (p=fields, i_field=0; p != NULL; p=p->next, i_field++) {
604
605                 field = g_new0 (glMergeField, 1);
606                 field->key = key_from_index (merge_text, i_field);
607 #ifndef CSV_ALWAYS_UTF8
608                 if (merge_text->priv->encoding == SYSTEM_ENCODING) {
609                         field->value = g_locale_to_utf8 (p->data, -1, NULL, NULL, NULL);
610                 } else {
611                         field->value = g_strdup (p->data);
612                 }
613 #else
614                 field->value = g_strdup (p->data);
615 #endif
616
617                 record->field_list = g_list_append (record->field_list, field);
618         }
619         free_fields (&fields);
620
621         if ( i_field > merge_text->priv->n_fields_max )
622         {
623                 merge_text->priv->n_fields_max = i_field;
624         }
625
626         return record;
627 }
628
629
630 /*---------------------------------------------------------------------------*/
631 /* Copy merge_text specific fields.                                          */
632 /*---------------------------------------------------------------------------*/
633 static void
634 gl_merge_text_copy (glMerge       *dst_merge,
635                     const glMerge *src_merge)
636 {
637         glMergeText *dst_merge_text;
638         glMergeText *src_merge_text;
639         gint         i;
640
641         dst_merge_text = GL_MERGE_TEXT (dst_merge);
642         src_merge_text = GL_MERGE_TEXT (src_merge);
643
644         dst_merge_text->priv->delim          = src_merge_text->priv->delim;
645         dst_merge_text->priv->line1_has_keys = src_merge_text->priv->line1_has_keys;
646
647         for ( i=0; i < src_merge_text->priv->keys->len; i++ )
648         {
649                 g_ptr_array_add (dst_merge_text->priv->keys,
650                                  g_strdup ((gchar *)g_ptr_array_index (src_merge_text->priv->keys, i)));
651         }
652
653         dst_merge_text->priv->n_fields_max   = src_merge_text->priv->n_fields_max;
654 }
655
656
657 /*---------------------------------------------------------------------------*/
658 /* PRIVATE.  Parse line.                                                     */
659 /*                                                                           */
660 /* Attempt to be a robust parser of various CSV (and similar) formats.       */
661 /*                                                                           */
662 /* Based on CSV format described in RFC 4180 section 2.                      */
663 /*                                                                           */
664 /* Additions to RFC 4180 rules:                                              */
665 /*   - delimiters and other special characters may be "escaped" by a leading */
666 /*     backslash (\)                                                         */
667 /*   - C escape sequences for newline (\n) and tab (\t) are also translated. */
668 /*   - if quoted text is not followed by a delimiter, any additional text is */
669 /*     concatenated with quoted portion.                                     */
670 /*                                                                           */
671 /* Returns a list of fields.  A blank line is considered a line with one     */
672 /* empty field.  Returns empty (NULL) when done.                             */
673 /*---------------------------------------------------------------------------*/
674 static GList *
675 parse_line (glMergeText* merge_text,
676             gchar  delim )
677 {
678         GList   *list;
679         GString *field;
680         gint     c;
681         enum { DELIM,
682                QUOTED, QUOTED_QUOTE1, QUOTED_ESCAPED,
683                SIMPLE, SIMPLE_ESCAPED,
684                DONE } state;
685
686         if (merge_text->priv->fp == NULL) {
687                 return NULL;
688         }
689                
690         state = DELIM;
691         list  = NULL;
692         field = g_string_new( "" );
693         while ( state != DONE ) {
694                 c=gl_getc (merge_text);
695
696                 switch (state) {
697
698                 case DELIM:
699                         switch (c) {
700                         case '\n':
701                                 /* last field is empty. */
702                                 list = g_list_append (list, g_strdup (""));
703                                 state = DONE;
704                                 break;
705                         case '\r':
706                                 /* ignore */
707                                 state = DELIM;
708                                 break;
709                         case EOF:
710                                 /* end of file, no more lines. */
711                                 state = DONE;
712                                 break;
713                         case '"':
714                                 /* start a quoted field. */
715                                 state = QUOTED;
716                                 break;
717                         case '\\':
718                                 /* simple field, but 1st character is an escape. */
719                                 state = SIMPLE_ESCAPED;
720                                 break;
721                         default:
722                                 if ( c == delim )
723                                 {
724                                         /* field is empty. */
725                                         list = g_list_append (list, g_strdup (""));
726                                         state = DELIM;
727                                 }
728                                 else
729                                 {
730                                         /* begining of a simple field. */
731                                         field = g_string_append_c (field, c);
732                                         state = SIMPLE;
733                                 }
734                                 break;
735                         }
736                         break;
737
738                 case QUOTED:
739                         switch (c) {
740                         case EOF:
741                                 /* File ended mid way through quoted item, truncate field. */
742                                 list = g_list_append (list, g_strdup (field->str));
743                                 state = DONE;
744                                 break;
745                         case '"':
746                                 /* Possible end of field, but could be 1st of a pair. */
747                                 state = QUOTED_QUOTE1;
748                                 break;
749                         case '\\':
750                                 /* Escape next character, or special escape, e.g. \n. */
751                                 state = QUOTED_ESCAPED;
752                                 break;
753                         default:
754                                 /* Use character literally. */
755                                 field = g_string_append_c (field, c);
756                                 break;
757                         }
758                         break;
759
760                 case QUOTED_QUOTE1:
761                         switch (c) {
762                         case '\n':
763                         case EOF:
764                                 /* line or file ended after quoted item */
765                                 list = g_list_append (list, g_strdup (field->str));
766                                 state = DONE;
767                                 break;
768                         case '"':
769                                 /* second quote, insert and stay quoted. */
770                                 field = g_string_append_c (field, c);
771                                 state = QUOTED;
772                                 break;
773                         case '\r':
774                                 /* ignore and go to fallback */
775                                 state = SIMPLE;
776                                 break;
777                         default:
778                                 if ( c == delim )
779                                 {
780                                         /* end of field. */
781                                         list = g_list_append (list, g_strdup (field->str));
782                                         field = g_string_assign( field, "" );
783                                         state = DELIM;
784                                 }
785                                 else
786                                 {
787                                         /* fallback if not a delim or another quote. */
788                                         field = g_string_append_c (field, c);
789                                         state = SIMPLE;
790                                 }
791                                 break;
792                         }
793                         break;
794
795                 case QUOTED_ESCAPED:
796                         switch (c) {
797                         case EOF:
798                                 /* File ended mid way through quoted item */
799                                 list = g_list_append (list, g_strdup (field->str));
800                                 state = DONE;
801                                 break;
802                         case 'n':
803                                 /* Decode "\n" as newline. */
804                                 field = g_string_append_c (field, '\n');
805                                 state = QUOTED;
806                                 break;
807                         case 't':
808                                 /* Decode "\t" as tab. */
809                                 field = g_string_append_c (field, '\t');
810                                 state = QUOTED;
811                                 break;
812                         default:
813                                 /* Use character literally. */
814                                 field = g_string_append_c (field, c);
815                                 state = QUOTED;
816                                 break;
817                         }
818                         break;
819
820                 case SIMPLE:
821                         switch (c) {
822                         case '\n':
823                         case EOF:
824                                 /* line or file ended */
825                                 list = g_list_append (list, g_strdup (field->str));
826                                 state = DONE;
827                                 break;
828                         case '\r':
829                                 /* ignore */
830                                 state = SIMPLE;
831                                 break;
832                         case '\\':
833                                 /* Escape next character, or special escape, e.g. \n. */
834                                 state = SIMPLE_ESCAPED;
835                                 break;
836                         default:
837                                 if ( c == delim )
838                                 {
839                                         /* end of field. */
840                                         list = g_list_append (list, g_strdup (field->str));
841                                         field = g_string_assign( field, "" );
842                                         state = DELIM;
843                                 }
844                                 else
845                                 {
846                                         /* Use character literally. */
847                                         field = g_string_append_c (field, c);
848                                         state = SIMPLE;
849                                 }
850                                 break;
851                         }
852                         break;
853
854                 case SIMPLE_ESCAPED:
855                         switch (c) {
856                         case EOF:
857                                 /* File ended mid way through quoted item */
858                                 list = g_list_append (list, g_strdup (field->str));
859                                 state = DONE;
860                                 break;
861                         case 'n':
862                                 /* Decode "\n" as newline. */
863                                 field = g_string_append_c (field, '\n');
864                                 state = SIMPLE;
865                                 break;
866                         case 't':
867                                 /* Decode "\t" as tab. */
868                                 field = g_string_append_c (field, '\t');
869                                 state = SIMPLE;
870                                 break;
871                         default:
872                                 /* Use character literally. */
873                                 field = g_string_append_c (field, c);
874                                 state = SIMPLE;
875                                 break;
876                         }
877                         break;
878
879                 default:
880                         g_assert_not_reached();
881                         break;
882                 }
883
884         }
885         g_string_free( field, TRUE );
886
887         return list;
888 }
889
890
891 /*---------------------------------------------------------------------------*/
892 /* Free list of fields.                                                      */
893 /*---------------------------------------------------------------------------*/
894 void
895 free_fields (GList ** list)
896 {
897         GList *p;
898
899         for ( p = *list; p != NULL; p = p->next )
900         {
901                 g_free (p->data);
902                 p->data = NULL;
903         }
904
905         g_list_free (*list);
906         *list = NULL;
907 }
908
909
910
911 /*
912  * Local Variables:       -- emacs
913  * mode: C                -- emacs
914  * c-basic-offset: 8      -- emacs
915  * tab-width: 8           -- emacs
916  * indent-tabs-mode: nil  -- emacs
917  * End:                   -- emacs
918  */