From 9f99c2dc866740bf252fa6f0e6e28d165ae3606f Mon Sep 17 00:00:00 2001 From: Jim Evins Date: Sun, 3 Jan 2010 22:05:08 -0500 Subject: [PATCH] Use "natural" sorting order for part numbers Use a "natural" sorting order for part numbers. I.e. numeric portions are sorted numerically rather than character by character, such that "A20" will precede "A100". Also found and fixed a couple minor inconsistencies in the template data base while debugging the new sorting code. --- libglabels/db.c | 4 +- libglabels/str.c | 138 +++++++++++++++++++++++++++- libglabels/str.h | 7 +- templates/avery-other-templates.xml | 4 +- templates/avery-us-templates.xml | 2 +- 5 files changed, 147 insertions(+), 8 deletions(-) diff --git a/libglabels/db.c b/libglabels/db.c index b05f54d3..b43ff6b3 100644 --- a/libglabels/db.c +++ b/libglabels/db.c @@ -1490,7 +1490,7 @@ lgl_db_get_template_name_list_unique (const gchar *brand, { name = g_strdup_printf ("%s %s", template->brand, template->part); names = g_list_insert_sorted (names, name, - (GCompareFunc)g_utf8_collate); + (GCompareFunc)lgl_str_part_name_cmp); } } @@ -1545,7 +1545,7 @@ lgl_db_get_template_name_list_all (const gchar *brand, { name = g_strdup_printf ("%s %s", alias->brand, alias->part); names = g_list_insert_sorted (names, name, - (GCompareFunc)g_utf8_collate); + (GCompareFunc)lgl_str_part_name_cmp); } } } diff --git a/libglabels/str.c b/libglabels/str.c index 5c21f189..627e6be4 100644 --- a/libglabels/str.c +++ b/libglabels/str.c @@ -18,10 +18,11 @@ * along with libglabels. If not, see . */ -#include +//#include #include "str.h" +#include /*===========================================*/ /* Private types */ @@ -37,6 +38,8 @@ /* Local function prototypes */ /*===========================================*/ +static gchar *span_digits (gchar **p); +static gchar *span_non_digits (gchar **p); /*===========================================*/ /* Functions. 
*/
@@ -76,6 +79,154 @@ lgl_str_utf8_casecmp (const gchar *s1,
 }
 
 
+/**
+ * lgl_str_part_name_cmp:
+ * @s1: string to compare with s2.
+ * @s2: string to compare with s1.
+ *
+ * Compare two UTF-8 strings representing part names or numbers.  This function
+ * uses a natural sort order:
+ *  - Ignores case.
+ *  - Strings are divided into chunks (numeric and non-numeric)
+ *  - Non-numeric chunks are compared character by character
+ *  - Numerical chunks are compared numerically, so that "20" precedes "100".
+ *  - Comparison of chunks is performed left to right until the first difference
+ *    is encountered or all chunks evaluate as equal.
+ *
+ * This function should be used only on strings that are known to be encoded
+ * in UTF-8 or a compatible UTF-8 subset.
+ *
+ * Numeric chunks are converted to 64 bit unsigned integers for comparison,
+ * so the behaviour may be unpredictable for numeric chunks that exceed
+ * 18446744073709551615.
+ *
+ * Returns: 0 if the strings match, a negative value if s1 < s2,
+ *          or a positive value if s1 > s2.
+ *
+ */
+gint
+lgl_str_part_name_cmp (const gchar *s1,
+                       const gchar *s2)
+{
+        gchar    *folded_s1, *p1, *chunk1;
+        gchar    *folded_s2, *p2, *chunk2;
+        gboolean  isnum1, isnum2;
+        guint64   n1, n2;
+        gboolean  done;
+        gint      result;
+
+        if ( s1 == s2 ) return 0;
+        if (s1 == NULL) return -1;
+        if (s2 == NULL) return 1;
+
+        folded_s1 = g_utf8_casefold (s1, -1);
+        folded_s2 = g_utf8_casefold (s2, -1);
+
+        result = 0;
+        done = FALSE;
+        p1 = folded_s1;
+        p2 = folded_s2;
+        /* Compare one chunk from each string per pass until they differ. */
+        while ( (result == 0) && !done )
+        {
+
+                if ( g_ascii_isdigit (*p1) )
+                {
+                        chunk1 = span_digits (&p1);
+                        isnum1 = TRUE;
+                }
+                else
+                {
+                        chunk1 = span_non_digits (&p1);
+                        isnum1 = FALSE;
+                }
+
+                if ( g_ascii_isdigit (*p2) )
+                {
+                        chunk2 = span_digits (&p2);
+                        isnum2 = TRUE;
+                }
+                else
+                {
+                        chunk2 = span_non_digits (&p2);
+                        isnum2 = FALSE;
+                }
+
+                if ( (strlen(chunk1) == 0) && (strlen(chunk2) == 0) )
+                {
+                        /* Case 1: Both are empty. */
+                        done = TRUE;
+                }
+                else if ( isnum1 && isnum2 )
+                {
+                        /* Case 2: They both contain numbers */
+                        n1 = g_ascii_strtoull (chunk1, NULL, 10);
+                        n2 = g_ascii_strtoull (chunk2, NULL, 10);
+
+                        if ( n1 < n2 ) result = -1;
+                        if ( n1 > n2 ) result =  1;
+                }
+                else
+                {
+                        /* Case 3: One or both do not contain numbers */
+                        result = g_utf8_collate (chunk1, chunk2);
+                }
+
+                g_free (chunk1);
+                g_free (chunk2);
+        }
+
+        g_free (folded_s1);
+        g_free (folded_s2);
+
+        return result;
+}
+
+
+/*
+ * Copy the leading run of ASCII digits at *p into a newly allocated,
+ * NUL-terminated string and advance *p past it.  The caller owns the
+ * result and must release it with g_free().
+ */
+static gchar *
+span_digits (gchar **p)
+{
+        gchar *chunk = g_new0 (gchar, strlen (*p) + 1);
+        gint   i;
+
+        for ( i = 0; **p && g_ascii_isdigit (**p); i++, (*p)++ )
+        {
+                chunk[i] = **p;
+        }
+
+        return chunk;
+}
+
+
+/*
+ * Copy the leading run of non-digit bytes at *p into a newly allocated,
+ * NUL-terminated string and advance *p past it.  The caller owns the
+ * result and must release it with g_free().
+ *
+ * The scan is byte-wise on purpose: ASCII digits never occur inside a
+ * multi-byte UTF-8 sequence, and copying every byte keeps non-ASCII
+ * characters intact (stepping by character would copy only lead bytes).
+ */
+static gchar *
+span_non_digits (gchar **p)
+{
+        gchar *chunk = g_new0 (gchar, strlen (*p) + 1);
+        gint   i;
+
+        for ( i = 0; **p && !g_ascii_isdigit (**p); i++, (*p)++ )
+        {
+                chunk[i] = **p;
+        }
+
+        return chunk;
+}
+
+
 
 /*
  * Local Variables: -- emacs
diff --git a/libglabels/str.h b/libglabels/str.h
index a3ace6f9..2675029d 100644
--- a/libglabels/str.h
+++ b/libglabels/str.h
@@ -25,8 +25,11 @@
 
 G_BEGIN_DECLS
 
-gint lgl_str_utf8_casecmp (const gchar *s1,
-                           const gchar *s2);
+gint lgl_str_utf8_casecmp  (const gchar *s1,
+                            const gchar *s2);
+
+gint lgl_str_part_name_cmp (const gchar *s1,
+                            const gchar *s2);
 
 G_END_DECLS
 
diff --git a/templates/avery-other-templates.xml b/templates/avery-other-templates.xml index 6e5a6698..a068b8c3 100644 --- a/templates/avery-other-templates.xml +++ b/templates/avery-other-templates.xml @@ -36,9 +36,9 @@ - + -