git.sur5r.net Git - glabels/blob - libglabels/lgl-str.c

   1 /*
   2  *  lgl-str.c
   3  *  Copyright (C) 2007-2010  Jim Evins <evins@snaught.com>.
   4  *
   5  *  This file is part of libglabels.
   6  *
   7  *  libglabels is free software: you can redistribute it and/or modify
   8  *  it under the terms of the GNU Lesser General Public License as published by
   9  *  the Free Software Foundation, either version 3 of the License, or
  10  *  (at your option) any later version.
  11  *
  12  *  libglabels is distributed in the hope that it will be useful,
  13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  *  GNU Lesser General Public License for more details.
  16  *
  17  *  You should have received a copy of the GNU Lesser General Public License
  18  *  along with libglabels.  If not, see <http://www.gnu.org/licenses/>.
  19  */
  20
  21 #include <config.h>
  22
  23 #include "lgl-str.h"
  24
  25 #include <string.h>
  26 #include <math.h>
  27
/* Largest distance from a whole number at which lgl_str_format_fraction()
 * still accepts x * denominator as an exact numerator. */
#define FRAC_EPSILON 0.00005
  29
  30
  31 /*===========================================*/
  32 /* Private types                             */
  33 /*===========================================*/
  34
  35
  36 /*===========================================*/
  37 /* Private globals                           */
  38 /*===========================================*/
  39
  40
  41 /*===========================================*/
  42 /* Local function prototypes                 */
  43 /*===========================================*/
  44
/* Chunk extractors used by lgl_str_part_name_cmp().  Each one returns a newly
 * allocated string taken from the start of *p and advances *p past what it
 * consumed; the caller releases the result with g_free(). */
static gchar *span_digits (gchar **p);      /* consumes a leading run of ASCII digits */
static gchar *span_non_digits (gchar **p);  /* consumes a leading run of non-digits   */
  47
  48 /*===========================================*/
  49 /* Functions.                                */
  50 /*===========================================*/
  51
  52 /**
  53  * lgl_str_utf8_casecmp:
  54  * @s1: string to compare with s2.
  55  * @s2: string to compare with s1.
  56  *
  57  * Compare two UTF-8 strings, ignoring the case of characters.
  58  *
  59  * This function should be used only on strings that are known to be encoded
  60  * in UTF-8 or a compatible UTF-8 subset.
  61  *
  62  * Returns: 0 if the strings match, a negative value if s1 < s2,
  63  *          or a positive value if s1 > s2.
  64  *
  65  */
  66 gint
  67 lgl_str_utf8_casecmp (const gchar *s1,
  68                       const gchar *s2)
  69 {
  70         gchar *folded_s1;
  71         gchar *folded_s2;
  72         gint   result;
  73
  74         folded_s1 = g_utf8_casefold (s1, -1);
  75         folded_s2 = g_utf8_casefold (s2, -1);
  76
  77         result = g_utf8_collate (folded_s1, folded_s2);
  78
  79         g_free (folded_s1);
  80         g_free (folded_s2);
  81
  82         return result;
  83 }
  84
  85
  86 /**
  87  * lgl_str_part_name_cmp:
  88  * @s1: string to compare with s2.
  89  * @s2: string to compare with s1.
  90  *
  91  * Compare two UTF-8 strings representing part names or numbers.  This function
  92  * uses a natural sort order:
  93  *
  94  *  - Ignores case.
  95  *
  96  *  - Strings are divided into chunks (numeric and non-numeric)
  97  *
  98  *  - Non-numeric chunks are compared character by character
  99  *
 100  *  - Numerical chunks are compared numerically, so that "20" precedes "100".
 101  *
 102  *  - Comparison of chunks is performed left to right until the first difference
 103  *    is encountered or all chunks evaluate as equal.
 104  *
 105  * This function should be used only on strings that are known to be encoded
 106  * in UTF-8 or a compatible UTF-8 subset.
 107  *
 108  * Numeric chunks are converted to 64 bit unsigned integers for comparison,
 109  * so the behaviour may be unpredictable for numeric chunks that exceed
 110  * 18446744073709551615.
 111  *
 112  * Returns: 0 if the strings match, a negative value if s1 < s2,
 113  *          or a positive value if s1 > s2.
 114  *
 115  */
 116 gint
 117 lgl_str_part_name_cmp (const gchar *s1,
 118                        const gchar *s2)
 119 {
 120         gchar *folded_s1, *p1, *chunk1;
 121         gchar *folded_s2, *p2, *chunk2;
 122         gboolean isnum1, isnum2;
 123         guint64 n1, n2;
 124         gboolean done;
 125         gint   result;
 126
 127         if ( s1 == s2 ) return 0;
 128         if (s1 == NULL) return -1;
 129         if (s2 == NULL) return 1;
 130
 131         folded_s1 = g_utf8_casefold (s1, -1);
 132         folded_s2 = g_utf8_casefold (s2, -1);
 133
 134         result = 0;
 135         done = FALSE;
 136         p1 = folded_s1;
 137         p2 = folded_s2;
 138         while ( (result == 0) && !done )
 139         {
 140
 141                 if ( g_ascii_isdigit (*p1) )
 142                 {
 143                         chunk1 = span_digits (&p1);
 144                         isnum1 = TRUE;
 145                 }
 146                 else
 147                 {
 148                         chunk1 = span_non_digits (&p1);
 149                         isnum1 = FALSE;
 150                 }
 151
 152                 if ( g_ascii_isdigit (*p2) )
 153                 {
 154                         chunk2 = span_digits (&p2);
 155                         isnum2 = TRUE;
 156                 }
 157                 else
 158                 {
 159                         chunk2 = span_non_digits (&p2);
 160                         isnum2 = FALSE;
 161                 }
 162
 163                 if ( (*chunk1 == '\0') && (*chunk2 == '\0') )
 164                 {
 165                         /* Case 1: Both are empty. */
 166                         done = TRUE;
 167                 }
 168                 else if ( isnum1 && isnum2 )
 169                 {
 170                         /* Case 2: They both contain numbers */
 171                         n1 = g_ascii_strtoull (chunk1, NULL, 10);
 172                         n2 = g_ascii_strtoull (chunk2, NULL, 10);
 173
 174                         if ( n1 < n2 ) result = -1;
 175                         if ( n1 > n2 ) result =  1;
 176                 }
 177                 else
 178                 {
 179                         /* Case 3: One or both do not contain numbers */
 180                         result = g_utf8_collate (chunk1, chunk2);
 181                 }
 182
 183                 g_free (chunk1);
 184                 g_free (chunk2);
 185         }
 186
 187         g_free (folded_s1);
 188         g_free (folded_s2);
 189
 190         return result;
 191 }
 192
 193
 194 static gchar *
 195 span_digits (gchar **p)
 196 {
 197         gchar *chunk = g_new0 (gchar, strlen (*p) + 1);
 198         gint i;
 199
 200         for ( i = 0; **p && g_ascii_isdigit (**p); i++, *p = g_utf8_next_char(*p) )
 201         {
 202                 chunk[i] = **p;
 203         }
 204
 205         return chunk;
 206 }
 207
 208
 209 static gchar *
 210 span_non_digits (gchar **p)
 211 {
 212         gchar *chunk = g_new0 (gchar, strlen (*p) + 1);
 213         gint i;
 214
 215         for ( i = 0; **p && !g_ascii_isdigit (**p); i++, *p = g_utf8_next_char(*p) )
 216         {
 217                 chunk[i] = **p;
 218         }
 219
 220         return chunk;
 221 }
 222
 223
 224 /**
 225  * lgl_str_format_fraction:
 226  * @x: Floating point number to convert to fractional notation
 227  *
 228  * Create fractional representation of number, if possible.  Uses UTF-8 superscripts and
 229  * subscripts for numerator and denominator values respecively.
 230  *
 231  * Returns: UTF-8 string containing fractional representation of x.
 232  */
 233 gchar *
 234 lgl_str_format_fraction (gdouble x)
 235 {
 236         static gdouble denom[]        = {  1.,  2., 3.,  4.,  8.,  16.,  32.,  0. };
 237         static gchar  *denom_string[] = { "1", "₂", "₃", "₄", "₈", "₁₆", "₃₂", NULL };
 238         static gchar  *num_string[]   = {  "⁰",  "¹",  "²",  "³",  "⁴",  "⁵",  "⁶",  "⁷",  "⁸",  "⁹",
 239                                           "¹⁰", "¹¹", "¹²", "¹³", "¹⁴", "¹⁵", "¹⁶", "¹⁷", "¹⁸", "¹⁹",
 240                                           "²⁰", "²¹", "²²", "²³", "²⁴", "²⁵", "²⁶", "²⁷", "²⁸", "²⁹",
 241                                           "³⁰", "³¹" };
 242         gint i;
 243         gdouble product, remainder;
 244         gint n, d;
 245
 246         for ( i=0; denom[i] != 0.0; i++ )
 247         {
 248                 product = x * denom[i];
 249                 remainder = fabs(product - ((gint)(product+0.5)));
 250                 if ( remainder < FRAC_EPSILON ) break;
 251         }
 252
 253         if ( denom[i] == 0.0 )
 254         {
 255                 /* None of our denominators work. */
 256                 return g_strdup_printf ("%.5g", x);
 257         }
 258         if ( denom[i] == 1.0 )
 259         {
 260                 /* Simple integer. */
 261                 return g_strdup_printf ("%.0f", x);
 262         }
 263         n = (gint)( x * denom[i] + 0.5 );
 264         d = (gint)denom[i];
 265         if ( n > d )
 266         {
 267                 return g_strdup_printf ("%d%s/%s", (n/d), num_string[n%d], denom_string[i]);
 268         }
 269         else
 270         {
 271                 return g_strdup_printf ("%s/%s", num_string[n%d], denom_string[i]);
 272         }
 273 }
 274
 275
 276
 277 /*
 278  * Local Variables:       -- emacs
 279  * mode: C                -- emacs
 280  * c-basic-offset: 8      -- emacs
 281  * tab-width: 8           -- emacs
 282  * indent-tabs-mode: nil  -- emacs
 283  * End:                   -- emacs
 284  */