1 /* Copyright (C) 1991,1992,1993,1996,1997,1998,1999,2000,2001,2003,2004,2005,
2 2007 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
24 int no_leading_period;
28 /* Match STRING against the filename pattern PATTERN, returning zero if
29 it matches, nonzero if not. */
30 static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
31 const CHAR *string_end, int no_leading_period, int flags);
32 static const CHAR *END (const CHAR *patternp);
38 const CHAR *string_end,
39 int no_leading_period,
43 register const CHAR *p = pattern, *n = string;
46 # if WIDE_CHAR_VERSION
47 const char *collseq = (const char *)
48 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
50 const UCHAR *collseq = (const UCHAR *)
51 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
55 while ((c = *p++) != L('\0'))
57 int new_no_leading_period = 0;
63 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
67 res = EXT (c, p, n, string_end, no_leading_period,
75 else if (*n == L('/') && (flags & FNM_FILE_NAME))
77 else if (*n == L('.') && no_leading_period)
82 if (!(flags & FNM_NOESCAPE))
86 /* Trailing \ loses. */
90 if (n == string_end || FOLD ((UCHAR) *n) != c)
95 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
99 res = EXT (c, p, n, string_end, no_leading_period,
104 else if (ends != NULL)
106 ends->pattern = p - 1;
108 ends->no_leading_period = no_leading_period;
112 if (n != string_end && *n == L('.') && no_leading_period)
115 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
117 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
119 const CHAR *endp = END (p);
122 /* This is a pattern. Skip over it. */
130 /* A ? needs to match one character. */
132 /* There isn't another character; no match. */
134 else if (*n == L('/')
135 && __builtin_expect (flags & FNM_FILE_NAME, 0))
136 /* A slash does not match a wildcard under
140 /* One character of the string is consumed in matching
141 this ? wildcard, so *??? won't match if there are
142 less than three characters. */
148 /* The wildcard(s) is/are the last element of the pattern.
149 If the name is a file name and contains another slash
150 this means it cannot match, unless the FNM_LEADING_DIR
153 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
155 if (flags & FNM_FILE_NAME)
157 if (flags & FNM_LEADING_DIR)
161 if (MEMCHR (n, L('/'), string_end - n) == NULL)
174 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
180 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
181 && (c == L('@') || c == L('+') || c == L('!'))
184 int flags2 = ((flags & FNM_FILE_NAME)
185 ? flags : (flags & ~FNM_PERIOD));
187 for (--p; n < endp; ++n, no_leading_period = 0)
188 if (FCT (p, n, string_end, no_leading_period, flags2,
192 else if (c == L('/') && (flags & FNM_FILE_NAME))
194 while (n < string_end && *n != L('/'))
196 if (n < string_end && *n == L('/')
197 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
203 int flags2 = ((flags & FNM_FILE_NAME)
204 ? flags : (flags & ~FNM_PERIOD));
206 if (c == L('\\') && !(flags & FNM_NOESCAPE))
209 for (--p; n < endp; ++n, no_leading_period = 0)
210 if (FOLD ((UCHAR) *n) == c
211 && (FCT (p, n, string_end, no_leading_period, flags2,
215 if (end.pattern == NULL)
219 if (end.pattern != NULL)
223 no_leading_period = end.no_leading_period;
229 /* If we come here no match is possible with the wildcard. */
234 /* Nonzero if the sense of the character class is inverted. */
239 if (posixly_correct == 0)
240 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
245 if (*n == L('.') && no_leading_period)
248 if (*n == L('/') && (flags & FNM_FILE_NAME))
249 /* `/' cannot be matched. */
252 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
256 fn = FOLD ((UCHAR) *n);
261 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
265 c = FOLD ((UCHAR) *p);
270 else if (c == L('[') && *p == L(':'))
272 /* Leave room for the null. */
273 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
275 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
278 const CHAR *startp = p;
282 if (c1 == CHAR_CLASS_MAX_LENGTH)
283 /* The name is too long and therefore the pattern
288 if (c == L(':') && p[1] == L(']'))
293 if (c < L('a') || c >= L('z'))
295 /* This cannot possibly be a character class name.
296 Match it as a normal range. */
305 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
306 wt = IS_CHAR_CLASS (str);
308 /* Invalid character class name. */
311 # if defined _LIBC && ! WIDE_CHAR_VERSION
312 /* The following code is glibc specific, but it does
313 a good job of speeding up the code since
314 we can avoid the btowc() call. */
315 if (_ISCTYPE ((UCHAR) *n, wt))
318 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
322 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
323 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
324 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
325 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
326 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
327 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
328 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
329 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
330 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
331 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
332 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
333 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
339 else if (c == L('[') && *p == L('='))
343 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
344 const CHAR *startp = p;
356 if (c != L('=') || p[1] != L(']'))
366 if ((UCHAR) *n == str[0])
371 const int32_t *table;
372 # if WIDE_CHAR_VERSION
373 const int32_t *weights;
374 const int32_t *extra;
376 const unsigned char *weights;
377 const unsigned char *extra;
379 const int32_t *indirect;
381 const UCHAR *cp = (const UCHAR *) str;
383 /* This #include defines a local function! */
384 # if WIDE_CHAR_VERSION
385 # include <locale/weightwc.h>
387 # include <locale/weight.h>
390 # if WIDE_CHAR_VERSION
391 table = (const int32_t *)
392 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
393 weights = (const int32_t *)
394 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
395 extra = (const int32_t *)
396 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
397 indirect = (const int32_t *)
398 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
400 table = (const int32_t *)
401 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
402 weights = (const unsigned char *)
403 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
404 extra = (const unsigned char *)
405 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
406 indirect = (const int32_t *)
407 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
413 /* We found a table entry. Now see whether the
414 character we are currently at has the same
415 equivalence class value. */
416 int len = weights[idx];
418 const UCHAR *np = (const UCHAR *) n;
420 idx2 = findidx (&np);
421 if (idx2 != 0 && len == weights[idx2])
426 && (weights[idx + 1 + cnt]
427 == weights[idx2 + 1 + cnt]))
439 else if (c == L('\0'))
440 /* [ (unterminated) loses. */
449 if (c == L('[') && *p == L('.'))
452 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
453 const CHAR *startp = p;
459 if (c == L('.') && p[1] == L(']'))
469 /* We have to handle the symbols differently in
470 ranges since then the collation sequence is
472 is_range = *p == L('-') && p[1] != L('\0');
476 /* There are no names defined in the collation
477 data. Therefore we only accept the trivial
478 names consisting of the character itself. */
482 if (!is_range && *n == startp[1])
491 const int32_t *symb_table;
492 # ifdef WIDE_CHAR_VERSION
496 # define str (startp + 1)
498 const unsigned char *extra;
504 # ifdef WIDE_CHAR_VERSION
505 /* We have to convert the name to a single-byte
506 string. This is possible since the names
507 consist of ASCII characters and the internal
508 representation is UCS4. */
509 for (strcnt = 0; strcnt < c1; ++strcnt)
510 str[strcnt] = startp[1 + strcnt];
514 _NL_CURRENT_WORD (LC_COLLATE,
515 _NL_COLLATE_SYMB_HASH_SIZEMB);
516 symb_table = (const int32_t *)
517 _NL_CURRENT (LC_COLLATE,
518 _NL_COLLATE_SYMB_TABLEMB);
519 extra = (const unsigned char *)
520 _NL_CURRENT (LC_COLLATE,
521 _NL_COLLATE_SYMB_EXTRAMB);
523 /* Locate the character in the hashing table. */
524 hash = elem_hash (str, c1);
527 elem = hash % table_size;
528 if (symb_table[2 * elem] != 0)
530 second = hash % (table_size - 2) + 1;
534 /* First compare the hashing value. */
535 if (symb_table[2 * elem] == hash
537 == extra[symb_table[2 * elem + 1]])
539 &extra[symb_table[2 * elem
543 /* Yep, this is the entry. */
544 idx = symb_table[2 * elem + 1];
545 idx += 1 + extra[idx];
552 while (symb_table[2 * elem] != 0);
555 if (symb_table[2 * elem] != 0)
557 /* Compare the byte sequence but only if
558 this is not part of a range. */
559 # ifdef WIDE_CHAR_VERSION
562 idx += 1 + extra[idx];
563 /* Adjust for the alignment. */
564 idx = (idx + 3) & ~3;
566 wextra = (int32_t *) &extra[idx + 4];
571 # ifdef WIDE_CHAR_VERSION
573 (int32_t) c1 < wextra[0]; /* WEXTRA already points at EXTRA[IDX + 4]; its length is element 0, not element IDX. */
575 if (n[c1] != wextra[1 + c1])
578 if ((int32_t) c1 == wextra[0]) /* All WEXTRA[0] elements compared equal. */
581 for (c1 = 0; c1 < extra[idx]; ++c1)
582 if (n[c1] != extra[1 + c1])
585 if (c1 == extra[idx])
590 /* Get the collation sequence value. */
592 # ifdef WIDE_CHAR_VERSION
593 cold = wextra[1 + wextra[0]]; /* Index from the start of WEXTRA (which already includes the IDX offset): take the element after the WEXTRA[0] sequence elements. */
595 /* Advance IDX by 1 + EXTRA[IDX], then round it up to a multiple of 4 (mask ~3; ~4 would only clear bit 2) before the int32_t load. */
596 idx += 1 + extra[idx];
597 idx = (idx + 3) & ~3;
598 cold = *((int32_t *) &extra[idx]);
605 /* No valid character. Match it as a
607 if (!is_range && *n == str[0])
624 /* We have to handle the symbols differently in
625 ranges since then the collation sequence is
627 is_range = (*p == L('-') && p[1] != L('\0')
630 if (!is_range && c == fn)
633 /* This is needed if we goto normal_bracket; from
634 outside of is_seqval's scope. */
640 if (c == L('-') && *p != L(']'))
643 /* We have to find the collation sequence
644 value for C. Collation sequence is nothing
645 we can regularly access. The sequence
646 value is defined by the order in which the
647 definitions of the collation values for the
648 various characters appear in the source
649 file. A strange concept, nowhere
655 # ifdef WIDE_CHAR_VERSION
656 /* Search in the `names' array for the characters. */
657 fcollseq = __collseq_table_lookup (collseq, fn);
658 if (fcollseq == ~((uint32_t) 0))
659 /* XXX We don't know anything about the character
660 we are supposed to match. This means we are
662 goto range_not_matched;
667 lcollseq = __collseq_table_lookup (collseq, cold);
669 fcollseq = collseq[fn];
670 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
674 if (cend == L('[') && *p == L('.'))
677 _NL_CURRENT_WORD (LC_COLLATE,
679 const CHAR *startp = p;
685 if (c == L('.') && p[1] == L(']'))
697 /* There are no names defined in the
698 collation data. Therefore we only
699 accept the trivial names consisting
700 of the character itself. */
709 const int32_t *symb_table;
710 # ifdef WIDE_CHAR_VERSION
714 # define str (startp + 1)
716 const unsigned char *extra;
722 # ifdef WIDE_CHAR_VERSION
723 /* We have to convert the name to a single-byte
724 string. This is possible since the names
725 consist of ASCII characters and the internal
726 representation is UCS4. */
727 for (strcnt = 0; strcnt < c1; ++strcnt)
728 str[strcnt] = startp[1 + strcnt];
732 _NL_CURRENT_WORD (LC_COLLATE,
733 _NL_COLLATE_SYMB_HASH_SIZEMB);
734 symb_table = (const int32_t *)
735 _NL_CURRENT (LC_COLLATE,
736 _NL_COLLATE_SYMB_TABLEMB);
737 extra = (const unsigned char *)
738 _NL_CURRENT (LC_COLLATE,
739 _NL_COLLATE_SYMB_EXTRAMB);
741 /* Locate the character in the hashing
743 hash = elem_hash (str, c1);
746 elem = hash % table_size;
747 if (symb_table[2 * elem] != 0)
749 second = hash % (table_size - 2) + 1;
753 /* First compare the hashing value. */
754 if (symb_table[2 * elem] == hash
756 == extra[symb_table[2 * elem + 1]])
758 &extra[symb_table[2 * elem + 1]
761 /* Yep, this is the entry. */
762 idx = symb_table[2 * elem + 1];
763 idx += 1 + extra[idx];
770 while (symb_table[2 * elem] != 0);
773 if (symb_table[2 * elem] != 0)
775 /* Compare the byte sequence but only if
776 this is not part of a range. */
777 # ifdef WIDE_CHAR_VERSION
780 idx += 1 + extra[idx];
781 /* Adjust for the alignment: round IDX up to a multiple of 4 (mask ~3; ~4 would only clear bit 2). */
782 idx = (idx + 3) & ~3;
784 wextra = (int32_t *) &extra[idx + 4];
786 /* Get the collation sequence value. */
788 # ifdef WIDE_CHAR_VERSION
789 cend = wextra[1 + wextra[0]]; /* WEXTRA already points at EXTRA[IDX + 4]; index it from 0 rather than adding IDX again. */
791 /* Advance IDX by 1 + EXTRA[IDX], then round it up to a multiple of 4 (mask ~3; ~4 would only clear bit 2) before the int32_t load. */
792 idx += 1 + extra[idx];
793 idx = (idx + 3) & ~3;
794 cend = *((int32_t *) &extra[idx]);
797 else if (symb_table[2 * elem] != 0 && c1 == 1)
809 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
816 /* XXX It is not entirely clear to me how to handle
817 characters which are not mentioned in the
818 collation specification. */
820 # ifdef WIDE_CHAR_VERSION
821 lcollseq == 0xffffffff ||
823 lcollseq <= fcollseq)
825 /* We have to look at the upper bound. */
832 # ifdef WIDE_CHAR_VERSION
834 __collseq_table_lookup (collseq, cend);
835 if (hcollseq == ~((uint32_t) 0))
837 /* Hum, no information about the upper
838 bound. The matching succeeds if the
839 lower bound is matched exactly. */
840 if (lcollseq != fcollseq)
841 goto range_not_matched;
846 hcollseq = collseq[cend];
850 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
853 # ifdef WIDE_CHAR_VERSION
857 /* We use a boring value comparison of the character
858 values. This is better than comparing using
859 `strcoll' since the latter would have surprising
860 and sometimes fatal consequences. */
863 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
869 if (cold <= fn && fn <= cend)
886 /* Skip the rest of the [...] that already matched. */
893 /* [... (unterminated) loses. */
896 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
900 /* XXX 1003.2d11 is unclear if this is right. */
903 else if (c == L('[') && *p == L(':'))
906 const CHAR *startp = p;
911 if (++c1 == CHAR_CLASS_MAX_LENGTH)
914 if (*p == L(':') && p[1] == L(']'))
917 if (c < L('a') || c >= L('z'))
926 else if (c == L('[') && *p == L('='))
932 if (c != L('=') || p[1] != L(']'))
937 else if (c == L('[') && *p == L('.'))
946 if (*p == L('.') && p[1] == L(']'))
962 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
966 res = EXT (c, p, n, string_end, no_leading_period, flags);
973 if (NO_LEADING_PERIOD (flags))
975 if (n == string_end || c != (UCHAR) *n)
978 new_no_leading_period = 1;
984 if (n == string_end || c != FOLD ((UCHAR) *n))
988 no_leading_period = new_no_leading_period;
995 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
996 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
1005 END (const CHAR *pattern)
1007 const CHAR *p = pattern;
1010 if (*++p == L('\0'))
1011 /* This is an invalid pattern. */
1013 else if (*p == L('['))
1015 /* Handle brackets special. */
1016 if (posixly_correct == 0)
1017 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1019 /* Skip the not sign. We have to recognize it because of a possibly
1021 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1023 /* A leading ']' is recognized as such. */
1026 /* Skip over all characters of the list. */
1027 while (*p != L(']'))
1028 if (*p++ == L('\0'))
1029 /* This is no valid pattern. */
1032 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1033 || *p == L('!')) && p[1] == L('('))
1035 else if (*p == L(')'))
1044 EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1045 int no_leading_period, int flags)
1051 struct patternlist *next;
1054 struct patternlist **lastp = &list;
1055 size_t pattern_len = STRLEN (pattern);
1059 /* Parse the pattern. Store the individual parts in the list. */
1061 for (startp = p = pattern + 1; level >= 0; ++p)
1063 /* This is an invalid pattern. */
1065 else if (*p == L('['))
1067 /* Handle brackets special. */
1068 if (posixly_correct == 0)
1069 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1071 /* Skip the not sign. We have to recognize it because of a possibly
1073 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1075 /* A leading ']' is recognized as such. */
1078 /* Skip over all characters of the list. */
1079 while (*p != L(']'))
1080 if (*p++ == L('\0'))
1081 /* This is no valid pattern. */
1084 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1085 || *p == L('!')) && p[1] == L('('))
1086 /* Remember the nesting level. */
1088 else if (*p == L(')'))
1092 /* This means we found the end of the pattern. */
1093 #define NEW_PATTERN \
1094 struct patternlist *newp; \
1096 if (opt == L('?') || opt == L('@')) \
1097 newp = alloca (sizeof (struct patternlist) \
1098 + (pattern_len * sizeof (CHAR))); \
1100 newp = alloca (sizeof (struct patternlist) \
1101 + ((p - startp + 1) * sizeof (CHAR))); \
1102 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1103 newp->next = NULL; \
1109 else if (*p == L('|'))
1117 assert (list != NULL);
1118 assert (p[-1] == L(')'));
1124 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1131 for (rs = string; rs <= string_end; ++rs)
1132 /* First match the prefix with the current pattern with the
1134 if (FCT (list->str, string, rs, no_leading_period,
1135 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1137 /* This was successful. Now match the rest with the rest
1139 && (FCT (p, rs, string_end,
1142 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1143 flags & FNM_FILE_NAME
1144 ? flags : flags & ~FNM_PERIOD, NULL) == 0
1145 /* This didn't work. Try the whole pattern. */
1147 && FCT (pattern - 1, rs, string_end,
1150 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1152 flags & FNM_FILE_NAME
1153 ? flags : flags & ~FNM_PERIOD, NULL) == 0)))
1154 /* It worked. Signal success. */
1157 while ((list = list->next) != NULL);
1159 /* None of the patterns lead to a match. */
1163 if (FCT (p, string, string_end, no_leading_period, flags, NULL) == 0)
1169 /* I cannot believe it but `strcat' is actually acceptable
1170 here. Match the entire string with the prefix from the
1171 pattern list and the rest of the pattern following the
1173 if (FCT (STRCAT (list->str, p), string, string_end,
1175 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1177 /* It worked. Signal success. */
1179 while ((list = list->next) != NULL);
1181 /* None of the patterns lead to a match. */
1185 for (rs = string; rs <= string_end; ++rs)
1187 struct patternlist *runp;
1189 for (runp = list; runp != NULL; runp = runp->next)
1190 if (FCT (runp->str, string, rs, no_leading_period,
1191 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1195 /* If none of the patterns matched see whether the rest does. */
1197 && (FCT (p, rs, string_end,
1200 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1201 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1203 /* This is successful. */
1207 /* None of the patterns together with the rest of the pattern
1212 assert (! "Invalid extended matching operator");