/* strmatch.c -- ksh-like extended pattern matching for the shell and filename globbing. */ /* Copyright (C) 1991-2005 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. Bash is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any later version. Bash is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with Bash; see the file COPYING. If not, write to the Free Software Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */ #include #include /* for debugging */ #include "strmatch.h" #include #include "bashansi.h" #include "shmbutil.h" #include "xmalloc.h" /* First, compile `sm_loop.c' for single-byte characters. */ #define CHAR unsigned char #define U_CHAR unsigned char #define XCHAR char #define INT int #define L(CS) CS #define INVALID -1 #undef STREQ #undef STREQN #define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0) #define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0) /* We use strcoll(3) for range comparisons in bracket expressions, even though it can have unwanted side effects in locales other than POSIX or US. For instance, in the de locale, [A-Z] matches all characters. */ #if defined (HAVE_STRCOLL) /* Helper function for collating symbol equivalence. */ static int rangecmp (c1, c2) int c1, c2; { static char s1[2] = { ' ', '\0' }; static char s2[2] = { ' ', '\0' }; int ret; /* Eight bits only. Period. */ c1 &= 0xFF; c2 &= 0xFF; if (c1 == c2) return (0); s1[0] = c1; s2[0] = c2; if ((ret = strcoll (s1, s2)) != 0) return ret; return (c1 - c2); } #else /* !HAVE_STRCOLL */ # define rangecmp(c1, c2) ((int)(c1) - (int)(c2)) #endif /* !HAVE_STRCOLL */ #if defined (HAVE_STRCOLL) static int collequiv (c1, c2) int c1, c2; { return (rangecmp (c1, c2) == 0); } #else # define collequiv(c1, c2) ((c1) == (c2)) #endif #define _COLLSYM _collsym #define __COLLSYM __collsym #define POSIXCOLL posix_collsyms #include "collsyms.h" static int collsym (s, len) CHAR *s; int len; { register struct _collsym *csp; char *x; x = (char *)s; for (csp = posix_collsyms; csp->name; csp++) { if (STREQN(csp->name, x, len) && csp->name[len] == '\0') return (csp->code); } if (len == 1) return s[0]; return INVALID; } /* unibyte character classification */ #if !defined (isascii) && !defined (HAVE_ISASCII) # define isascii(c) ((unsigned int)(c) <= 0177) #endif enum char_class { CC_NO_CLASS = 0, CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH, CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT }; static char const *const cclass_name[] = { "", "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph", "lower", "print", "punct", "space", "upper", "word", "xdigit" }; #define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0])) static int is_cclass (c, name) int c; const char *name; { enum char_class char_class = CC_NO_CLASS; int i, result; for (i = 1; i < N_CHAR_CLASS; i++) { if (STREQ (name, cclass_name[i])) { char_class = (enum char_class)i; break; } } if (char_class == 0) return -1; switch (char_class) { case CC_ASCII: result = isascii (c); break; case CC_ALNUM: result = ISALNUM (c); break; case CC_ALPHA: result = ISALPHA (c); break; case CC_BLANK: result = ISBLANK (c); break; case CC_CNTRL: result = ISCNTRL (c); break; case CC_DIGIT: result = ISDIGIT (c); break; case CC_GRAPH: result = ISGRAPH (c); break; case CC_LOWER: result = ISLOWER (c); break; case CC_PRINT: result = ISPRINT (c); break; case CC_PUNCT: result = ISPUNCT (c); break; case CC_SPACE: result = ISSPACE (c); break; case CC_UPPER: result = ISUPPER (c); break; case CC_WORD: result = (ISALNUM (c) || c == '_'); break; case CC_XDIGIT: result = ISXDIGIT (c); break; default: result = -1; break; } return result; } /* Now include `sm_loop.c' for single-byte characters. */ /* The result of FOLD is an `unsigned char' */ # define FOLD(c) ((flags & FNM_CASEFOLD) \ ? TOLOWER ((unsigned char)c) \ : ((unsigned char)c)) #define FCT internal_strmatch #define GMATCH gmatch #define COLLSYM collsym #define PARSE_COLLSYM parse_collsym #define BRACKMATCH brackmatch #define PATSCAN patscan #define STRCOMPARE strcompare #define EXTMATCH extmatch #define STRCHR(S, C) strchr((S), (C)) #define STRCOLL(S1, S2) strcoll((S1), (S2)) #define STRLEN(S) strlen(S) #define STRCMP(S1, S2) strcmp((S1), (S2)) #define RANGECMP(C1, C2) rangecmp((C1), (C2)) #define COLLEQUIV(C1, C2) collequiv((C1), (C2)) #define CTYPE_T enum char_class #define IS_CCLASS(C, S) is_cclass((C), (S)) #include "sm_loop.c" #if HANDLE_MULTIBYTE # define CHAR wchar_t # define U_CHAR wint_t # define XCHAR wchar_t # define INT wint_t # define L(CS) L##CS # define INVALID WEOF # undef STREQ # undef STREQN # define STREQ(s1, s2) ((wcscmp (s1, s2) == 0)) # define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0) static int rangecmp_wc (c1, c2) wint_t c1, c2; { static wchar_t s1[2] = { L' ', L'\0' }; static wchar_t s2[2] = { L' ', L'\0' }; if (c1 == c2) return 0; s1[0] = c1; s2[0] = c2; return (wcscoll (s1, s2)); } static int collequiv_wc (c, equiv) wint_t c, equiv; { return (!(c - equiv)); } /* Helper function for collating symbol. */ # define _COLLSYM _collwcsym # define __COLLSYM __collwcsym # define POSIXCOLL posix_collwcsyms # include "collsyms.h" static wint_t collwcsym (s, len) wchar_t *s; int len; { register struct _collwcsym *csp; for (csp = posix_collwcsyms; csp->name; csp++) { if (STREQN(csp->name, s, len) && csp->name[len] == L'\0') return (csp->code); } if (len == 1) return s[0]; return INVALID; } static int is_wcclass (wc, name) wint_t wc; wchar_t *name; { char *mbs; mbstate_t state; size_t mbslength; wctype_t desc; int want_word; if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0)) { int c; if ((c = wctob (wc)) == EOF) return 0; else return (c <= 0x7F); } want_word = (wcscmp (name, L"word") == 0); if (want_word) name = L"alnum"; memset (&state, '\0', sizeof (mbstate_t)); mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1); mbslength = wcsrtombs(mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state); if (mbslength == (size_t)-1 || mbslength == (size_t)-2) { free (mbs); return -1; } desc = wctype (mbs); free (mbs); if (desc == (wctype_t)0) return -1; if (want_word) return (iswctype (wc, desc) || wc == L'_'); else return (iswctype (wc, desc)); } /* Now include `sm_loop.c' for multibyte characters. */ #define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c)) #define FCT internal_wstrmatch #define GMATCH gmatch_wc #define COLLSYM collwcsym #define PARSE_COLLSYM parse_collwcsym #define BRACKMATCH brackmatch_wc #define PATSCAN patscan_wc #define STRCOMPARE wscompare #define EXTMATCH extmatch_wc #define STRCHR(S, C) wcschr((S), (C)) #define STRCOLL(S1, S2) wcscoll((S1), (S2)) #define STRLEN(S) wcslen(S) #define STRCMP(S1, S2) wcscmp((S1), (S2)) #define RANGECMP(C1, C2) rangecmp_wc((C1), (C2)) #define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2)) #define CTYPE_T enum char_class #define IS_CCLASS(C, S) is_wcclass((C), (S)) #include "sm_loop.c" #endif /* HAVE_MULTIBYTE */ int xstrmatch (pattern, string, flags) char *pattern; char *string; int flags; { #if HANDLE_MULTIBYTE int ret; size_t n; wchar_t *wpattern, *wstring; if (MB_CUR_MAX == 1) return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); n = xdupmbstowcs (&wpattern, NULL, pattern); if (n == (size_t)-1 || n == (size_t)-2) return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); n = xdupmbstowcs (&wstring, NULL, string); if (n == (size_t)-1 || n == (size_t)-2) { free (wpattern); return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); } ret = internal_wstrmatch (wpattern, wstring, flags); free (wpattern); free (wstring); return ret; #else return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags)); #endif /* !HANDLE_MULTIBYTE */ }