summaryrefslogtreecommitdiff
path: root/lib/glob/smatch.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/glob/smatch.c')
-rw-r--r--lib/glob/smatch.c393
1 files changed, 393 insertions, 0 deletions
diff --git a/lib/glob/smatch.c b/lib/glob/smatch.c
new file mode 100644
index 0000000..12fde3d
--- /dev/null
+++ b/lib/glob/smatch.c
@@ -0,0 +1,393 @@
+/* strmatch.c -- ksh-like extended pattern matching for the shell and filename
+ globbing. */
+
+/* Copyright (C) 1991-2005 Free Software Foundation, Inc.
+
+ This file is part of GNU Bash, the Bourne Again SHell.
+
+ Bash is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 2, or (at your option) any later
+ version.
+
+ Bash is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License along
+ with Bash; see the file COPYING. If not, write to the Free Software
+ Foundation, 59 Temple Place, Suite 330, Boston, MA 02111 USA. */
+
+#include <config.h>
+
+#include <stdio.h> /* for debugging */
+
+#include "strmatch.h"
+#include <chartypes.h>
+
+#include "bashansi.h"
+#include "shmbutil.h"
+#include "xmalloc.h"
+
+/* First, compile `sm_loop.c' for single-byte characters. */
+#define CHAR unsigned char
+#define U_CHAR unsigned char
+#define XCHAR char
+#define INT int
+#define L(CS) CS
+#define INVALID -1
+
+#undef STREQ
+#undef STREQN
+#define STREQ(a, b) ((a)[0] == (b)[0] && strcmp(a, b) == 0)
+#define STREQN(a, b, n) ((a)[0] == (b)[0] && strncmp(a, b, n) == 0)
+
+/* We use strcoll(3) for range comparisons in bracket expressions,
+ even though it can have unwanted side effects in locales
+ other than POSIX or US. For instance, in the de locale, [A-Z] matches
+ all characters. */
+
+#if defined (HAVE_STRCOLL)
+/* Helper function for collating symbol equivalence. */
+static int rangecmp (c1, c2)
+ int c1, c2;
+{
+ static char s1[2] = { ' ', '\0' };
+ static char s2[2] = { ' ', '\0' };
+ int ret;
+
+ /* Eight bits only. Period. */
+ c1 &= 0xFF;
+ c2 &= 0xFF;
+
+ if (c1 == c2)
+ return (0);
+
+ s1[0] = c1;
+ s2[0] = c2;
+
+ if ((ret = strcoll (s1, s2)) != 0)
+ return ret;
+ return (c1 - c2);
+}
+#else /* !HAVE_STRCOLL */
+# define rangecmp(c1, c2) ((int)(c1) - (int)(c2))
+#endif /* !HAVE_STRCOLL */
+
+#if defined (HAVE_STRCOLL)
+static int
+collequiv (c1, c2)
+ int c1, c2;
+{
+ return (rangecmp (c1, c2) == 0);
+}
+#else
+# define collequiv(c1, c2) ((c1) == (c2))
+#endif
+
+#define _COLLSYM _collsym
+#define __COLLSYM __collsym
+#define POSIXCOLL posix_collsyms
+#include "collsyms.h"
+
+static int
+collsym (s, len)
+ CHAR *s;
+ int len;
+{
+ register struct _collsym *csp;
+ char *x;
+
+ x = (char *)s;
+ for (csp = posix_collsyms; csp->name; csp++)
+ {
+ if (STREQN(csp->name, x, len) && csp->name[len] == '\0')
+ return (csp->code);
+ }
+ if (len == 1)
+ return s[0];
+ return INVALID;
+}
+
+/* unibyte character classification */
+#if !defined (isascii) && !defined (HAVE_ISASCII)
+# define isascii(c) ((unsigned int)(c) <= 0177)
+#endif
+
+enum char_class
+ {
+ CC_NO_CLASS = 0,
+ CC_ASCII, CC_ALNUM, CC_ALPHA, CC_BLANK, CC_CNTRL, CC_DIGIT, CC_GRAPH,
+ CC_LOWER, CC_PRINT, CC_PUNCT, CC_SPACE, CC_UPPER, CC_WORD, CC_XDIGIT
+ };
+
+static char const *const cclass_name[] =
+ {
+ "",
+ "ascii", "alnum", "alpha", "blank", "cntrl", "digit", "graph",
+ "lower", "print", "punct", "space", "upper", "word", "xdigit"
+ };
+
+#define N_CHAR_CLASS (sizeof(cclass_name) / sizeof (cclass_name[0]))
+
+static int
+is_cclass (c, name)
+ int c;
+ const char *name;
+{
+ enum char_class char_class = CC_NO_CLASS;
+ int i, result;
+
+ for (i = 1; i < N_CHAR_CLASS; i++)
+ {
+ if (STREQ (name, cclass_name[i]))
+ {
+ char_class = (enum char_class)i;
+ break;
+ }
+ }
+
+ if (char_class == 0)
+ return -1;
+
+ switch (char_class)
+ {
+ case CC_ASCII:
+ result = isascii (c);
+ break;
+ case CC_ALNUM:
+ result = ISALNUM (c);
+ break;
+ case CC_ALPHA:
+ result = ISALPHA (c);
+ break;
+ case CC_BLANK:
+ result = ISBLANK (c);
+ break;
+ case CC_CNTRL:
+ result = ISCNTRL (c);
+ break;
+ case CC_DIGIT:
+ result = ISDIGIT (c);
+ break;
+ case CC_GRAPH:
+ result = ISGRAPH (c);
+ break;
+ case CC_LOWER:
+ result = ISLOWER (c);
+ break;
+ case CC_PRINT:
+ result = ISPRINT (c);
+ break;
+ case CC_PUNCT:
+ result = ISPUNCT (c);
+ break;
+ case CC_SPACE:
+ result = ISSPACE (c);
+ break;
+ case CC_UPPER:
+ result = ISUPPER (c);
+ break;
+ case CC_WORD:
+ result = (ISALNUM (c) || c == '_');
+ break;
+ case CC_XDIGIT:
+ result = ISXDIGIT (c);
+ break;
+ default:
+ result = -1;
+ break;
+ }
+
+ return result;
+}
+
+/* Now include `sm_loop.c' for single-byte characters. */
+/* The result of FOLD is an `unsigned char' */
+# define FOLD(c) ((flags & FNM_CASEFOLD) \
+ ? TOLOWER ((unsigned char)c) \
+ : ((unsigned char)c))
+
+#define FCT internal_strmatch
+#define GMATCH gmatch
+#define COLLSYM collsym
+#define PARSE_COLLSYM parse_collsym
+#define BRACKMATCH brackmatch
+#define PATSCAN patscan
+#define STRCOMPARE strcompare
+#define EXTMATCH extmatch
+#define STRCHR(S, C) strchr((S), (C))
+#define STRCOLL(S1, S2) strcoll((S1), (S2))
+#define STRLEN(S) strlen(S)
+#define STRCMP(S1, S2) strcmp((S1), (S2))
+#define RANGECMP(C1, C2) rangecmp((C1), (C2))
+#define COLLEQUIV(C1, C2) collequiv((C1), (C2))
+#define CTYPE_T enum char_class
+#define IS_CCLASS(C, S) is_cclass((C), (S))
+#include "sm_loop.c"
+
+#if HANDLE_MULTIBYTE
+
+# define CHAR wchar_t
+# define U_CHAR wint_t
+# define XCHAR wchar_t
+# define INT wint_t
+# define L(CS) L##CS
+# define INVALID WEOF
+
+# undef STREQ
+# undef STREQN
+# define STREQ(s1, s2) ((wcscmp (s1, s2) == 0))
+# define STREQN(a, b, n) ((a)[0] == (b)[0] && wcsncmp(a, b, n) == 0)
+
+static int
+rangecmp_wc (c1, c2)
+ wint_t c1, c2;
+{
+ static wchar_t s1[2] = { L' ', L'\0' };
+ static wchar_t s2[2] = { L' ', L'\0' };
+ int ret;
+
+ if (c1 == c2)
+ return 0;
+
+ s1[0] = c1;
+ s2[0] = c2;
+
+ return (wcscoll (s1, s2));
+}
+
+static int
+collequiv_wc (c, equiv)
+ wint_t c, equiv;
+{
+ return (!(c - equiv));
+}
+
+/* Helper function for collating symbol. */
+# define _COLLSYM _collwcsym
+# define __COLLSYM __collwcsym
+# define POSIXCOLL posix_collwcsyms
+# include "collsyms.h"
+
+static wint_t
+collwcsym (s, len)
+ wchar_t *s;
+ int len;
+{
+ register struct _collwcsym *csp;
+
+ for (csp = posix_collwcsyms; csp->name; csp++)
+ {
+ if (STREQN(csp->name, s, len) && csp->name[len] == L'\0')
+ return (csp->code);
+ }
+ if (len == 1)
+ return s[0];
+ return INVALID;
+}
+
+static int
+is_wcclass (wc, name)
+ wint_t wc;
+ wchar_t *name;
+{
+ char *mbs;
+ mbstate_t state;
+ size_t mbslength;
+ wctype_t desc;
+ int want_word;
+
+ if ((wctype ("ascii") == (wctype_t)0) && (wcscmp (name, L"ascii") == 0))
+ {
+ int c;
+
+ if ((c = wctob (wc)) == EOF)
+ return 0;
+ else
+ return (c <= 0x7F);
+ }
+
+ want_word = (wcscmp (name, L"word") == 0);
+ if (want_word)
+ name = L"alnum";
+
+ memset (&state, '\0', sizeof (mbstate_t));
+ mbs = (char *) malloc (wcslen(name) * MB_CUR_MAX + 1);
+ mbslength = wcsrtombs(mbs, (const wchar_t **)&name, (wcslen(name) * MB_CUR_MAX + 1), &state);
+
+ if (mbslength == (size_t)-1 || mbslength == (size_t)-2)
+ {
+ free (mbs);
+ return -1;
+ }
+ desc = wctype (mbs);
+ free (mbs);
+
+ if (desc == (wctype_t)0)
+ return -1;
+
+ if (want_word)
+ return (iswctype (wc, desc) || wc == L'_');
+ else
+ return (iswctype (wc, desc));
+}
+
+/* Now include `sm_loop.c' for multibyte characters. */
+#define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
+#define FCT internal_wstrmatch
+#define GMATCH gmatch_wc
+#define COLLSYM collwcsym
+#define PARSE_COLLSYM parse_collwcsym
+#define BRACKMATCH brackmatch_wc
+#define PATSCAN patscan_wc
+#define STRCOMPARE wscompare
+#define EXTMATCH extmatch_wc
+#define STRCHR(S, C) wcschr((S), (C))
+#define STRCOLL(S1, S2) wcscoll((S1), (S2))
+#define STRLEN(S) wcslen(S)
+#define STRCMP(S1, S2) wcscmp((S1), (S2))
+#define RANGECMP(C1, C2) rangecmp_wc((C1), (C2))
+#define COLLEQUIV(C1, C2) collequiv_wc((C1), (C2))
+#define CTYPE_T enum char_class
+#define IS_CCLASS(C, S) is_wcclass((C), (S))
+#include "sm_loop.c"
+
+#endif /* HAVE_MULTIBYTE */
+
+int
+xstrmatch (pattern, string, flags)
+ char *pattern;
+ char *string;
+ int flags;
+{
+#if HANDLE_MULTIBYTE
+ int ret;
+ size_t n;
+ wchar_t *wpattern, *wstring;
+
+ if (MB_CUR_MAX == 1)
+ return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
+
+ n = xdupmbstowcs (&wpattern, NULL, pattern);
+ if (n == (size_t)-1 || n == (size_t)-2)
+ return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
+
+ n = xdupmbstowcs (&wstring, NULL, string);
+ if (n == (size_t)-1 || n == (size_t)-2)
+ {
+ free (wpattern);
+ return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
+ }
+
+ ret = internal_wstrmatch (wpattern, wstring, flags);
+
+ free (wpattern);
+ free (wstring);
+
+ return ret;
+#else
+ return (internal_strmatch ((unsigned char *)pattern, (unsigned char *)string, flags));
+#endif /* !HANDLE_MULTIBYTE */
+}