| [e84c831] | 1 | Submitted By: Ken Moffat <ken at linuxfromscratch dot org> | 
|---|
|  | 2 | Date: 2008-02-19 | 
|---|
|  | 3 | Initial Package Version: 2.5.3 | 
|---|
|  | 4 | Upstream Status: uncertain | 
|---|
|  | 5 | Origin: from debian. | 
|---|
|  | 6 | Description: Various fixes, particularly speed improvements for UTF-8 locales. | 
|---|
|  | 7 | Also adds a 'standard input' marker into the results for certain obscure uses. | 
|---|
|  | 8 |  | 
|---|
|  | 9 | diff -Naur grep-2.5.3.orig/lib/posix/regex.h grep-2.5.3.lfs/lib/posix/regex.h | 
|---|
|  | 10 | --- grep-2.5.3.orig/lib/posix/regex.h   2007-06-28 19:57:18.000000000 +0100 | 
|---|
|  | 11 | +++ grep-2.5.3.lfs/lib/posix/regex.h    2008-02-10 18:56:07.000000000 +0000 | 
|---|
|  | 12 | @@ -165,6 +165,10 @@ | 
|---|
|  | 13 | treated as 'a\{1'.  */ | 
|---|
|  | 14 | #define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1) | 
|---|
|  | 15 |  | 
|---|
|  | 16 | +/* If this bit is set, then ignore case when matching. | 
|---|
|  | 17 | +   If not set, then case is significant.  */ | 
|---|
|  | 18 | +#define RE_ICASE (RE_INVALID_INTERVAL_ORD << 1) | 
|---|
|  | 19 | + | 
|---|
|  | 20 | /* This global variable defines the particular regexp syntax to use (for | 
|---|
|  | 21 | some interfaces).  When a regexp is compiled, the syntax used is | 
|---|
|  | 22 | stored in the pattern buffer, so changing this does not affect | 
|---|
|  | 23 | diff -Naur grep-2.5.3.orig/src/dfa.c grep-2.5.3.lfs/src/dfa.c | 
|---|
|  | 24 | --- grep-2.5.3.orig/src/dfa.c   2007-06-28 19:57:19.000000000 +0100 | 
|---|
|  | 25 | +++ grep-2.5.3.lfs/src/dfa.c    2008-02-10 18:55:29.000000000 +0000 | 
|---|
|  | 26 | @@ -594,6 +594,17 @@ | 
|---|
|  | 27 | /* build character class.  */ | 
|---|
|  | 28 | { | 
|---|
|  | 29 | wctype_t wt; | 
|---|
|  | 30 | +                 /* NOTE: | 
|---|
|  | 31 | +                  * When case_fold is set, the character classes [:upper:] and | 
|---|
|  | 32 | +                  * [:lower:] should be treated as [:alpha:], which is what | 
|---|
|  | 33 | +                  * glibc/posix/regcomp.c:build_charclass() does. | 
|---|
|  | 34 | +                  * reported by Bug#276202 | 
|---|
|  | 35 | +                  * - fixed by Fumitoshi UKAI | 
|---|
|  | 36 | +                  */ | 
|---|
|  | 37 | +                 if (case_fold | 
|---|
|  | 38 | +                     && (strcmp (str, "upper") == 0 || strcmp (str, "lower") == 0)) | 
|---|
|  | 39 | +                     strcpy (str, "alpha"); | 
|---|
|  | 40 | + | 
|---|
|  | 41 | /* Query the character class as wctype_t.  */ | 
|---|
|  | 42 | wt = wctype (str); | 
|---|
|  | 43 |  | 
|---|
|  | 44 | @@ -681,6 +692,29 @@ | 
|---|
|  | 45 | REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, | 
|---|
|  | 46 | range_ends_al, work_mbc->nranges + 1); | 
|---|
|  | 47 | work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)wc2; | 
|---|
|  | 48 | +         if (case_fold | 
|---|
|  | 49 | +             && (iswlower((wint_t)wc) || iswupper((wint_t)wc)) | 
|---|
|  | 50 | +             && (iswlower((wint_t)wc2) || iswupper((wint_t)wc2))) { | 
|---|
|  | 51 | +           wint_t altcase; | 
|---|
|  | 52 | +           altcase = wc; | 
|---|
|  | 53 | +           if (iswlower((wint_t)wc)) | 
|---|
|  | 54 | +             altcase = towupper((wint_t)wc); | 
|---|
|  | 55 | +           else | 
|---|
|  | 56 | +             altcase = towlower((wint_t)wc); | 
|---|
|  | 57 | +           REALLOC_IF_NECESSARY(work_mbc->range_sts, wchar_t, | 
|---|
|  | 58 | +                                range_sts_al, work_mbc->nranges + 1); | 
|---|
|  | 59 | +           work_mbc->range_sts[work_mbc->nranges] = (wchar_t)altcase; | 
|---|
|  | 60 | + | 
|---|
|  | 61 | +           altcase = wc2; | 
|---|
|  | 62 | +           if (iswlower((wint_t)wc2)) | 
|---|
|  | 63 | +             altcase = towupper((wint_t)wc2); | 
|---|
|  | 64 | +           else | 
|---|
|  | 65 | +             altcase = towlower((wint_t)wc2); | 
|---|
|  | 66 | +           REALLOC_IF_NECESSARY(work_mbc->range_ends, wchar_t, | 
|---|
|  | 67 | +                                range_ends_al, work_mbc->nranges + 1); | 
|---|
|  | 68 | +           work_mbc->range_ends[work_mbc->nranges++] = (wchar_t)altcase; | 
|---|
|  | 69 | + | 
|---|
|  | 70 | +         } | 
|---|
|  | 71 | } | 
|---|
|  | 72 | else if (wc != WEOF) | 
|---|
|  | 73 | /* build normal characters.  */ | 
|---|
|  | 74 | @@ -688,6 +722,20 @@ | 
|---|
|  | 75 | REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, | 
|---|
|  | 76 | work_mbc->nchars + 1); | 
|---|
|  | 77 | work_mbc->chars[work_mbc->nchars++] = (wchar_t)wc; | 
|---|
|  | 78 | +         if (case_fold && (iswlower((wint_t) wc) || iswupper((wint_t) wc))) | 
|---|
|  | 79 | +           { | 
|---|
|  | 80 | +               wint_t altcase; | 
|---|
|  | 81 | + | 
|---|
|  | 82 | +               altcase = wc;           /* keeps compiler happy */ | 
|---|
|  | 83 | +               if (iswlower((wint_t) wc)) | 
|---|
|  | 84 | +                 altcase = towupper((wint_t) wc); | 
|---|
|  | 85 | +               else if (iswupper((wint_t) wc)) | 
|---|
|  | 86 | +                 altcase = towlower((wint_t) wc); | 
|---|
|  | 87 | + | 
|---|
|  | 88 | +               REALLOC_IF_NECESSARY(work_mbc->chars, wchar_t, chars_al, | 
|---|
|  | 89 | +                              work_mbc->nchars + 1); | 
|---|
|  | 90 | +               work_mbc->chars[work_mbc->nchars++] = (wchar_t) altcase; | 
|---|
|  | 91 | +           } | 
|---|
|  | 92 | } | 
|---|
|  | 93 | } | 
|---|
|  | 94 | while ((wc = wc1) != L']'); | 
|---|
|  | 95 | diff -Naur grep-2.5.3.orig/src/grep.c grep-2.5.3.lfs/src/grep.c | 
|---|
|  | 96 | --- grep-2.5.3.orig/src/grep.c  2007-06-28 19:57:19.000000000 +0100 | 
|---|
|  | 97 | +++ grep-2.5.3.lfs/src/grep.c   2008-02-10 18:54:53.000000000 +0000 | 
|---|
|  | 98 | @@ -274,6 +274,12 @@ | 
|---|
|  | 99 | #endif | 
|---|
|  | 100 | ; | 
|---|
|  | 101 |  | 
|---|
|  | 102 | +/* Default for `file_list' if no files are given on the command line. */ | 
|---|
|  | 103 | +static char *stdin_argv[] = | 
|---|
|  | 104 | +{ | 
|---|
|  | 105 | +  "-", NULL | 
|---|
|  | 106 | +}; | 
|---|
|  | 107 | + | 
|---|
|  | 108 | /* Non-boolean long options that have no corresponding short equivalents.  */ | 
|---|
|  | 109 | enum | 
|---|
|  | 110 | { | 
|---|
|  | 111 | @@ -534,7 +540,16 @@ | 
|---|
|  | 112 | for byte sentinels fore and aft.  */ | 
|---|
|  | 113 | newalloc = newsize + pagesize + 1; | 
|---|
|  | 114 |  | 
|---|
|  | 115 | -      newbuf = bufalloc < newalloc ? xmalloc (bufalloc = newalloc) : buffer; | 
|---|
|  | 116 | +      newbuf = bufalloc < newalloc ? malloc (bufalloc = newalloc) : buffer; | 
|---|
|  | 117 | +      if (newbuf == NULL) | 
|---|
|  | 118 | +       { | 
|---|
|  | 119 | +         int saved_errno = errno; | 
|---|
|  | 120 | +         free (buffer); | 
|---|
|  | 121 | +         bufalloc = ALIGN_TO (INITIAL_BUFSIZE, pagesize) + pagesize + 1; | 
|---|
|  | 122 | +         buffer = xmalloc (bufalloc); | 
|---|
|  | 123 | +         errno = saved_errno; | 
|---|
|  | 124 | +         return 0; | 
|---|
|  | 125 | +       } | 
|---|
|  | 126 | readbuf = ALIGN_TO (newbuf + 1 + save, pagesize); | 
|---|
|  | 127 | bufbeg = readbuf - save; | 
|---|
|  | 128 | memmove (bufbeg, buffer + saved_offset, save); | 
|---|
|  | 129 | @@ -1825,6 +1840,7 @@ | 
|---|
|  | 130 | FILE *fp; | 
|---|
|  | 131 | extern char *optarg; | 
|---|
|  | 132 | extern int optind; | 
|---|
|  | 133 | +  char **file_list; | 
|---|
|  | 134 |  | 
|---|
|  | 135 | initialize_main (&argc, &argv); | 
|---|
|  | 136 | program_name = argv[0]; | 
|---|
|  | 137 | @@ -2244,29 +2260,29 @@ | 
|---|
|  | 138 | if (max_count == 0) | 
|---|
|  | 139 | exit (1); | 
|---|
|  | 140 |  | 
|---|
|  | 141 | -  if (optind < argc) | 
|---|
|  | 142 | +  file_list = (optind == argc ? stdin_argv : &argv[optind]); | 
|---|
|  | 143 | + | 
|---|
|  | 144 | +  status = 1; | 
|---|
|  | 145 | +  while (1) | 
|---|
|  | 146 | { | 
|---|
|  | 147 | -       status = 1; | 
|---|
|  | 148 | -       do | 
|---|
|  | 149 | +      char *file = *file_list++; | 
|---|
|  | 150 | + | 
|---|
|  | 151 | +      if (file == NULL) | 
|---|
|  | 152 | +       break; | 
|---|
|  | 153 | + | 
|---|
|  | 154 | +      if ((included_patterns || excluded_patterns) | 
|---|
|  | 155 | +         && !isdir (file)) | 
|---|
|  | 156 | { | 
|---|
|  | 157 | -         char *file = argv[optind]; | 
|---|
|  | 158 | -         if ((included_patterns || excluded_patterns) | 
|---|
|  | 159 | -             && !isdir (file)) | 
|---|
|  | 160 | -           { | 
|---|
|  | 161 | -             if (included_patterns && | 
|---|
|  | 162 | -                 ! excluded_filename (included_patterns, file, 0)) | 
|---|
|  | 163 | -               continue; | 
|---|
|  | 164 | -             if (excluded_patterns && | 
|---|
|  | 165 | -                 excluded_filename (excluded_patterns, file, 0)) | 
|---|
|  | 166 | -               continue; | 
|---|
|  | 167 | -           } | 
|---|
|  | 168 | -         status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file, | 
|---|
|  | 169 | -                             &stats_base); | 
|---|
|  | 170 | +         if (included_patterns && | 
|---|
|  | 171 | +             ! excluded_filename (included_patterns, file, 0)) | 
|---|
|  | 172 | +           continue; | 
|---|
|  | 173 | +         if (excluded_patterns && | 
|---|
|  | 174 | +             excluded_filename (excluded_patterns, file, 0)) | 
|---|
|  | 175 | +           continue; | 
|---|
|  | 176 | } | 
|---|
|  | 177 | -       while ( ++optind < argc); | 
|---|
|  | 178 | +      status &= grepfile (strcmp (file, "-") == 0 | 
|---|
|  | 179 | +                         ? (char *) NULL : file, &stats_base); | 
|---|
|  | 180 | } | 
|---|
|  | 181 | -  else | 
|---|
|  | 182 | -    status = grepfile ((char *) NULL, &stats_base); | 
|---|
|  | 183 |  | 
|---|
|  | 184 | /* We register via atexit() to test stdout.  */ | 
|---|
|  | 185 | exit (errseen ? 2 : status); | 
|---|
|  | 186 | diff -Naur grep-2.5.3.orig/src/search.c grep-2.5.3.lfs/src/search.c | 
|---|
|  | 187 | --- grep-2.5.3.orig/src/search.c        2007-06-28 19:57:19.000000000 +0100 | 
|---|
|  | 188 | +++ grep-2.5.3.lfs/src/search.c 2008-02-10 18:56:18.000000000 +0000 | 
|---|
|  | 189 | @@ -18,10 +18,15 @@ | 
|---|
|  | 190 |  | 
|---|
|  | 191 | /* Written August 1992 by Mike Haertel. */ | 
|---|
|  | 192 |  | 
|---|
|  | 193 | +#ifndef _GNU_SOURCE | 
|---|
|  | 194 | +# define _GNU_SOURCE 1 | 
|---|
|  | 195 | +#endif | 
|---|
|  | 196 | #ifdef HAVE_CONFIG_H | 
|---|
|  | 197 | # include <config.h> | 
|---|
|  | 198 | #endif | 
|---|
|  | 199 |  | 
|---|
|  | 200 | +#include <assert.h> | 
|---|
|  | 201 | + | 
|---|
|  | 202 | #include <sys/types.h> | 
|---|
|  | 203 |  | 
|---|
|  | 204 | #include "mbsupport.h" | 
|---|
|  | 205 | @@ -43,6 +48,9 @@ | 
|---|
|  | 206 | #ifdef HAVE_LIBPCRE | 
|---|
|  | 207 | # include <pcre.h> | 
|---|
|  | 208 | #endif | 
|---|
|  | 209 | +#ifdef HAVE_LANGINFO_CODESET | 
|---|
|  | 210 | +# include <langinfo.h> | 
|---|
|  | 211 | +#endif | 
|---|
|  | 212 |  | 
|---|
|  | 213 | #define NCHAR (UCHAR_MAX + 1) | 
|---|
|  | 214 |  | 
|---|
|  | 215 | @@ -68,6 +76,19 @@ | 
|---|
|  | 216 | error (2, 0, _("memory exhausted")); | 
|---|
|  | 217 | } | 
|---|
|  | 218 |  | 
|---|
|  | 219 | +/* UTF-8 encoding allows some optimizations that we can't otherwise | 
|---|
|  | 220 | +   assume in a multibyte encoding. */ | 
|---|
|  | 221 | +static int using_utf8; | 
|---|
|  | 222 | + | 
|---|
|  | 223 | +void | 
|---|
|  | 224 | +check_utf8 (void) | 
|---|
|  | 225 | +{ | 
|---|
|  | 226 | +#ifdef HAVE_LANGINFO_CODESET | 
|---|
|  | 227 | +  if (strcmp (nl_langinfo (CODESET), "UTF-8") == 0) | 
|---|
|  | 228 | +    using_utf8 = 1; | 
|---|
|  | 229 | +#endif | 
|---|
|  | 230 | +} | 
|---|
|  | 231 | + | 
|---|
|  | 232 | #ifndef FGREP_PROGRAM | 
|---|
|  | 233 | /* DFA compiled regexp. */ | 
|---|
|  | 234 | static struct dfa dfa; | 
|---|
|  | 235 | @@ -134,49 +155,6 @@ | 
|---|
|  | 236 | } | 
|---|
|  | 237 | #endif /* !FGREP_PROGRAM */ | 
|---|
|  | 238 |  | 
|---|
|  | 239 | -#ifdef MBS_SUPPORT | 
|---|
|  | 240 | -/* This function allocate the array which correspond to "buf". | 
|---|
|  | 241 | -   Then this check multibyte string and mark on the positions which | 
|---|
|  | 242 | -   are not single byte character nor the first byte of a multibyte | 
|---|
|  | 243 | -   character.  Caller must free the array.  */ | 
|---|
|  | 244 | -static char* | 
|---|
|  | 245 | -check_multibyte_string(char const *buf, size_t size) | 
|---|
|  | 246 | -{ | 
|---|
|  | 247 | -  char *mb_properties = xmalloc(size); | 
|---|
|  | 248 | -  mbstate_t cur_state; | 
|---|
|  | 249 | -  wchar_t wc; | 
|---|
|  | 250 | -  int i; | 
|---|
|  | 251 | - | 
|---|
|  | 252 | -  memset(&cur_state, 0, sizeof(mbstate_t)); | 
|---|
|  | 253 | -  memset(mb_properties, 0, sizeof(char)*size); | 
|---|
|  | 254 | - | 
|---|
|  | 255 | -  for (i = 0; i < size ;) | 
|---|
|  | 256 | -    { | 
|---|
|  | 257 | -      size_t mbclen; | 
|---|
|  | 258 | -      mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state); | 
|---|
|  | 259 | - | 
|---|
|  | 260 | -      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0) | 
|---|
|  | 261 | -       { | 
|---|
|  | 262 | -         /* An invalid sequence, or a truncated multibyte character. | 
|---|
|  | 263 | -            We treat it as a single byte character.  */ | 
|---|
|  | 264 | -         mbclen = 1; | 
|---|
|  | 265 | -       } | 
|---|
|  | 266 | -      else if (match_icase) | 
|---|
|  | 267 | -       { | 
|---|
|  | 268 | -         if (iswupper((wint_t)wc)) | 
|---|
|  | 269 | -           { | 
|---|
|  | 270 | -             wc = towlower((wint_t)wc); | 
|---|
|  | 271 | -             wcrtomb(buf + i, wc, &cur_state); | 
|---|
|  | 272 | -           } | 
|---|
|  | 273 | -       } | 
|---|
|  | 274 | -      mb_properties[i] = mbclen; | 
|---|
|  | 275 | -      i += mbclen; | 
|---|
|  | 276 | -    } | 
|---|
|  | 277 | - | 
|---|
|  | 278 | -  return mb_properties; | 
|---|
|  | 279 | -} | 
|---|
|  | 280 | -#endif /* MBS_SUPPORT */ | 
|---|
|  | 281 | - | 
|---|
|  | 282 | #if defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) | 
|---|
|  | 283 | #ifdef EGREP_PROGRAM | 
|---|
|  | 284 | COMPILE_FCT(Ecompile) | 
|---|
|  | 285 | @@ -193,10 +171,9 @@ | 
|---|
|  | 286 | size_t total = size; | 
|---|
|  | 287 | char const *motif = pattern; | 
|---|
|  | 288 |  | 
|---|
|  | 289 | -#if 0 | 
|---|
|  | 290 | +  check_utf8 (); | 
|---|
|  | 291 | if (match_icase) | 
|---|
|  | 292 | syntax_bits |= RE_ICASE; | 
|---|
|  | 293 | -#endif | 
|---|
|  | 294 | re_set_syntax (syntax_bits); | 
|---|
|  | 295 | dfasyntax (syntax_bits, match_icase, eolbyte); | 
|---|
|  | 296 |  | 
|---|
|  | 297 | @@ -301,23 +278,35 @@ | 
|---|
|  | 298 | char eol = eolbyte; | 
|---|
|  | 299 | int backref, start, len, best_len; | 
|---|
|  | 300 | struct kwsmatch kwsm; | 
|---|
|  | 301 | +  static int use_dfa; | 
|---|
|  | 302 | +  static int use_dfa_checked = 0; | 
|---|
|  | 303 | size_t i, ret_val; | 
|---|
|  | 304 | #ifdef MBS_SUPPORT | 
|---|
|  | 305 | -  char *mb_properties = NULL; | 
|---|
|  | 306 | -  if (MB_CUR_MAX > 1) | 
|---|
|  | 307 | +  const char *last_char = NULL; | 
|---|
|  | 308 | +  int mb_cur_max = MB_CUR_MAX; | 
|---|
|  | 309 | +  mbstate_t mbs; | 
|---|
|  | 310 | +  memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 311 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 312 | + | 
|---|
|  | 313 | +  if (!use_dfa_checked) | 
|---|
|  | 314 | { | 
|---|
|  | 315 | -      if (match_icase) | 
|---|
|  | 316 | -        { | 
|---|
|  | 317 | -          char *case_buf = xmalloc(size); | 
|---|
|  | 318 | -          memcpy(case_buf, buf, size); | 
|---|
|  | 319 | -         if (start_ptr) | 
|---|
|  | 320 | -           start_ptr = case_buf + (start_ptr - buf); | 
|---|
|  | 321 | -          buf = case_buf; | 
|---|
|  | 322 | -        } | 
|---|
|  | 323 | -      if (kwset) | 
|---|
|  | 324 | -        mb_properties = check_multibyte_string(buf, size); | 
|---|
|  | 325 | -    } | 
|---|
|  | 326 | +      char *grep_use_dfa = getenv ("GREP_USE_DFA"); | 
|---|
|  | 327 | +      if (!grep_use_dfa) | 
|---|
|  | 328 | +       { | 
|---|
|  | 329 | +#ifdef MBS_SUPPORT | 
|---|
|  | 330 | +         /* Turn off DFA when processing multibyte input. */ | 
|---|
|  | 331 | +         use_dfa = (MB_CUR_MAX == 1); | 
|---|
|  | 332 | +#else | 
|---|
|  | 333 | +         use_dfa = 1; | 
|---|
|  | 334 | #endif /* MBS_SUPPORT */ | 
|---|
|  | 335 | +       } | 
|---|
|  | 336 | +      else | 
|---|
|  | 337 | +       { | 
|---|
|  | 338 | +         use_dfa = atoi (grep_use_dfa); | 
|---|
|  | 339 | +       } | 
|---|
|  | 340 | + | 
|---|
|  | 341 | +      use_dfa_checked = 1; | 
|---|
|  | 342 | +    } | 
|---|
|  | 343 |  | 
|---|
|  | 344 | buflim = buf + size; | 
|---|
|  | 345 |  | 
|---|
|  | 346 | @@ -329,40 +318,123 @@ | 
|---|
|  | 347 | if (kwset) | 
|---|
|  | 348 | { | 
|---|
|  | 349 | /* Find a possible match using the KWset matcher. */ | 
|---|
|  | 350 | -             size_t offset = kwsexec (kwset, beg, buflim - beg, &kwsm); | 
|---|
|  | 351 | +#ifdef MBS_SUPPORT | 
|---|
|  | 352 | +             size_t bytes_left = 0; | 
|---|
|  | 353 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 354 | +             size_t offset; | 
|---|
|  | 355 | +#ifdef MBS_SUPPORT | 
|---|
|  | 356 | +             /* kwsexec doesn't work with match_icase and multibyte input. */ | 
|---|
|  | 357 | +             if (match_icase && mb_cur_max > 1) | 
|---|
|  | 358 | +               /* Avoid kwset */ | 
|---|
|  | 359 | +               offset = 0; | 
|---|
|  | 360 | +             else | 
|---|
|  | 361 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 362 | +             offset = kwsexec (kwset, beg, buflim - beg, &kwsm); | 
|---|
|  | 363 | if (offset == (size_t) -1) | 
|---|
|  | 364 | -               goto failure; | 
|---|
|  | 365 | +               return (size_t)-1; | 
|---|
|  | 366 | +#ifdef MBS_SUPPORT | 
|---|
|  | 367 | +             if (mb_cur_max > 1 && !using_utf8) | 
|---|
|  | 368 | +               { | 
|---|
|  | 369 | +                 bytes_left = offset; | 
|---|
|  | 370 | +                 while (bytes_left) | 
|---|
|  | 371 | +                   { | 
|---|
|  | 372 | +                     size_t mlen = mbrlen (beg, bytes_left, &mbs); | 
|---|
|  | 373 | + | 
|---|
|  | 374 | +                     last_char = beg; | 
|---|
|  | 375 | +                     if (mlen == (size_t) -1 || mlen == 0) | 
|---|
|  | 376 | +                       { | 
|---|
|  | 377 | +                         /* Invalid sequence or NUL: treat as single-byte. */ | 
|---|
|  | 378 | +                         memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 379 | +                         beg++; | 
|---|
|  | 380 | +                         bytes_left--; | 
|---|
|  | 381 | +                         continue; | 
|---|
|  | 382 | +                       } | 
|---|
|  | 383 | + | 
|---|
|  | 384 | +                     if (mlen == (size_t) -2) | 
|---|
|  | 385 | +                       /* Offset points inside multibyte character: | 
|---|
|  | 386 | +                        * no good. */ | 
|---|
|  | 387 | +                       break; | 
|---|
|  | 388 | + | 
|---|
|  | 389 | +                     beg += mlen; | 
|---|
|  | 390 | +                     bytes_left -= mlen; | 
|---|
|  | 391 | +                   } | 
|---|
|  | 392 | +               } | 
|---|
|  | 393 | +             else | 
|---|
|  | 394 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 395 | beg += offset; | 
|---|
|  | 396 | /* Narrow down to the line containing the candidate, and | 
|---|
|  | 397 | run it through DFA. */ | 
|---|
|  | 398 | end = memchr(beg, eol, buflim - beg); | 
|---|
|  | 399 | end++; | 
|---|
|  | 400 | #ifdef MBS_SUPPORT | 
|---|
|  | 401 | -             if (MB_CUR_MAX > 1 && mb_properties[beg - buf] == 0) | 
|---|
|  | 402 | +             if (mb_cur_max > 1 && bytes_left) | 
|---|
|  | 403 | continue; | 
|---|
|  | 404 | #endif | 
|---|
|  | 405 | while (beg > buf && beg[-1] != eol) | 
|---|
|  | 406 | --beg; | 
|---|
|  | 407 | -             if (kwsm.index < kwset_exact_matches) | 
|---|
|  | 408 | +             if ( | 
|---|
|  | 409 | +#ifdef MBS_SUPPORT | 
|---|
|  | 410 | +                 !(match_icase && mb_cur_max > 1) && | 
|---|
|  | 411 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 412 | +                 (kwsm.index < kwset_exact_matches)) | 
|---|
|  | 413 | goto success; | 
|---|
|  | 414 | -             if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) | 
|---|
|  | 415 | +             if (use_dfa && | 
|---|
|  | 416 | +                 dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1) | 
|---|
|  | 417 | continue; | 
|---|
|  | 418 | } | 
|---|
|  | 419 | else | 
|---|
|  | 420 | { | 
|---|
|  | 421 | /* No good fixed strings; start with DFA. */ | 
|---|
|  | 422 | -             size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref); | 
|---|
|  | 423 | +#ifdef MBS_SUPPORT | 
|---|
|  | 424 | +             size_t bytes_left = 0; | 
|---|
|  | 425 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 426 | +             size_t offset = 0; | 
|---|
|  | 427 | +             if (use_dfa) | 
|---|
|  | 428 | +               offset = dfaexec (&dfa, beg, buflim - beg, &backref); | 
|---|
|  | 429 | if (offset == (size_t) -1) | 
|---|
|  | 430 | break; | 
|---|
|  | 431 | /* Narrow down to the line we've found. */ | 
|---|
|  | 432 | +#ifdef MBS_SUPPORT | 
|---|
|  | 433 | +             if (mb_cur_max > 1 && !using_utf8) | 
|---|
|  | 434 | +               { | 
|---|
|  | 435 | +                 bytes_left = offset; | 
|---|
|  | 436 | +                 while (bytes_left) | 
|---|
|  | 437 | +                   { | 
|---|
|  | 438 | +                     size_t mlen = mbrlen (beg, bytes_left, &mbs); | 
|---|
|  | 439 | + | 
|---|
|  | 440 | +                     last_char = beg; | 
|---|
|  | 441 | +                     if (mlen == (size_t) -1 || mlen == 0) | 
|---|
|  | 442 | +                       { | 
|---|
|  | 443 | +                         /* Invalid sequence or NUL: treat as single-byte. */ | 
|---|
|  | 444 | +                         memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 445 | +                         beg++; | 
|---|
|  | 446 | +                         bytes_left--; | 
|---|
|  | 447 | +                         continue; | 
|---|
|  | 448 | +                       } | 
|---|
|  | 449 | + | 
|---|
|  | 450 | +                     if (mlen == (size_t) -2) | 
|---|
|  | 451 | +                       /* Offset points inside multibyte character: | 
|---|
|  | 452 | +                        * no good. */ | 
|---|
|  | 453 | +                       break; | 
|---|
|  | 454 | + | 
|---|
|  | 455 | +                     beg += mlen; | 
|---|
|  | 456 | +                     bytes_left -= mlen; | 
|---|
|  | 457 | +                   } | 
|---|
|  | 458 | +               } | 
|---|
|  | 459 | +             else | 
|---|
|  | 460 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 461 | beg += offset; | 
|---|
|  | 462 | end = memchr (beg, eol, buflim - beg); | 
|---|
|  | 463 | end++; | 
|---|
|  | 464 | +#ifdef MBS_SUPPORT | 
|---|
|  | 465 | +             if (mb_cur_max > 1 && bytes_left) | 
|---|
|  | 466 | +               continue; | 
|---|
|  | 467 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 468 | while (beg > buf && beg[-1] != eol) | 
|---|
|  | 469 | --beg; | 
|---|
|  | 470 | } | 
|---|
|  | 471 | /* Successful, no backreferences encountered! */ | 
|---|
|  | 472 | -         if (!backref) | 
|---|
|  | 473 | +         if (use_dfa && !backref) | 
|---|
|  | 474 | goto success; | 
|---|
|  | 475 | } | 
|---|
|  | 476 | else | 
|---|
|  | 477 | @@ -408,10 +480,84 @@ | 
|---|
|  | 478 | if (match_words) | 
|---|
|  | 479 | while (match <= best_match) | 
|---|
|  | 480 | { | 
|---|
|  | 481 | -                   if ((match == buf || !WCHAR ((unsigned char) match[-1])) | 
|---|
|  | 482 | -                       && (len == end - beg - 1 | 
|---|
|  | 483 | -                           || !WCHAR ((unsigned char) match[len]))) | 
|---|
|  | 484 | -                     goto assess_pattern_match; | 
|---|
|  | 485 | +                   int lword_match = 0; | 
|---|
|  | 486 | +                   if (match == buf) | 
|---|
|  | 487 | +                     lword_match = 1; | 
|---|
|  | 488 | +                   else | 
|---|
|  | 489 | +                     { | 
|---|
|  | 490 | +                       assert (start > 0); | 
|---|
|  | 491 | +#ifdef MBS_SUPPORT | 
|---|
|  | 492 | +                       if (mb_cur_max > 1) | 
|---|
|  | 493 | +                         { | 
|---|
|  | 494 | +                           const char *s; | 
|---|
|  | 495 | +                           int mr; | 
|---|
|  | 496 | +                           wchar_t pwc; | 
|---|
|  | 497 | +                           if (using_utf8) | 
|---|
|  | 498 | +                             { | 
|---|
|  | 499 | +                               s = match - 1; | 
|---|
|  | 500 | +                               while (s > buf | 
|---|
|  | 501 | +                                      && (unsigned char) *s >= 0x80 | 
|---|
|  | 502 | +                                      && (unsigned char) *s <= 0xbf) | 
|---|
|  | 503 | +                                 --s; | 
|---|
|  | 504 | +                             } | 
|---|
|  | 505 | +                           else | 
|---|
|  | 506 | +                             s = last_char; | 
|---|
|  | 507 | +                           mr = mbtowc (&pwc, s, match - s); | 
|---|
|  | 508 | +                           if (mr <= 0) | 
|---|
|  | 509 | +                             { | 
|---|
|  | 510 | +                               memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 511 | +                               lword_match = 1; | 
|---|
|  | 512 | +                             } | 
|---|
|  | 513 | +                           else if (!(iswalnum (pwc) || pwc == L'_') | 
|---|
|  | 514 | +                                    && mr == (int) (match - s)) | 
|---|
|  | 515 | +                             lword_match = 1; | 
|---|
|  | 516 | +                         } | 
|---|
|  | 517 | +                       else | 
|---|
|  | 518 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 519 | +                       if (!WCHAR ((unsigned char) match[-1])) | 
|---|
|  | 520 | +                         lword_match = 1; | 
|---|
|  | 521 | +                     } | 
|---|
|  | 522 | + | 
|---|
|  | 523 | +                   if (lword_match) | 
|---|
|  | 524 | +                     { | 
|---|
|  | 525 | +                       int rword_match = 0; | 
|---|
|  | 526 | +                       if (start + len == end - beg - 1) | 
|---|
|  | 527 | +                         rword_match = 1; | 
|---|
|  | 528 | +                       else | 
|---|
|  | 529 | +                         { | 
|---|
|  | 530 | +#ifdef MBS_SUPPORT | 
|---|
|  | 531 | +                           if (mb_cur_max > 1) | 
|---|
|  | 532 | +                             { | 
|---|
|  | 533 | +                               wchar_t nwc; | 
|---|
|  | 534 | +                               int mr; | 
|---|
|  | 535 | + | 
|---|
|  | 536 | +                               mr = mbtowc (&nwc, buf + start + len, | 
|---|
|  | 537 | +                                            end - buf - start - len - 1); | 
|---|
|  | 538 | +                               if (mr <= 0) | 
|---|
|  | 539 | +                                 { | 
|---|
|  | 540 | +                                   memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 541 | +                                   rword_match = 1; | 
|---|
|  | 542 | +                                 } | 
|---|
|  | 543 | +                               else if (!iswalnum (nwc) && nwc != L'_') | 
|---|
|  | 544 | +                                 rword_match = 1; | 
|---|
|  | 545 | +                             } | 
|---|
|  | 546 | +                           else | 
|---|
|  | 547 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 548 | +                           if (!WCHAR ((unsigned char) match[len])) | 
|---|
|  | 549 | +                             rword_match = 1; | 
|---|
|  | 550 | +                         } | 
|---|
|  | 551 | + | 
|---|
|  | 552 | +                       if (rword_match) | 
|---|
|  | 553 | +                         { | 
|---|
|  | 554 | +                           if (!start_ptr) | 
|---|
|  | 555 | +                             /* Returns the whole line. */ | 
|---|
|  | 556 | +                             goto success; | 
|---|
|  | 557 | +                           else | 
|---|
|  | 558 | +                             { | 
|---|
|  | 559 | +                               goto assess_pattern_match; | 
|---|
|  | 560 | +                             } | 
|---|
|  | 561 | +                         } | 
|---|
|  | 562 | +                     } | 
|---|
|  | 563 | if (len > 0) | 
|---|
|  | 564 | { | 
|---|
|  | 565 | /* Try a shorter length anchored at the same place. */ | 
|---|
|  | 566 | @@ -475,24 +621,144 @@ | 
|---|
|  | 567 | *match_size = len; | 
|---|
|  | 568 | ret_val = beg - buf; | 
|---|
|  | 569 | out: | 
|---|
|  | 570 | -#ifdef MBS_SUPPORT | 
|---|
|  | 571 | -  if (MB_CUR_MAX > 1) | 
|---|
|  | 572 | -    { | 
|---|
|  | 573 | -      if (match_icase) | 
|---|
|  | 574 | -        free((char*)buf); | 
|---|
|  | 575 | -      if (mb_properties) | 
|---|
|  | 576 | -        free(mb_properties); | 
|---|
|  | 577 | -    } | 
|---|
|  | 578 | -#endif /* MBS_SUPPORT */ | 
|---|
|  | 579 | return ret_val; | 
|---|
|  | 580 | } | 
|---|
|  | 581 | #endif /* defined(GREP_PROGRAM) || defined(EGREP_PROGRAM) */ | 
|---|
|  | 582 |  | 
|---|
|  | 583 | +#ifdef MBS_SUPPORT | 
|---|
|  | 584 | +static int f_i_multibyte; /* whether we're using the new -Fi MB method */ | 
|---|
|  | 585 | +static struct | 
|---|
|  | 586 | +{ | 
|---|
|  | 587 | +  wchar_t **patterns; | 
|---|
|  | 588 | +  size_t count, maxlen; | 
|---|
|  | 589 | +  unsigned char *match; | 
|---|
|  | 590 | +} Fimb; | 
|---|
|  | 591 | +#endif | 
|---|
|  | 592 | + | 
|---|
|  | 593 | #if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) | 
|---|
|  | 594 | COMPILE_FCT(Fcompile) | 
|---|
|  | 595 | { | 
|---|
|  | 596 | +  int mb_cur_max = MB_CUR_MAX; | 
|---|
|  | 597 | char const *beg, *lim, *err; | 
|---|
|  | 598 |  | 
|---|
|  | 599 | +  check_utf8 (); | 
|---|
|  | 600 | +#ifdef MBS_SUPPORT | 
|---|
|  | 601 | +  /* Support -F -i for multibyte (e.g. UTF-8) input. */ | 
|---|
|  | 602 | +  if (match_icase && mb_cur_max > 1) | 
|---|
|  | 603 | +    { | 
|---|
|  | 604 | +      mbstate_t mbs; | 
|---|
|  | 605 | +      wchar_t *wcpattern = xmalloc ((size + 1) * sizeof (wchar_t)); | 
|---|
|  | 606 | +      const char *patternend = pattern; | 
|---|
|  | 607 | +      size_t wcsize; | 
|---|
|  | 608 | +      kwset_t fimb_kwset = NULL; | 
|---|
|  | 609 | +      char *starts = NULL; | 
|---|
|  | 610 | +      wchar_t *wcbeg, *wclim; | 
|---|
|  | 611 | +      size_t allocated = 0; | 
|---|
|  | 612 | + | 
|---|
|  | 613 | +      memset (&mbs, '\0', sizeof (mbs)); | 
|---|
|  | 614 | +# ifdef __GNU_LIBRARY__ | 
|---|
|  | 615 | +      wcsize = mbsnrtowcs (wcpattern, &patternend, size, size, &mbs); | 
|---|
|  | 616 | +      if (patternend != pattern + size) | 
|---|
|  | 617 | +       wcsize = (size_t) -1; | 
|---|
|  | 618 | +# else | 
|---|
|  | 619 | +      { | 
|---|
|  | 620 | +       char *patterncopy = xmalloc (size + 1); | 
|---|
|  | 621 | + | 
|---|
|  | 622 | +       memcpy (patterncopy, pattern, size); | 
|---|
|  | 623 | +       patterncopy[size] = '\0'; | 
|---|
|  | 624 | +       patternend = patterncopy; | 
|---|
|  | 625 | +       wcsize = mbsrtowcs (wcpattern, &patternend, size, &mbs); | 
|---|
|  | 626 | +       if (patternend != patterncopy + size) | 
|---|
|  | 627 | +         wcsize = (size_t) -1; | 
|---|
|  | 628 | +       free (patterncopy); | 
|---|
|  | 629 | +      } | 
|---|
|  | 630 | +# endif | 
|---|
|  | 631 | +      if (wcsize + 2 <= 2) | 
|---|
|  | 632 | +       { | 
|---|
|  | 633 | +fimb_fail: | 
|---|
|  | 634 | +         free (wcpattern); | 
|---|
|  | 635 | +         free (starts); | 
|---|
|  | 636 | +         if (fimb_kwset) | 
|---|
|  | 637 | +           kwsfree (fimb_kwset); | 
|---|
|  | 638 | +         free (Fimb.patterns); | 
|---|
|  | 639 | +         Fimb.patterns = NULL; | 
|---|
|  | 640 | +       } | 
|---|
|  | 641 | +      else | 
|---|
|  | 642 | +       { | 
|---|
|  | 643 | +         if (!(fimb_kwset = kwsalloc (NULL))) | 
|---|
|  | 644 | +           error (2, 0, _("memory exhausted")); | 
|---|
|  | 645 | + | 
|---|
|  | 646 | +         starts = xmalloc (mb_cur_max * 3); | 
|---|
|  | 647 | +         wcbeg = wcpattern; | 
|---|
|  | 648 | +         do | 
|---|
|  | 649 | +           { | 
|---|
|  | 650 | +             int i; | 
|---|
|  | 651 | +             size_t wclen; | 
|---|
|  | 652 | + | 
|---|
|  | 653 | +             if (Fimb.count >= allocated) | 
|---|
|  | 654 | +               { | 
|---|
|  | 655 | +                 if (allocated == 0) | 
|---|
|  | 656 | +                   allocated = 128; | 
|---|
|  | 657 | +                 else | 
|---|
|  | 658 | +                   allocated *= 2; | 
|---|
|  | 659 | +                 Fimb.patterns = xrealloc (Fimb.patterns, | 
|---|
|  | 660 | +                                           sizeof (wchar_t *) * allocated); | 
|---|
|  | 661 | +               } | 
|---|
|  | 662 | +             Fimb.patterns[Fimb.count++] = wcbeg; | 
|---|
|  | 663 | +             for (wclim = wcbeg; | 
|---|
|  | 664 | +                  wclim < wcpattern + wcsize && *wclim != L'\n'; ++wclim) | 
|---|
|  | 665 | +               *wclim = towlower (*wclim); | 
|---|
|  | 666 | +             *wclim = L'\0'; | 
|---|
|  | 667 | +             wclen = wclim - wcbeg; | 
|---|
|  | 668 | +             if (wclen > Fimb.maxlen) | 
|---|
|  | 669 | +               Fimb.maxlen = wclen; | 
|---|
|  | 670 | +             if (wclen > 3) | 
|---|
|  | 671 | +               wclen = 3; | 
|---|
|  | 672 | +             if (wclen == 0) | 
|---|
|  | 673 | +               { | 
|---|
|  | 674 | +                 if ((err = kwsincr (fimb_kwset, "", 0)) != 0) | 
|---|
|  | 675 | +                   error (2, 0, err); | 
|---|
|  | 676 | +               } | 
|---|
|  | 677 | +             else | 
|---|
|  | 678 | +               for (i = 0; i < (1 << wclen); i++) | 
|---|
|  | 679 | +                 { | 
|---|
|  | 680 | +                   char *p = starts; | 
|---|
|  | 681 | +                   int j, k; | 
|---|
|  | 682 | + | 
|---|
|  | 683 | +                   for (j = 0; j < wclen; ++j) | 
|---|
|  | 684 | +                     { | 
|---|
|  | 685 | +                       wchar_t wc = wcbeg[j]; | 
|---|
|  | 686 | +                       if (i & (1 << j)) | 
|---|
|  | 687 | +                         { | 
|---|
|  | 688 | +                           wc = towupper (wc); | 
|---|
|  | 689 | +                           if (wc == wcbeg[j]) | 
|---|
|  | 690 | +                             continue; | 
|---|
|  | 691 | +                         } | 
|---|
|  | 692 | +                       k = wctomb (p, wc); | 
|---|
|  | 693 | +                       if (k <= 0) | 
|---|
|  | 694 | +                         goto fimb_fail; | 
|---|
|  | 695 | +                       p += k; | 
|---|
|  | 696 | +                     } | 
|---|
|  | 697 | +                   if ((err = kwsincr (fimb_kwset, starts, p - starts)) != 0) | 
|---|
|  | 698 | +                     error (2, 0, err); | 
|---|
|  | 699 | +                 } | 
|---|
|  | 700 | +             if (wclim < wcpattern + wcsize) | 
|---|
|  | 701 | +               ++wclim; | 
|---|
|  | 702 | +             wcbeg = wclim; | 
|---|
|  | 703 | +           } | 
|---|
|  | 704 | +         while (wcbeg < wcpattern + wcsize); | 
|---|
|  | 705 | +         f_i_multibyte = 1; | 
|---|
|  | 706 | +         kwset = fimb_kwset; | 
|---|
|  | 707 | +         free (starts); | 
|---|
|  | 708 | +         Fimb.match = xmalloc (Fimb.count); | 
|---|
|  | 709 | +         if ((err = kwsprep (kwset)) != 0) | 
|---|
|  | 710 | +           error (2, 0, err); | 
|---|
|  | 711 | +         return; | 
|---|
|  | 712 | +       } | 
|---|
|  | 713 | +    } | 
|---|
|  | 714 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 715 | + | 
|---|
|  | 716 | + | 
|---|
|  | 717 | kwsinit (); | 
|---|
|  | 718 | beg = pattern; | 
|---|
|  | 719 | do | 
|---|
|  | 720 | @@ -511,6 +777,76 @@ | 
|---|
|  | 721 | error (2, 0, err); | 
|---|
|  | 722 | } | 
|---|
|  | 723 |  | 
|---|
|  | 724 | +#ifdef MBS_SUPPORT | 
|---|
|  | 725 | +static int /* 0 if a pattern matches case-insensitively at the start of buf (byte length stored in *plen), else -1 */ | 
|---|
|  | 726 | +Fimbexec (const char *buf, size_t size, size_t *plen, int exact) | 
|---|
|  | 727 | +{ | 
|---|
|  | 728 | +  size_t len, letter, i; | 
|---|
|  | 729 | +  int ret = -1; | 
|---|
|  | 730 | +  mbstate_t mbs; | 
|---|
|  | 731 | +  wchar_t wc; | 
|---|
|  | 732 | +  int patterns_left; | 
|---|
|  | 733 | + | 
|---|
|  | 734 | +  assert (match_icase && f_i_multibyte == 1); | 
|---|
|  | 735 | +  assert (MB_CUR_MAX > 1); | 
|---|
|  | 736 | + | 
|---|
|  | 737 | +  memset (&mbs, '\0', sizeof (mbs)); | 
|---|
|  | 738 | +  memset (Fimb.match, '\1', Fimb.count); /* every pattern starts out as a candidate */ | 
|---|
|  | 739 | +  letter = len = 0; | 
|---|
|  | 740 | +  patterns_left = 1; | 
|---|
|  | 741 | +  while (patterns_left && len <= size) | 
|---|
|  | 742 | +    { | 
|---|
|  | 743 | +      size_t c; | 
|---|
|  | 744 | + | 
|---|
|  | 745 | +      patterns_left = 0; | 
|---|
|  | 746 | +      if (len < size) | 
|---|
|  | 747 | +       { | 
|---|
|  | 748 | +         c = mbrtowc (&wc, buf + len, size - len, &mbs); | 
|---|
|  | 749 | +         if (c + 2 <= 2) /* true when c is 0, (size_t) -1 or (size_t) -2 */ | 
|---|
|  | 750 | +           return ret; | 
|---|
|  | 751 | + | 
|---|
|  | 752 | +         wc = towlower (wc); | 
|---|
|  | 753 | +       } | 
|---|
|  | 754 | +      else | 
|---|
|  | 755 | +       { | 
|---|
|  | 756 | +         c = 1; | 
|---|
|  | 757 | +         wc = L'\0'; /* end of buf: only fully consumed patterns can still match */ | 
|---|
|  | 758 | +       } | 
|---|
|  | 759 | + | 
|---|
|  | 760 | +      for (i = 0; i < Fimb.count; i++) | 
|---|
|  | 761 | +       { | 
|---|
|  | 762 | +         if (Fimb.match[i]) | 
|---|
|  | 763 | +           { | 
|---|
|  | 764 | +             if (Fimb.patterns[i][letter] == L'\0') | 
|---|
|  | 765 | +               { | 
|---|
|  | 766 | +                 /* Pattern i is exhausted: it matches the first len bytes of buf. */ | 
|---|
|  | 767 | +                 *plen = len; | 
|---|
|  | 768 | +                 if (!exact && !match_words) | 
|---|
|  | 769 | +                   return 0; | 
|---|
|  | 770 | +                 else | 
|---|
|  | 771 | +                   { | 
|---|
|  | 772 | +                     /* For -w or exact, keep scanning so *plen ends up as the longest match.  */ | 
|---|
|  | 773 | +                     ret = 0; | 
|---|
|  | 774 | +                     Fimb.match[i] = '\0'; | 
|---|
|  | 775 | +                     continue; | 
|---|
|  | 776 | +                   } | 
|---|
|  | 777 | +               } | 
|---|
|  | 778 | + | 
|---|
|  | 779 | +             if (Fimb.patterns[i][letter] == wc) | 
|---|
|  | 780 | +               patterns_left = 1; | 
|---|
|  | 781 | +             else | 
|---|
|  | 782 | +               Fimb.match[i] = '\0'; | 
|---|
|  | 783 | +           } | 
|---|
|  | 784 | +       } | 
|---|
|  | 785 | + | 
|---|
|  | 786 | +      len += c; | 
|---|
|  | 787 | +      letter++; | 
|---|
|  | 788 | +    } | 
|---|
|  | 789 | + | 
|---|
|  | 790 | +  return ret; | 
|---|
|  | 791 | +} | 
|---|
|  | 792 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 793 | + | 
|---|
|  | 794 | EXECUTE_FCT(Fexecute) | 
|---|
|  | 795 | { | 
|---|
|  | 796 | register char const *beg, *try, *end; | 
|---|
|  | 797 | @@ -519,69 +855,256 @@ | 
|---|
|  | 798 | struct kwsmatch kwsmatch; | 
|---|
|  | 799 | size_t ret_val; | 
|---|
|  | 800 | #ifdef MBS_SUPPORT | 
|---|
|  | 801 | -  char *mb_properties = NULL; | 
|---|
|  | 802 | -  if (MB_CUR_MAX > 1) | 
|---|
|  | 803 | -    { | 
|---|
|  | 804 | -      if (match_icase) | 
|---|
|  | 805 | -        { | 
|---|
|  | 806 | -          char *case_buf = xmalloc(size); | 
|---|
|  | 807 | -          memcpy(case_buf, buf, size); | 
|---|
|  | 808 | -         if (start_ptr) | 
|---|
|  | 809 | -           start_ptr = case_buf + (start_ptr - buf); | 
|---|
|  | 810 | -          buf = case_buf; | 
|---|
|  | 811 | -        } | 
|---|
|  | 812 | -      mb_properties = check_multibyte_string(buf, size); | 
|---|
|  | 813 | -    } | 
|---|
|  | 814 | +  int mb_cur_max = MB_CUR_MAX; | 
|---|
|  | 815 | +  mbstate_t mbs; | 
|---|
|  | 816 | +  memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 817 | +  const char *last_char = NULL; | 
|---|
|  | 818 | #endif /* MBS_SUPPORT */ | 
|---|
|  | 819 |  | 
|---|
|  | 820 | for (beg = start_ptr ? start_ptr : buf; beg <= buf + size; beg++) | 
|---|
|  | 821 | { | 
|---|
|  | 822 | size_t offset = kwsexec (kwset, beg, buf + size - beg, &kwsmatch); | 
|---|
|  | 823 | if (offset == (size_t) -1) | 
|---|
|  | 824 | -       goto failure; | 
|---|
|  | 825 | +       return offset; | 
|---|
|  | 826 | #ifdef MBS_SUPPORT | 
|---|
|  | 827 | -      if (MB_CUR_MAX > 1 && mb_properties[offset+beg-buf] == 0) | 
|---|
|  | 828 | -       continue; /* It is a part of multibyte character.  */ | 
|---|
|  | 829 | +      if (mb_cur_max > 1 && !using_utf8) | 
|---|
|  | 830 | +       { | 
|---|
|  | 831 | +         size_t bytes_left = offset; | 
|---|
|  | 832 | +         while (bytes_left) | 
|---|
|  | 833 | +           { | 
|---|
|  | 834 | +             size_t mlen = mbrlen (beg, bytes_left, &mbs); | 
|---|
|  | 835 | + | 
|---|
|  | 836 | +             last_char = beg; | 
|---|
|  | 837 | +             if (mlen == (size_t) -1 || mlen == 0) | 
|---|
|  | 838 | +               { | 
|---|
|  | 839 | +                 /* Invalid sequence or null character: treat as single-byte. */ | 
|---|
|  | 840 | +                 memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 841 | +                 beg++; | 
|---|
|  | 842 | +                 bytes_left--; | 
|---|
|  | 843 | +                 continue; | 
|---|
|  | 844 | +               } | 
|---|
|  | 845 | + | 
|---|
|  | 846 | +             if (mlen == (size_t) -2) | 
|---|
|  | 847 | +               /* Offset points inside multibyte character: no good. */ | 
|---|
|  | 848 | +               break; | 
|---|
|  | 849 | + | 
|---|
|  | 850 | +             beg += mlen; | 
|---|
|  | 851 | +             bytes_left -= mlen; | 
|---|
|  | 852 | +           } | 
|---|
|  | 853 | + | 
|---|
|  | 854 | +         if (bytes_left) | 
|---|
|  | 855 | +           continue; | 
|---|
|  | 856 | +       } | 
|---|
|  | 857 | +      else | 
|---|
|  | 858 | #endif /* MBS_SUPPORT */ | 
|---|
|  | 859 | beg += offset; | 
|---|
|  | 860 | +#ifdef MBS_SUPPORT | 
|---|
|  | 861 | +      /* For f_i_multibyte, the string at beg now matches first 3 chars of | 
|---|
|  | 862 | +        one of the search strings (less if there are shorter search strings). | 
|---|
|  | 863 | +        See if this is a real match.  */ | 
|---|
|  | 864 | +      if (f_i_multibyte | 
|---|
|  | 865 | +         && Fimbexec (beg, buf + size - beg, &kwsmatch.size[0], start_ptr == NULL)) | 
|---|
|  | 866 | +       goto next_char; | 
|---|
|  | 867 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 868 | len = kwsmatch.size[0]; | 
|---|
|  | 869 | if (start_ptr && !match_words) | 
|---|
|  | 870 | goto success_in_beg_and_len; | 
|---|
|  | 871 | if (match_lines) | 
|---|
|  | 872 | { | 
|---|
|  | 873 | if (beg > buf && beg[-1] != eol) | 
|---|
|  | 874 | -           continue; | 
|---|
|  | 875 | +           goto next_char; | 
|---|
|  | 876 | if (beg + len < buf + size && beg[len] != eol) | 
|---|
|  | 877 | -           continue; | 
|---|
|  | 878 | +           goto next_char; | 
|---|
|  | 879 | goto success; | 
|---|
|  | 880 | } | 
|---|
|  | 881 | else if (match_words) | 
|---|
|  | 882 | -       for (try = beg; len; ) | 
|---|
|  | 883 | -         { | 
|---|
|  | 884 | -           if (try > buf && WCHAR((unsigned char) try[-1])) | 
|---|
|  | 885 | -             break; | 
|---|
|  | 886 | -           if (try + len < buf + size && WCHAR((unsigned char) try[len])) | 
|---|
|  | 887 | -             { | 
|---|
|  | 888 | -               offset = kwsexec (kwset, beg, --len, &kwsmatch); | 
|---|
|  | 889 | -               if (offset == (size_t) -1) | 
|---|
|  | 890 | -                 break; | 
|---|
|  | 891 | -               try = beg + offset; | 
|---|
|  | 892 | -               len = kwsmatch.size[0]; | 
|---|
|  | 893 | -             } | 
|---|
|  | 894 | -           else if (!start_ptr) | 
|---|
|  | 895 | -             goto success; | 
|---|
|  | 896 | -           else | 
|---|
|  | 897 | -             goto success_in_beg_and_len; | 
|---|
|  | 898 | -         } /* for (try) */ | 
|---|
|  | 899 | -      else | 
|---|
|  | 900 | +       { | 
|---|
|  | 901 | +         while (len) | 
|---|
|  | 902 | +           { | 
|---|
|  | 903 | +             int word_match = 0; | 
|---|
|  | 904 | +             if (beg > buf) | 
|---|
|  | 905 | +               { | 
|---|
|  | 906 | +#ifdef MBS_SUPPORT | 
|---|
|  | 907 | +                 if (mb_cur_max > 1) | 
|---|
|  | 908 | +                   { | 
|---|
|  | 909 | +                     const char *s; | 
|---|
|  | 910 | +                     int mr; | 
|---|
|  | 911 | +                     wchar_t pwc; | 
|---|
|  | 912 | + | 
|---|
|  | 913 | +                     if (using_utf8) | 
|---|
|  | 914 | +                       { | 
|---|
|  | 915 | +                         s = beg - 1; | 
|---|
|  | 916 | +                         while (s > buf | 
|---|
|  | 917 | +                                && (unsigned char) *s >= 0x80 | 
|---|
|  | 918 | +                                && (unsigned char) *s <= 0xbf) | 
|---|
|  | 919 | +                           --s; | 
|---|
|  | 920 | +                       } | 
|---|
|  | 921 | +                     else | 
|---|
|  | 922 | +                       s = last_char; | 
|---|
|  | 923 | +                     mr = mbtowc (&pwc, s, beg - s); | 
|---|
|  | 924 | +                     if (mr <= 0) | 
|---|
|  | 925 | +                       memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 926 | +                     else if ((iswalnum (pwc) || pwc == L'_') | 
|---|
|  | 927 | +                              && mr == (int) (beg - s)) | 
|---|
|  | 928 | +                       goto next_char; | 
|---|
|  | 929 | +                   } | 
|---|
|  | 930 | +                 else | 
|---|
|  | 931 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 932 | +                 if (WCHAR ((unsigned char) beg[-1])) | 
|---|
|  | 933 | +                   goto next_char; | 
|---|
|  | 934 | +               } | 
|---|
|  | 935 | +#ifdef MBS_SUPPORT | 
|---|
|  | 936 | +             if (mb_cur_max > 1) | 
|---|
|  | 937 | +               { | 
|---|
|  | 938 | +                 wchar_t nwc; | 
|---|
|  | 939 | +                 int mr; | 
|---|
|  | 940 | + | 
|---|
|  | 941 | +                 mr = mbtowc (&nwc, beg + len, buf + size - beg - len); | 
|---|
|  | 942 | +                 if (mr <= 0) | 
|---|
|  | 943 | +                   { | 
|---|
|  | 944 | +                     memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 945 | +                     word_match = 1; | 
|---|
|  | 946 | +                   } | 
|---|
|  | 947 | +                 else if (!iswalnum (nwc) && nwc != L'_') | 
|---|
|  | 948 | +                   word_match = 1; | 
|---|
|  | 949 | +               } | 
|---|
|  | 950 | +             else | 
|---|
|  | 951 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 952 | +               if (beg + len >= buf + size || !WCHAR ((unsigned char) beg[len])) | 
|---|
|  | 953 | +                 word_match = 1; | 
|---|
|  | 954 | +             if (word_match) | 
|---|
|  | 955 | +               { | 
|---|
|  | 956 | +                 if (start_ptr == NULL) | 
|---|
|  | 957 | +                   /* Returns the whole line now we know there's a word match. */ | 
|---|
|  | 958 | +                   goto success; | 
|---|
|  | 959 | +                 else { | 
|---|
|  | 960 | +                   /* Returns just this word match. */ | 
|---|
|  | 961 | +                   *match_size = len; | 
|---|
|  | 962 | +                   return beg - buf; | 
|---|
|  | 963 | +                 } | 
|---|
|  | 964 | +               } | 
|---|
|  | 965 | +             if (len > 0) | 
|---|
|  | 966 | +               { | 
|---|
|  | 967 | +                 /* Try a shorter length anchored at the same place. */ | 
|---|
|  | 968 | +                 --len; | 
|---|
|  | 969 | +                 offset = kwsexec (kwset, beg, len, &kwsmatch); | 
|---|
|  | 970 | + | 
|---|
|  | 971 | +                 if (offset == -1) | 
|---|
|  | 972 | +                   goto next_char; /* Try a different anchor. */ | 
|---|
|  | 973 | +#ifdef MBS_SUPPORT | 
|---|
|  | 974 | + | 
|---|
|  | 975 | +                 if (mb_cur_max > 1 && !using_utf8) | 
|---|
|  | 976 | +                   { | 
|---|
|  | 977 | +                     size_t bytes_left = offset; | 
|---|
|  | 978 | +                     while (bytes_left) | 
|---|
|  | 979 | +                       { | 
|---|
|  | 980 | +                         size_t mlen = mbrlen (beg, bytes_left, &mbs); | 
|---|
|  | 981 | + | 
|---|
|  | 982 | +                         last_char = beg; | 
|---|
|  | 983 | +                         if (mlen == (size_t) -1 || mlen == 0) | 
|---|
|  | 984 | +                           { | 
|---|
|  | 985 | +                             /* Invalid sequence or null character: treat as single-byte. */ | 
|---|
|  | 986 | +                             memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 987 | +                             beg++; | 
|---|
|  | 988 | +                             bytes_left--; | 
|---|
|  | 989 | +                             continue; | 
|---|
|  | 990 | +                           } | 
|---|
|  | 991 | + | 
|---|
|  | 992 | +                         if (mlen == (size_t) -2) | 
|---|
|  | 993 | +                           { | 
|---|
|  | 994 | +                             /* Offset points inside multibyte character: | 
|---|
|  | 995 | +                              * no good. */ | 
|---|
|  | 996 | +                             break; | 
|---|
|  | 997 | +                           } | 
|---|
|  | 998 | + | 
|---|
|  | 999 | +                         beg += mlen; | 
|---|
|  | 1000 | +                         bytes_left -= mlen; | 
|---|
|  | 1001 | +                       } | 
|---|
|  | 1002 | + | 
|---|
|  | 1003 | +                     if (bytes_left) | 
|---|
|  | 1004 | +                       { | 
|---|
|  | 1005 | +                         memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 1006 | +                         goto next_char; /* Try a different anchor. */ | 
|---|
|  | 1007 | +                       } | 
|---|
|  | 1008 | +                   } | 
|---|
|  | 1009 | +                 else | 
|---|
|  | 1010 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 1011 | +                 beg += offset; | 
|---|
|  | 1012 | +#ifdef MBS_SUPPORT | 
|---|
|  | 1013 | +                 /* The string at beg now matches first 3 chars of one of | 
|---|
|  | 1014 | +                    the search strings (less if there are shorter search | 
|---|
|  | 1015 | +                    strings).  See if this is a real match.  */ | 
|---|
|  | 1016 | +                 if (f_i_multibyte | 
|---|
|  | 1017 | +                     && Fimbexec (beg, len - offset, &kwsmatch.size[0], | 
|---|
|  | 1018 | +                                  start_ptr == NULL)) | 
|---|
|  | 1019 | +                   goto next_char; | 
|---|
|  | 1020 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 1021 | +                 len = kwsmatch.size[0]; | 
|---|
|  | 1022 | +               } | 
|---|
|  | 1023 | +           } | 
|---|
|  | 1024 | +       } | 
|---|
|  | 1025 | +       else | 
|---|
|  | 1026 | goto success; | 
|---|
|  | 1027 | -    } /* for (beg in buf) */ | 
|---|
|  | 1028 | +next_char:; | 
|---|
|  | 1029 | +#ifdef MBS_SUPPORT | 
|---|
|  | 1030 | +      /* Advance to next character.  For MB_CUR_MAX == 1 case this is handled | 
|---|
|  | 1031 | +        by ++beg above.  */ | 
|---|
|  | 1032 | +      if (mb_cur_max > 1) | 
|---|
|  | 1033 | +       { | 
|---|
|  | 1034 | +         if (using_utf8) | 
|---|
|  | 1035 | +           { | 
|---|
|  | 1036 | +             unsigned char c = *beg; | 
|---|
|  | 1037 | +             if (c >= 0xc2) | 
|---|
|  | 1038 | +               { | 
|---|
|  | 1039 | +                 if (c < 0xe0) | 
|---|
|  | 1040 | +                   ++beg; | 
|---|
|  | 1041 | +                 else if (c < 0xf0) | 
|---|
|  | 1042 | +                   beg += 2; | 
|---|
|  | 1043 | +                 else if (c < 0xf8) | 
|---|
|  | 1044 | +                   beg += 3; | 
|---|
|  | 1045 | +                 else if (c < 0xfc) | 
|---|
|  | 1046 | +                   beg += 4; | 
|---|
|  | 1047 | +                 else if (c < 0xfe) | 
|---|
|  | 1048 | +                   beg += 5; | 
|---|
|  | 1049 | +               } | 
|---|
|  | 1050 | +           } | 
|---|
|  | 1051 | +         else | 
|---|
|  | 1052 | +           { | 
|---|
|  | 1053 | +             size_t l = mbrlen (beg, buf + size - beg, &mbs); | 
|---|
|  | 1054 |  | 
|---|
|  | 1055 | - failure: | 
|---|
|  | 1056 | -  ret_val = -1; | 
|---|
|  | 1057 | -  goto out; | 
|---|
|  | 1058 | +             last_char = beg; | 
|---|
|  | 1059 | +             if (l + 2 >= 2) | 
|---|
|  | 1060 | +               beg += l - 1; | 
|---|
|  | 1061 | +             else | 
|---|
|  | 1062 | +               memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 1063 | +           } | 
|---|
|  | 1064 | +       } | 
|---|
|  | 1065 | +#endif /* MBS_SUPPORT */ | 
|---|
|  | 1066 | +    } | 
|---|
|  | 1067 | + | 
|---|
|  | 1068 | +  return -1; | 
|---|
|  | 1069 |  | 
|---|
|  | 1070 | success: | 
|---|
|  | 1071 | +#ifdef MBS_SUPPORT | 
|---|
|  | 1072 | +  if (mb_cur_max > 1 && !using_utf8) | 
|---|
|  | 1073 | +    { | 
|---|
|  | 1074 | +      end = beg + len; | 
|---|
|  | 1075 | +      while (end < buf + size) | 
|---|
|  | 1076 | +       { | 
|---|
|  | 1077 | +         size_t mlen = mbrlen (end, buf + size - end, &mbs); | 
|---|
|  | 1078 | +         if (mlen == (size_t) -1 || mlen == (size_t) -2 || mlen == 0) | 
|---|
|  | 1079 | +           { | 
|---|
|  | 1080 | +             memset (&mbs, '\0', sizeof (mbstate_t)); | 
|---|
|  | 1081 | +             mlen = 1; | 
|---|
|  | 1082 | +           } | 
|---|
|  | 1083 | +         if (mlen == 1 && *end == eol) | 
|---|
|  | 1084 | +           break; | 
|---|
|  | 1085 | + | 
|---|
|  | 1086 | +         end += mlen; | 
|---|
|  | 1087 | +       } | 
|---|
|  | 1088 | +     } | 
|---|
|  | 1089 | +  else | 
|---|
|  | 1090 | + #endif /* MBS_SUPPORT */ | 
|---|
|  | 1091 | end = memchr (beg + len, eol, (buf + size) - (beg + len)); | 
|---|
|  | 1092 | end++; | 
|---|
|  | 1093 | while (buf < beg && beg[-1] != eol) | 
|---|
|  | 1094 | @@ -591,15 +1114,6 @@ | 
|---|
|  | 1095 | *match_size = len; | 
|---|
|  | 1096 | ret_val = beg - buf; | 
|---|
|  | 1097 | out: | 
|---|
|  | 1098 | -#ifdef MBS_SUPPORT | 
|---|
|  | 1099 | -  if (MB_CUR_MAX > 1) | 
|---|
|  | 1100 | -    { | 
|---|
|  | 1101 | -      if (match_icase) | 
|---|
|  | 1102 | -        free((char*)buf); | 
|---|
|  | 1103 | -      if (mb_properties) | 
|---|
|  | 1104 | -        free(mb_properties); | 
|---|
|  | 1105 | -    } | 
|---|
|  | 1106 | -#endif /* MBS_SUPPORT */ | 
|---|
|  | 1107 | return ret_val; | 
|---|
|  | 1108 | } | 
|---|
|  | 1109 | #endif /* defined(GREP_PROGRAM) || defined(FGREP_PROGRAM) */ | 
|---|