1 /*************************************************
3 *************************************************/
5 /* This is a grep program that uses the PCRE regular expression library to do
6 its pattern matching. On a Unix or Win32 system it can recurse into
9 Copyright (c) 1997-2010 University of Cambridge
11 -----------------------------------------------------------------------------
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
15 * Redistributions of source code must retain the above copyright notice,
16 this list of conditions and the following disclaimer.
18 * Redistributions in binary form must reproduce the above copyright
19 notice, this list of conditions and the following disclaimer in the
20 documentation and/or other materials provided with the distribution.
22 * Neither the name of the University of Cambridge nor the names of its
23 contributors may be used to endorse or promote products derived from
24 this software without specific prior written permission.
26 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
27 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
30 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 POSSIBILITY OF SUCH DAMAGE.
37 -----------------------------------------------------------------------------
51 #include <sys/types.h>
73 #define MAX_PATTERN_COUNT 100
74 #define OFFSET_SIZE 99
77 #define MBUFTHIRD BUFSIZ
79 #define MBUFTHIRD 8192
82 /* Values for the "filenames" variable, which specifies options for file name
83 output. The order is important; it is assumed that a file name is wanted for
84 all values greater than FN_DEFAULT. */
/* The numeric order matters: a file name is wanted for every value that is
greater than FN_DEFAULT. */

enum {
  FN_NONE         = 0,
  FN_DEFAULT      = 1,
  FN_MATCH_ONLY   = 2,
  FN_NOMATCH_ONLY = 3,
  FN_FORCE        = 4
};
88 /* File reading styles */
enum {
  FR_PLAIN  = 0,   /* handle is an fopened FILE stream */
  FR_LIBZ   = 1,   /* handle is a gzFile, read via libz */
  FR_LIBBZ2 = 2    /* handle is a BZFILE, read via libbz2 */
};
92 /* Actions for the -d and -D options */
/* Values held in dee_action (lower-case names). */

enum {
  dee_READ    = 0,
  dee_SKIP    = 1,
  dee_RECURSE = 2
};

/* Values held in DEE_action (upper-case names). */

enum {
  DEE_READ = 0,
  DEE_SKIP = 1
};
97 /* Actions for special processing options (flag bits) */
/* Each flag occupies its own bit of process_options, so the combined value
(0..7) can be used directly as an index into the prefix[] and suffix[]
tables. */

#define PO_WORD_MATCH     (1 << 0)   /* -w */
#define PO_LINE_MATCH     (1 << 1)   /* -x */
#define PO_FIXED_STRINGS  (1 << 2)   /* -F */
103 /* Line ending types */
/* Possible values of endlinetype. */

enum {
  EL_LF      = 0,
  EL_CR      = 1,
  EL_CRLF    = 2,
  EL_ANY     = 3,
  EL_ANYCRLF = 4
};
/* In newer versions of gcc, with FORTIFY_SOURCE set (the default in some
environments), a warning is issued if the value of fwrite() is ignored.
Unfortunately, casting to (void) does not suppress the warning. To get round
this, we use a macro that consumes the result in an empty "if". Oddly, this
does not also seem to apply to fprintf().

The "if" is wrapped in do { } while (0) so that FWRITE(...); always expands to
exactly one statement. Without the wrapper, writing

  if (cond) FWRITE(a,b,c,d); else ...

does not compile (the trailing semicolon terminates the outer "if"), and the
bare "if" inside the macro could capture an "else" that follows it. */

#define FWRITE(a,b,c,d) do { if (fwrite(a,b,c,d)) {} } while (0)
117 /*************************************************
119 *************************************************/
121 /* Jeffrey Friedl has some debugging requirements that are not part of the
125 static int S_arg = -1;
126 static unsigned int jfriedl_XR = 0; /* repeat regex attempt this many times */
127 static unsigned int jfriedl_XT = 0; /* replicate text this many times */
128 static const char *jfriedl_prefix = "";
129 static const char *jfriedl_postfix = "";
132 static int endlinetype;
134 static char *colour_string = (char *)"1;31";
135 static char *colour_option = NULL;
136 static char *dee_option = NULL;
137 static char *DEE_option = NULL;
138 static char *newline = NULL;
139 static char *pattern_filename = NULL;
140 static char *stdin_name = (char *)"(standard input)";
141 static char *locale = NULL;
143 static const unsigned char *pcretables = NULL;
145 static int pattern_count = 0;
146 static pcre **pattern_list = NULL;
147 static pcre_extra **hints_list = NULL;
149 static char *include_pattern = NULL;
150 static char *exclude_pattern = NULL;
151 static char *include_dir_pattern = NULL;
152 static char *exclude_dir_pattern = NULL;
154 static pcre *include_compiled = NULL;
155 static pcre *exclude_compiled = NULL;
156 static pcre *include_dir_compiled = NULL;
157 static pcre *exclude_dir_compiled = NULL;
159 static int after_context = 0;
160 static int before_context = 0;
161 static int both_context = 0;
162 static int dee_action = dee_READ;
163 static int DEE_action = DEE_READ;
164 static int error_count = 0;
165 static int filenames = FN_DEFAULT;
166 static int process_options = 0;
168 static BOOL count_only = FALSE;
169 static BOOL do_colour = FALSE;
170 static BOOL file_offsets = FALSE;
171 static BOOL hyphenpending = FALSE;
172 static BOOL invert = FALSE;
173 static BOOL line_buffered = FALSE;
174 static BOOL line_offsets = FALSE;
175 static BOOL multiline = FALSE;
176 static BOOL number = FALSE;
177 static BOOL omit_zero_count = FALSE;
178 static BOOL only_matching = FALSE;
179 static BOOL quiet = FALSE;
180 static BOOL silent = FALSE;
181 static BOOL utf8 = FALSE;
183 /* Structure for options and list of them */
185 enum { OP_NODATA, OP_STRING, OP_OP_STRING, OP_NUMBER, OP_OP_NUMBER,
188 typedef struct option_item {
192 const char *long_name;
193 const char *help_text;
196 /* Options without a single-letter equivalent get a negative value. This can be
197 used to identify them. */
199 #define N_COLOUR (-1)
200 #define N_EXCLUDE (-2)
201 #define N_EXCLUDE_DIR (-3)
203 #define N_INCLUDE (-5)
204 #define N_INCLUDE_DIR (-6)
206 #define N_LOCALE (-8)
208 #define N_LOFFSETS (-10)
209 #define N_FOFFSETS (-11)
210 #define N_LBUFFER (-12)
212 static option_item optionlist[] = {
213 { OP_NODATA, N_NULL, NULL, "", " terminate options" },
214 { OP_NODATA, N_HELP, NULL, "help", "display this help and exit" },
215 { OP_NUMBER, 'A', &after_context, "after-context=number", "set number of following context lines" },
216 { OP_NUMBER, 'B', &before_context, "before-context=number", "set number of prior context lines" },
217 { OP_OP_STRING, N_COLOUR, &colour_option, "color=option", "matched text color option" },
218 { OP_NUMBER, 'C', &both_context, "context=number", "set number of context lines, before & after" },
219 { OP_NODATA, 'c', NULL, "count", "print only a count of matching lines per FILE" },
220 { OP_OP_STRING, N_COLOUR, &colour_option, "colour=option", "matched text colour option" },
221 { OP_STRING, 'D', &DEE_option, "devices=action","how to handle devices, FIFOs, and sockets" },
222 { OP_STRING, 'd', &dee_option, "directories=action", "how to handle directories" },
223 { OP_PATLIST, 'e', NULL, "regex(p)=pattern", "specify pattern (may be used more than once)" },
224 { OP_NODATA, 'F', NULL, "fixed-strings", "patterns are sets of newline-separated strings" },
225 { OP_STRING, 'f', &pattern_filename, "file=path", "read patterns from file" },
226 { OP_NODATA, N_FOFFSETS, NULL, "file-offsets", "output file offsets, not text" },
227 { OP_NODATA, 'H', NULL, "with-filename", "force the prefixing filename on output" },
228 { OP_NODATA, 'h', NULL, "no-filename", "suppress the prefixing filename on output" },
229 { OP_NODATA, 'i', NULL, "ignore-case", "ignore case distinctions" },
230 { OP_NODATA, 'l', NULL, "files-with-matches", "print only FILE names containing matches" },
231 { OP_NODATA, 'L', NULL, "files-without-match","print only FILE names not containing matches" },
232 { OP_STRING, N_LABEL, &stdin_name, "label=name", "set name for standard input" },
233 { OP_NODATA, N_LBUFFER, NULL, "line-buffered", "use line buffering" },
234 { OP_NODATA, N_LOFFSETS, NULL, "line-offsets", "output line numbers and offsets, not text" },
235 { OP_STRING, N_LOCALE, &locale, "locale=locale", "use the named locale" },
236 { OP_NODATA, 'M', NULL, "multiline", "run in multiline mode" },
237 { OP_STRING, 'N', &newline, "newline=type", "set newline type (CR, LF, CRLF, ANYCRLF or ANY)" },
238 { OP_NODATA, 'n', NULL, "line-number", "print line number with output lines" },
239 { OP_NODATA, 'o', NULL, "only-matching", "show only the part of the line that matched" },
240 { OP_NODATA, 'q', NULL, "quiet", "suppress output, just set return code" },
241 { OP_NODATA, 'r', NULL, "recursive", "recursively scan sub-directories" },
242 { OP_STRING, N_EXCLUDE,&exclude_pattern, "exclude=pattern","exclude matching files when recursing" },
243 { OP_STRING, N_INCLUDE,&include_pattern, "include=pattern","include matching files when recursing" },
244 { OP_STRING, N_EXCLUDE_DIR,&exclude_dir_pattern, "exclude_dir=pattern","exclude matching directories when recursing" },
245 { OP_STRING, N_INCLUDE_DIR,&include_dir_pattern, "include_dir=pattern","include matching directories when recursing" },
247 { OP_OP_NUMBER, 'S', &S_arg, "jeffS", "replace matched (sub)string with X" },
249 { OP_NODATA, 's', NULL, "no-messages", "suppress error messages" },
250 { OP_NODATA, 'u', NULL, "utf-8", "use UTF-8 mode" },
251 { OP_NODATA, 'V', NULL, "version", "print version information and exit" },
252 { OP_NODATA, 'v', NULL, "invert-match", "select non-matching lines" },
253 { OP_NODATA, 'w', NULL, "word-regex(p)", "force patterns to match only as words" },
254 { OP_NODATA, 'x', NULL, "line-regex(p)", "force patterns to match only whole lines" },
255 { OP_NODATA, 0, NULL, NULL, NULL }
258 /* Tables for prefixing and suffixing patterns, according to the -w, -x, and -F
259 options. These set the 1, 2, and 4 bits in process_options, respectively. Note
260 that the combination of -w and -x has the same effect as -x on its own, so we
261 can treat them as the same. */
/* Text placed before each pattern. The index is the process_options value:
bit 1 is -w, bit 2 is -x, bit 4 is -F. */

static const char *prefix[] = {
  "",           /* 0: no special processing */
  "\\b",        /* 1: -w */
  "^(?:",       /* 2: -x */
  "^(?:",       /* 3: -w and -x, treated as -x */
  "\\Q",        /* 4: -F */
  "\\b\\Q",     /* 5: -F and -w */
  "^(?:\\Q",    /* 6: -F and -x */
  "^(?:\\Q"     /* 7: -F, -w and -x, treated as -F and -x */
};

/* Text placed after each pattern; indexed in the same way as prefix[]. */

static const char *suffix[] = {
  "",           /* 0 */
  "\\b",        /* 1 */
  ")$",         /* 2 */
  ")$",         /* 3 */
  "\\E",        /* 4 */
  "\\E\\b",     /* 5 */
  "\\E)$",      /* 6 */
  "\\E)$"       /* 7 */
};
269 /* UTF-8 tables - used only when the newline setting is "any". */
/* Masks applied to a UTF-8 lead byte to keep only its data bits, indexed by
the number of additional bytes in the character. */

const int utf8_table3[] = {
  0xff,
  0x1f,
  0x0f,
  0x07,
  0x03,
  0x01
};

/* Number of additional bytes that follow a lead byte. The index is the low
six bits of the lead byte (users apply c & 0x3f once c >= 0xc0 is known). */

const char utf8_table4[] = {
  /* 0xc0-0xc7 */ 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xc8-0xcf */ 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd0-0xd7 */ 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xd8-0xdf */ 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xe0-0xe7 */ 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xe8-0xef */ 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xf0-0xf7 */ 3, 3, 3, 3, 3, 3, 3, 3,
  /* 0xf8-0xfb */ 4, 4, 4, 4,
  /* 0xfc-0xff */ 5, 5, 5, 5
};
281 /*************************************************
282 * OS-specific functions *
283 *************************************************/
285 /* These functions are defined so that they can be made system specific,
286 although at present the only ones are for Unix, Win32, and for "no support". */
289 /************* Directory scanning in Unix ***********/
291 #if defined HAVE_SYS_STAT_H && defined HAVE_DIRENT_H && defined HAVE_SYS_TYPES_H
292 #include <sys/types.h>
293 #include <sys/stat.h>
296 typedef DIR directory_type;
299 isdirectory(char *filename)
302 if (stat(filename, &statbuf) < 0)
303 return 0; /* In the expectation that opening as a file will fail */
304 return ((statbuf.st_mode & S_IFMT) == S_IFDIR)? '/' : 0;
307 static directory_type *
308 opendirectory(char *filename)
310 return opendir(filename);
314 readdirectory(directory_type *dir)
318 struct dirent *dent = readdir(dir);
319 if (dent == NULL) return NULL;
320 if (strcmp(dent->d_name, ".") != 0 && strcmp(dent->d_name, "..") != 0)
323 /* Control never reaches here */
327 closedirectory(directory_type *dir)
333 /************* Test for regular file in Unix **********/
336 isregfile(char *filename)
339 if (stat(filename, &statbuf) < 0)
340 return 1; /* In the expectation that opening as a file will fail */
341 return (statbuf.st_mode & S_IFMT) == S_IFREG;
345 /************* Test for a terminal in Unix **********/
350 return isatty(fileno(stdout));
356 return isatty(fileno(f));
360 /************* Directory scanning in Win32 ***********/
362 /* I (Philip Hazel) have no means of testing this code. It was contributed by
363 Lionel Fourquaux. David Burgess added a patch to define INVALID_FILE_ATTRIBUTES
364 when it did not exist. David Byron added a patch that moved the #include of
365 <windows.h> to before the INVALID_FILE_ATTRIBUTES definition rather than after.
373 #ifndef WIN32_LEAN_AND_MEAN
374 # define WIN32_LEAN_AND_MEAN
379 #ifndef INVALID_FILE_ATTRIBUTES
380 #define INVALID_FILE_ATTRIBUTES 0xFFFFFFFF
383 typedef struct directory_type
387 WIN32_FIND_DATA data;
391 isdirectory(char *filename)
393 DWORD attr = GetFileAttributes(filename);
394 if (attr == INVALID_FILE_ATTRIBUTES)
396 return ((attr & FILE_ATTRIBUTE_DIRECTORY) != 0) ? '/' : 0;
400 opendirectory(char *filename)
406 len = strlen(filename);
407 pattern = (char *) malloc(len + 3);
408 dir = (directory_type *) malloc(sizeof(*dir));
409 if ((pattern == NULL) || (dir == NULL))
411 fprintf(stderr, "pcregrep: malloc failed\n");
414 memcpy(pattern, filename, len);
415 memcpy(&(pattern[len]), "\\*", 3);
416 dir->handle = FindFirstFile(pattern, &(dir->data));
417 if (dir->handle != INVALID_HANDLE_VALUE)
423 err = GetLastError();
426 errno = (err == ERROR_ACCESS_DENIED) ? EACCES : ENOENT;
431 readdirectory(directory_type *dir)
437 if (!FindNextFile(dir->handle, &(dir->data)))
444 if (strcmp(dir->data.cFileName, ".") != 0 && strcmp(dir->data.cFileName, "..") != 0)
445 return dir->data.cFileName;
448 return NULL; /* Keep compiler happy; never executed */
453 closedirectory(directory_type *dir)
455 FindClose(dir->handle);
460 /************* Test for regular file in Win32 **********/
462 /* I don't know how to do this, or if it can be done; assume all paths are
463 regular if they are not directories. */
465 int isregfile(char *filename)
467 return !isdirectory(filename);
471 /************* Test for a terminal in Win32 **********/
473 /* I don't know how to do this; assume never */
488 /************* Directory scanning when we can't do it ***********/
490 /* The type is void, and apart from isdirectory(), the functions do nothing. */
/* With no directory support the handle type is just void. */

typedef void directory_type;

/* Nothing is ever reported as a directory. */

int
isdirectory(char *filename)
{
  (void)filename;
  return 0;
}

/* There is nothing to open: always yields a null handle. */

directory_type *
opendirectory(char *filename)
{
  (void)filename;
  return (directory_type *)0;
}

/* There are never any entries: always yields a null name. */

char *
readdirectory(directory_type *dir)
{
  (void)dir;
  return (char *)0;
}

/* Nothing to release. */

void
closedirectory(directory_type *dir)
{
  (void)dir;
}
502 /************* Test for regular when we can't do it **********/
504 /* Assume all files are regular. */
/* Without a way to test, every path is reported as a regular file. */

int
isregfile(char *filename)
{
  (void)filename;
  return 1;
}
509 /************* Test for a terminal when we can't do it **********/
527 #ifndef HAVE_STRERROR
528 /*************************************************
529 * Provide strerror() for non-ANSI libraries *
530 *************************************************/
532 /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
533 in their libraries, but can provide the same facility by this simple
534 alternative function. */
537 extern char *sys_errlist[];
542 if (n < 0 || n >= sys_nerr) return "unknown error number";
543 return sys_errlist[n];
545 #endif /* HAVE_STRERROR */
549 /*************************************************
550 * Read one line of input *
551 *************************************************/
553 /* Normally, input is read using fread() into a large buffer, so many lines may
554 be read at once. However, doing this for tty input means that no output appears
555 until a lot of input has been typed. Instead, tty input is handled line by
556 line. We cannot use fgets() for this, because it does not stop at a binary
557 zero, and therefore there is no way of telling how many characters it has read,
558 because there may be binary zeros embedded in the data.
561 buffer the buffer to read into
562 length the maximum number of characters to read
565 Returns: the number of characters read, zero at end of file
569 read_one_line(char *buffer, int length, FILE *f)
573 while ((c = fgetc(f)) != EOF)
576 if (c == '\n' || yield >= length) break;
583 /*************************************************
585 *************************************************/
587 /* The length of the endline sequence that is found is set via lenptr. This may
588 be zero at the very end of the file if there is no line-ending sequence there.
591 p current position in line
592 endptr end of available data
593 lenptr where to put the length of the eol sequence
595 Returns: pointer to the last byte of the line
599 end_of_line(char *p, char *endptr, int *lenptr)
603 default: /* Just in case */
605 while (p < endptr && *p != '\n') p++;
615 while (p < endptr && *p != '\r') p++;
627 while (p < endptr && *p != '\r') p++;
645 register int c = *((unsigned char *)p);
647 if (utf8 && c >= 0xc0)
650 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
652 c = (c & utf8_table3[extra]) << gcss;
653 for (gcii = 1; gcii <= extra; gcii++)
656 c |= (p[gcii] & 0x3f) << gcss;
669 if (p < endptr && *p == 0x0a)
680 } /* End of loop for ANYCRLF case */
682 *lenptr = 0; /* Must have hit the end */
689 register int c = *((unsigned char *)p);
691 if (utf8 && c >= 0xc0)
694 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
696 c = (c & utf8_table3[extra]) << gcss;
697 for (gcii = 1; gcii <= extra; gcii++)
700 c |= (p[gcii] & 0x3f) << gcss;
715 if (p < endptr && *p == 0x0a)
723 // case 0x85: /* NEL */
724 // *lenptr = utf8? 2 : 1;
727 case 0x2028: /* LS */
728 case 0x2029: /* PS */
735 } /* End of loop for ANY case */
737 *lenptr = 0; /* Must have hit the end */
739 } /* End of overall switch */
744 /*************************************************
745 * Find start of previous line *
746 *************************************************/
748 /* This is called when looking back for before lines to print.
751 p start of the subsequent line
752 startptr start of available data
754 Returns: pointer to the start of the previous line
758 previous_line(char *p, char *startptr)
762 default: /* Just in case */
765 while (p > startptr && p[-1] != '\n') p--;
770 while (p > startptr && p[-1] != '\n') p--;
777 while (p > startptr && p[-1] != '\n') p--;
778 if (p <= startptr + 1 || p[-2] == '\r') return p;
780 return p; /* But control should never get here */
784 if (*(--p) == '\n' && p > startptr && p[-1] == '\r') p--;
785 if (utf8) while ((*p & 0xc0) == 0x80) p--;
795 while ((*pp & 0xc0) == 0x80) pp--;
796 c = *((unsigned char *)pp);
800 extra = utf8_table4[c & 0x3f]; /* Number of additional bytes */
802 c = (c & utf8_table3[extra]) << gcss;
803 for (gcii = 1; gcii <= extra; gcii++)
806 c |= (pp[gcii] & 0x3f) << gcss;
810 else c = *((unsigned char *)pp);
812 if (endlinetype == EL_ANYCRLF) switch (c)
828 // case 0x85: /* NEL */
829 case 0x2028: /* LS */
830 case 0x2029: /* PS */
837 p = pp; /* Back one character */
838 } /* End of loop for ANY case */
840 return startptr; /* Hit start of data */
841 } /* End of overall switch */
848 /*************************************************
849 * Print the previous "after" lines *
850 *************************************************/
852 /* This is called if we are about to lose said lines because of buffer filling,
853 and at the end of the file. The data in the line is written using fwrite() so
854 that a binary zero does not terminate it.
857 lastmatchnumber the number of the last matching line, plus one
858 lastmatchrestart where we restarted after the last match
859 endptr end of available data
860 printname filename for printing
865 static void do_after_lines(int lastmatchnumber, char *lastmatchrestart,
866 char *endptr, char *printname)
868 if (after_context > 0 && lastmatchnumber > 0)
871 while (lastmatchrestart < endptr && count++ < after_context)
874 char *pp = lastmatchrestart;
875 if (printname != NULL) fprintf(stdout, "%s-", printname);
876 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
877 pp = end_of_line(pp, endptr, &ellength);
878 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
879 lastmatchrestart = pp;
881 hyphenpending = TRUE;
887 /*************************************************
888 * Apply patterns to subject till one matches *
889 *************************************************/
891 /* This function is called to run through all patterns, looking for a match. It
892 is used multiple times for the same subject when colouring is enabled, in order
893 to find all possible matches.
896 matchptr the start of the subject
897 length the length of the subject to match
898 offsets the offsets vector to fill in
899 mrc address of where to put the result of pcre_exec()
901 Returns: TRUE if there was a match
902 FALSE if there was no match
903 invert if there was a non-fatal error
907 match_patterns(char *matchptr, size_t length, int *offsets, int *mrc)
910 for (i = 0; i < pattern_count; i++)
912 *mrc = pcre_exec(pattern_list[i], hints_list[i], matchptr, (int)length, 0,
913 PCRE_NOTEMPTY, offsets, OFFSET_SIZE);
914 if (*mrc >= 0) return TRUE;
915 if (*mrc == PCRE_ERROR_NOMATCH) continue;
916 fprintf(stderr, "pcregrep: pcre_exec() error %d while matching ", *mrc);
917 if (pattern_count > 1) fprintf(stderr, "pattern number %d to ", i+1);
918 fprintf(stderr, "this text:\n");
919 FWRITE(matchptr, 1, length, stderr); /* In case binary zero included */
920 fprintf(stderr, "\n");
921 if (error_count == 0 &&
922 (*mrc == PCRE_ERROR_MATCHLIMIT || *mrc == PCRE_ERROR_RECURSIONLIMIT))
924 fprintf(stderr, "pcregrep: error %d means that a resource limit "
925 "was exceeded\n", *mrc);
926 fprintf(stderr, "pcregrep: check your regex for nested unlimited loops\n");
928 if (error_count++ > 20)
930 fprintf(stderr, "pcregrep: too many errors - abandoned\n");
933 return invert; /* No more matching; don't show the line again */
936 return FALSE; /* No match, no errors */
941 /*************************************************
942 * Grep an individual file *
943 *************************************************/
945 /* This is called from grep_or_recurse() below. It uses a buffer that is three
946 times the value of MBUFTHIRD. The matching point is never allowed to stray into
947 the top third of the buffer, thus keeping more of the file available for
948 context printing or for multiline scanning. For large files, the pointer will
949 be in the middle third most of the time, so the bottom third is available for
950 "before" context printing.
953 handle the fopened FILE stream for a normal file
954 the gzFile pointer when reading is via libz
955 the BZFILE pointer when reading is via libbz2
956 frtype FR_PLAIN, FR_LIBZ, or FR_LIBBZ2
957 printname the file name if it is to be printed for each match
958 or NULL if the file name is not to be printed
959 it cannot be NULL if filenames[_nomatch]_only is set
961 Returns: 0 if there was at least one match
962 1 otherwise (no matches)
963 2 if there is a read error on a .bz2 file
967 pcregrep(void *handle, int frtype, char *printname)
971 int lastmatchnumber = 0;
974 int offsets[OFFSET_SIZE];
975 char *lastmatchrestart = NULL;
976 char buffer[3*MBUFTHIRD];
980 BOOL endhyphenpending = FALSE;
981 BOOL input_line_buffered = line_buffered;
982 FILE *in = NULL; /* Ensure initialized */
988 #ifdef SUPPORT_LIBBZ2
989 BZFILE *inbz2 = NULL;
993 /* Do the first read into the start of the buffer and set up the pointer to end
994 of what we have. In the case of libz, a non-zipped .gz file will be read as a
995 plain file. However, if a .bz2 file isn't actually bzipped, the first read will
999 if (frtype == FR_LIBZ)
1001 ingz = (gzFile)handle;
1002 bufflength = gzread (ingz, buffer, 3*MBUFTHIRD);
1007 #ifdef SUPPORT_LIBBZ2
1008 if (frtype == FR_LIBBZ2)
1010 inbz2 = (BZFILE *)handle;
1011 bufflength = BZ2_bzread(inbz2, buffer, 3*MBUFTHIRD);
1012 if ((int)bufflength < 0) return 2; /* Gotcha: bufflength is size_t; */
1013 } /* without the cast it is unsigned. */
1018 in = (FILE *)handle;
1019 if (is_file_tty(in)) input_line_buffered = TRUE;
1020 bufflength = input_line_buffered?
1021 read_one_line(buffer, 3*MBUFTHIRD, in) :
1022 fread(buffer, 1, 3*MBUFTHIRD, in);
1025 endptr = buffer + bufflength;
1027 /* Loop while the current pointer is not at the end of the file. For large
1028 files, endptr will be at the end of the buffer when we are in the middle of the
1029 file, but ptr will never get there, because as soon as it gets over 2/3 of the
1030 way, the buffer is shifted left and re-filled. */
1032 while (ptr < endptr)
1037 char *matchptr = ptr;
1039 size_t length, linelength;
1041 /* At this point, ptr is at the start of a line. We need to find the length
1042 of the subject string to pass to pcre_exec(). In multiline mode, it is the
1043 length of the remainder of the data in the buffer. Otherwise, it is the length of
1044 the next line, excluding the terminating newline. After matching, we always
1045 advance by the length of the next line. In multiline mode the PCRE_FIRSTLINE
1046 option is used for compiling, so that any match is constrained to be in the
1049 t = end_of_line(t, endptr, &endlinelength);
1050 linelength = t - ptr - endlinelength;
1051 length = multiline? (size_t)(endptr - ptr) : linelength;
1053 /* Extra processing for Jeffrey Friedl's debugging. */
1055 #ifdef JFRIEDL_DEBUG
1056 if (jfriedl_XT || jfriedl_XR)
1058 #include <sys/time.h>
1060 struct timeval start_time, end_time;
1061 struct timezone dummy;
1066 unsigned long newlen = length * jfriedl_XT + strlen(jfriedl_prefix) + strlen(jfriedl_postfix);
1067 const char *orig = ptr;
1068 ptr = malloc(newlen + 1);
1070 printf("out of memory");
1074 strcpy(endptr, jfriedl_prefix); endptr += strlen(jfriedl_prefix);
1075 for (i = 0; i < jfriedl_XT; i++) {
1076 strncpy(endptr, orig, length);
1079 strcpy(endptr, jfriedl_postfix); endptr += strlen(jfriedl_postfix);
1083 if (gettimeofday(&start_time, &dummy) != 0)
1084 perror("bad gettimeofday");
1087 for (i = 0; i < jfriedl_XR; i++)
1088 match = (pcre_exec(pattern_list[0], hints_list[0], ptr, length, 0,
1089 PCRE_NOTEMPTY, offsets, OFFSET_SIZE) >= 0);
1091 if (gettimeofday(&end_time, &dummy) != 0)
1092 perror("bad gettimeofday");
1094 double delta = ((end_time.tv_sec + (end_time.tv_usec / 1000000.0))
1096 (start_time.tv_sec + (start_time.tv_usec / 1000000.0)));
1098 printf("%s TIMER[%.4f]\n", match ? "MATCH" : "FAIL", delta);
1103 /* We come back here after a match when the -o option (only_matching) is set,
1104 in order to find any further matches in the same line. */
1106 ONLY_MATCHING_RESTART:
1108 /* Run through all the patterns until one matches or there is an error other
1109 than NOMATCH. This code is in a subroutine so that it can be re-used for
1110 finding subsequent matches when colouring matched lines. */
1112 match = match_patterns(matchptr, length, offsets, &mrc);
1114 /* If it's a match or a not-match (as required), do what's wanted. */
1116 if (match != invert)
1118 BOOL hyphenprinted = FALSE;
1120 /* We've failed if we want a file that doesn't have any matches. */
1122 if (filenames == FN_NOMATCH_ONLY) return 1;
1124 /* Just count if just counting is wanted. */
1126 if (count_only) count++;
1128 /* If all we want is a file name, there is no need to scan any more lines
1131 else if (filenames == FN_MATCH_ONLY)
1133 fprintf(stdout, "%s\n", printname);
1137 /* Likewise, if all we want is a yes/no answer. */
1139 else if (quiet) return 0;
1141 /* The --only-matching option prints just the substring that matched, and
1142 the --file-offsets and --line-offsets options output offsets for the
1143 matching substring (they both force --only-matching). None of these options
1144 prints any context. Afterwards, adjust the start and length, and then jump
1145 back to look for further matches in the same line. If we are in invert
1146 mode, however, nothing is printed - this could be still useful because the
1147 return code is set. */
1149 else if (only_matching)
1153 if (printname != NULL) fprintf(stdout, "%s:", printname);
1154 if (number) fprintf(stdout, "%d:", linenumber);
1156 fprintf(stdout, "%d,%d", (int)(matchptr + offsets[0] - ptr),
1157 offsets[1] - offsets[0]);
1158 else if (file_offsets)
1159 fprintf(stdout, "%d,%d", (int)(filepos + matchptr + offsets[0] - ptr),
1160 offsets[1] - offsets[0]);
1163 if (do_colour) fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1164 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1165 if (do_colour) fprintf(stdout, "%c[00m", 0x1b);
1167 fprintf(stdout, "\n");
1168 matchptr += offsets[1];
1169 length -= offsets[1];
1171 goto ONLY_MATCHING_RESTART;
1175 /* This is the default case when none of the above options is set. We print
1176 the matching line(s), possibly preceded and/or followed by other lines of
1181 /* See if there is a requirement to print some "after" lines from a
1182 previous match. We never print any overlaps. */
1184 if (after_context > 0 && lastmatchnumber > 0)
1188 char *p = lastmatchrestart;
1190 while (p < ptr && linecount < after_context)
1192 p = end_of_line(p, ptr, &ellength);
1196 /* It is important to advance lastmatchrestart during this printing so
1197 that it interacts correctly with any "before" printing below. Print
1198 each line's data using fwrite() in case there are binary zeroes. */
1200 while (lastmatchrestart < p)
1202 char *pp = lastmatchrestart;
1203 if (printname != NULL) fprintf(stdout, "%s-", printname);
1204 if (number) fprintf(stdout, "%d-", lastmatchnumber++);
1205 pp = end_of_line(pp, endptr, &ellength);
1206 FWRITE(lastmatchrestart, 1, pp - lastmatchrestart, stdout);
1207 lastmatchrestart = pp;
1209 if (lastmatchrestart != ptr) hyphenpending = TRUE;
1212 /* If there were non-contiguous lines printed above, insert hyphens. */
1216 fprintf(stdout, "--\n");
1217 hyphenpending = FALSE;
1218 hyphenprinted = TRUE;
1221 /* See if there is a requirement to print some "before" lines for this
1222 match. Again, don't print overlaps. */
1224 if (before_context > 0)
1229 while (p > buffer && (lastmatchnumber == 0 || p > lastmatchrestart) &&
1230 linecount < before_context)
1233 p = previous_line(p, buffer);
1236 if (lastmatchnumber > 0 && p > lastmatchrestart && !hyphenprinted)
1237 fprintf(stdout, "--\n");
1243 if (printname != NULL) fprintf(stdout, "%s-", printname);
1244 if (number) fprintf(stdout, "%d-", linenumber - linecount--);
1245 pp = end_of_line(pp, endptr, &ellength);
1246 FWRITE(p, 1, pp - p, stdout);
1251 /* Now print the matching line(s); ensure we set hyphenpending at the end
1252 of the file if any context lines are being output. */
1254 if (after_context > 0 || before_context > 0)
1255 endhyphenpending = TRUE;
1257 if (printname != NULL) fprintf(stdout, "%s:", printname);
1258 if (number) fprintf(stdout, "%d:", linenumber);
1260 /* In multiline mode, we want to print to the end of the line in which
1261 the end of the matched string is found, so we adjust linelength and the
1262 line number appropriately, but only when there actually was a match
1263 (invert not set). Because the PCRE_FIRSTLINE option is set, the start of
1264 the match will always be before the first newline sequence. */
1269 char *endmatch = ptr;
1272 endmatch += offsets[1];
1274 while (t < endmatch)
1276 t = end_of_line(t, endptr, &ellength);
1277 if (t <= endmatch) linenumber++; else break;
1280 endmatch = end_of_line(endmatch, endptr, &ellength);
1281 linelength = endmatch - ptr - ellength;
1284 /*** NOTE: Use only fwrite() to output the data line, so that binary
1285 zeroes are treated as just another data character. */
1287 /* This extra option, for Jeffrey Friedl's debugging requirements,
1288 replaces the matched string, or a specific captured string if it exists,
1289 with X. When this happens, colouring is ignored. */
1291 #ifdef JFRIEDL_DEBUG
1292 if (S_arg >= 0 && S_arg < mrc)
1294 int first = S_arg * 2;
1295 int last = first + 1;
1296 FWRITE(ptr, 1, offsets[first], stdout);
1297 fprintf(stdout, "X");
1298 FWRITE(ptr + offsets[last], 1, linelength - offsets[last], stdout);
1303 /* We have to split the line(s) up if colouring, and search for further
1308 int last_offset = 0;
1309 FWRITE(ptr, 1, offsets[0], stdout);
1310 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1311 FWRITE(ptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1312 fprintf(stdout, "%c[00m", 0x1b);
1315 last_offset += offsets[1];
1316 matchptr += offsets[1];
1317 length -= offsets[1];
1318 if (!match_patterns(matchptr, length, offsets, &mrc)) break;
1319 FWRITE(matchptr, 1, offsets[0], stdout);
1320 fprintf(stdout, "%c[%sm", 0x1b, colour_string);
1321 FWRITE(matchptr + offsets[0], 1, offsets[1] - offsets[0], stdout);
1322 fprintf(stdout, "%c[00m", 0x1b);
1324 FWRITE(ptr + last_offset, 1,
1325 (linelength + endlinelength) - last_offset, stdout);
1328 /* Not colouring; no need to search for further matches */
1330 else FWRITE(ptr, 1, linelength + endlinelength, stdout);
1333 /* End of doing what has to be done for a match. If --line-buffered was
1334 given, flush the output. */
1336 if (line_buffered) fflush(stdout);
1337 rc = 0; /* Had some success */
1339 /* Remember where the last match happened for after_context. We remember
1340 where we are about to restart, and that line's number. */
1342 lastmatchrestart = ptr + linelength + endlinelength;
1343 lastmatchnumber = linenumber + 1;
1346 /* For a match in multiline inverted mode (which of course did not cause
1347 anything to be printed), we have to move on to the end of the match before
1350 if (multiline && invert && match)
1353 char *endmatch = ptr + offsets[1];
1355 while (t < endmatch)
1357 t = end_of_line(t, endptr, &ellength);
1358 if (t <= endmatch) linenumber++; else break;
1360 endmatch = end_of_line(endmatch, endptr, &ellength);
1361 linelength = endmatch - ptr - ellength;
1364 /* Advance to after the newline and increment the line number. The file
1365 offset to the current line is maintained in filepos. */
1367 ptr += linelength + endlinelength;
1368 filepos += (int)(linelength + endlinelength);
1371 /* If input is line buffered, and the buffer is not yet full, read another
1372 line and add it into the buffer. */
1374 if (input_line_buffered && bufflength < sizeof(buffer))
1376 int add = read_one_line(ptr, sizeof(buffer) - (ptr - buffer), in);
1381 /* If we haven't yet reached the end of the file (the buffer is full), and
1382 the current point is in the top 1/3 of the buffer, slide the buffer down by
1383 1/3 and refill it. Before we do this, if some unprinted "after" lines are
1384 about to be lost, print them. */
1386 if (bufflength >= sizeof(buffer) && ptr > buffer + 2*MBUFTHIRD)
1388 if (after_context > 0 &&
1389 lastmatchnumber > 0 &&
1390 lastmatchrestart < buffer + MBUFTHIRD)
1392 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1393 lastmatchnumber = 0;
1396 /* Now do the shuffle */
1398 memmove(buffer, buffer + MBUFTHIRD, 2*MBUFTHIRD);
1402 if (frtype == FR_LIBZ)
1403 bufflength = 2*MBUFTHIRD +
1404 gzread (ingz, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1408 #ifdef SUPPORT_LIBBZ2
1409 if (frtype == FR_LIBBZ2)
1410 bufflength = 2*MBUFTHIRD +
1411 BZ2_bzread(inbz2, buffer + 2*MBUFTHIRD, MBUFTHIRD);
1415 bufflength = 2*MBUFTHIRD +
1416 (input_line_buffered?
1417 read_one_line(buffer + 2*MBUFTHIRD, MBUFTHIRD, in) :
1418 fread(buffer + 2*MBUFTHIRD, 1, MBUFTHIRD, in));
1419 endptr = buffer + bufflength;
1421 /* Adjust any last match point */
1423 if (lastmatchnumber > 0) lastmatchrestart -= MBUFTHIRD;
1425 } /* Loop through the whole file */
1427 /* End of file; print final "after" lines if wanted; do_after_lines sets
1428 hyphenpending if it prints something. */
1430 if (!only_matching && !count_only)
1432 do_after_lines(lastmatchnumber, lastmatchrestart, endptr, printname);
1433 hyphenpending |= endhyphenpending;
1436 /* Print the file name if we are looking for those without matches and there
1437 were none. If we found a match, we won't have got this far. */
1439 if (filenames == FN_NOMATCH_ONLY)
1441 fprintf(stdout, "%s\n", printname);
1445 /* Print the match count if wanted */
1449 if (count > 0 || !omit_zero_count)
1451 if (printname != NULL && filenames != FN_NONE)
1452 fprintf(stdout, "%s:", printname);
1453 fprintf(stdout, "%d\n", count);
1462 /*************************************************
1463 * Grep a file or recurse into a directory *
1464 *************************************************/
1466 /* Given a path name, if it's a directory, scan all the files if we are
1467 recursing; if it's a file, grep it.
A path name of "-" means the standard input. A name ending in .gz or .bz2 is
opened with gzopen() or BZ2_bzopen() where that support is compiled in;
anything else is opened with fopen() in binary mode. When a directory is
scanned, the name of each entry (not its full path) is tested against the
include/exclude patterns, and the per-entry results are merged: a result
greater than 1 replaces the overall result, and a result of 0 turns an overall
result of 1 into 0.
1470 pathname the path to investigate
1471 dir_recurse TRUE if recursing is wanted (-r or -drecurse)
1472 only_one_at_top TRUE if the path is the only one at top level
1474 Returns: 0 if there was at least one match
1475 1 if there were no matches (also returned for a skipped directory
or a skipped non-regular file)
1476 2 if there was some kind of error
1478 However, file opening failures are suppressed if "silent" is set.
1482 grep_or_recurse(char *pathname, BOOL dir_recurse, BOOL only_one_at_top)
1489 FILE *in = NULL; /* Ensure initialized */
1495 #ifdef SUPPORT_LIBBZ2
1496 BZFILE *inbz2 = NULL;
1499 /* If the file name is "-" we scan stdin */
1501 if (strcmp(pathname, "-") == 0)
1503 return pcregrep(stdin, FR_PLAIN,
1504 (filenames > FN_DEFAULT || (filenames == FN_DEFAULT && !only_one_at_top))?
1508 /* If the file is a directory, skip if skipping or if we are recursing, scan
1509 each file and directory within it, subject to any include or exclude patterns
1510 that were set. The scanning code is localized so it can be made
1513 if ((sep = isdirectory(pathname)) != 0)
1515 if (dee_action == dee_SKIP) return 1;
1516 if (dee_action == dee_RECURSE)
1520 directory_type *dir = opendirectory(pathname);
1525 fprintf(stderr, "pcregrep: Failed to open directory %s: %s\n", pathname,
1530 while ((nextfile = readdirectory(dir)) != NULL)
1533 sprintf(buffer, "%.512s%c%.128s", pathname, sep, nextfile);
1534 nflen = (int)(strlen(nextfile));
1536 if (isdirectory(buffer))
1538 if (exclude_dir_compiled != NULL &&
1539 pcre_exec(exclude_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1542 if (include_dir_compiled != NULL &&
1543 pcre_exec(include_dir_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1548 if (exclude_compiled != NULL &&
1549 pcre_exec(exclude_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) >= 0)
1552 if (include_compiled != NULL &&
1553 pcre_exec(include_compiled, NULL, nextfile, nflen, 0, 0, NULL, 0) < 0)
1557 frc = grep_or_recurse(buffer, dir_recurse, FALSE);
1558 if (frc > 1) rc = frc;
1559 else if (frc == 0 && rc == 1) rc = 0;
1562 closedirectory(dir);
1567 /* If the file is not a directory and not a regular file, skip it if that's
1570 else if (!isregfile(pathname) && DEE_action == DEE_SKIP) return 1;
1572 /* Control reaches here if we have a regular file, or if we have a directory
1573 and recursion or skipping was not requested, or if we have anything else and
1574 skipping was not requested. The scan proceeds. If this is the first and only
1575 argument at top level, we don't show the file name, unless we are only showing
1576 the file name, or the filename was forced (-H). */
1578 pathlen = (int)(strlen(pathname));
1580 /* Open using zlib if it is supported and the file name ends with .gz. */
1583 if (pathlen > 3 && strcmp(pathname + pathlen - 3, ".gz") == 0)
1585 ingz = gzopen(pathname, "rb");
1589 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1593 handle = (void *)ingz;
1599 /* Otherwise open with bz2lib if it is supported and the name ends with .bz2. */
1601 #ifdef SUPPORT_LIBBZ2
1602 if (pathlen > 4 && strcmp(pathname + pathlen - 4, ".bz2") == 0)
1604 inbz2 = BZ2_bzopen(pathname, "rb");
1605 handle = (void *)inbz2;
1611 /* Otherwise use plain fopen(). The label is so that we can come back here if
1612 an attempt to read a .bz2 file indicates that it really is a plain file. */
1614 #ifdef SUPPORT_LIBBZ2
1618 in = fopen(pathname, "rb");
1619 handle = (void *)in;
1623 /* All the opening methods return errno when they fail. */
1628 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pathname,
1633 /* Now grep the file. The path name is passed as the third argument only when
file names are to be shown: always if filenames is above FN_DEFAULT, and for
FN_DEFAULT unless this is the only argument at top level. Otherwise NULL is
passed. */
1635 rc = pcregrep(handle, frtype, (filenames > FN_DEFAULT ||
1636 (filenames == FN_DEFAULT && !only_one_at_top))? pathname : NULL);
1638 /* Close in an appropriate manner. */
1641 if (frtype == FR_LIBZ)
1646 /* If it is a .bz2 file and the result is 2, it means that the first attempt to
1647 read failed. If the error indicates that the file isn't in fact bzipped, try
1648 again as a normal file. */
1650 #ifdef SUPPORT_LIBBZ2
1651 if (frtype == FR_LIBBZ2)
1656 const char *err = BZ2_bzerror(inbz2, &errnum);
1657 if (errnum == BZ_DATA_ERROR_MAGIC)
1663 fprintf(stderr, "pcregrep: Failed to read %s using bzlib: %s\n",
1671 /* Normal file close */
1675 /* Pass back the yield from pcregrep(). */
1683 /*************************************************
1685 *************************************************/
/* Short usage message, written to stderr: the single-letter form of every
entry in optionlist whose one_char is greater than zero, followed by the
general argument layout and a pointer to --help. The function header is not
part of this listing; main() calls usage(2) and uses the result as an exit
code, so presumably this is usage() - TODO confirm. */
1691 fprintf(stderr, "Usage: pcregrep [-");
1692 for (op = optionlist; op->one_char != 0; op++)
1694 if (op->one_char > 0) fprintf(stderr, "%c", op->one_char);
1696 fprintf(stderr, "] [long options] [pattern] [files]\n");
1697 fprintf(stderr, "Type `pcregrep --help' for more information and the long "
1705 /*************************************************
1707 *************************************************/
/* Full help text, written to stdout with printf(): a fixed introduction, a
note on compressed-file support that depends on SUPPORT_LIBBZ2 (and on
SUPPORT_LIBZ in the combined test below), one line for each entry in
optionlist, and closing notes that quote MAX_PATTERN_COUNT. The function header
is not part of this listing; handle_option() calls help() for N_HELP, so
presumably this is help() - TODO confirm. */
1714 printf("Usage: pcregrep [OPTION]... [PATTERN] [FILE1 FILE2 ...]\n");
1715 printf("Search for PATTERN in each FILE or standard input.\n");
1716 printf("PATTERN must be present if neither -e nor -f is used.\n");
1717 printf("\"-\" can be used as a file name to mean STDIN.\n");
1720 printf("Files whose names end in .gz are read using zlib.\n");
1723 #ifdef SUPPORT_LIBBZ2
1724 printf("Files whose names end in .bz2 are read using bzlib2.\n");
1727 #if defined SUPPORT_LIBZ || defined SUPPORT_LIBBZ2
1728 printf("Other files and the standard input are read as plain files.\n\n");
1730 printf("All files are read as plain files, without any interpretation.\n\n");
1733 printf("Example: pcregrep -i 'hello.*world' menu.h main.c\n\n");
1734 printf("Options:\n");
1736 for (op = optionlist; op->one_char != 0; op++)
1740 if (op->one_char > 0) sprintf(s, "-%c,", op->one_char); else strcpy(s, " ");
/* n is 30 minus the number of characters just printed; it is then used as the
precision that limits how much of the padding string is output before the help
text, so that the help texts line up. */
1741 n = 30 - printf(" %s --%s", s, op->long_name);
1743 printf("%.*s%s\n", n, " ", op->help_text);
1746 printf("\nWhen reading patterns from a file instead of using a command line option,\n");
1747 printf("trailing white space is removed and blank lines are ignored.\n");
1748 printf("There is a maximum of %d patterns.\n", MAX_PATTERN_COUNT);
1750 printf("\nWith no FILEs, read standard input. If fewer than two FILEs given, assume -h.\n");
1751 printf("Exit status is 0 if any matches, 1 if no matches, and 2 if trouble.\n");
1757 /*************************************************
1758 * Handle a single-letter, no data option *
1759 *************************************************/
/* Applies one option that takes no data. Most cases set one of the file-level
option variables; 'i', 'M' and 'u' add bits to the PCRE options word instead,
and N_HELP prints the help text and exits with status 0.

Arguments:
  letter     the one_char value of the option (a letter or an N_xxx code)
  options    the PCRE options accumulated so far

main() assigns the result back to its pcre_options, so the value returned is
presumably the updated options word; the return statement is not part of this
listing - TODO confirm. */
1762 handle_option(int letter, int options)
1766 case N_FOFFSETS: file_offsets = TRUE; break;
1767 case N_HELP: help(); exit(0);
1768 case N_LOFFSETS: line_offsets = number = TRUE; break;
1769 case N_LBUFFER: line_buffered = TRUE; break;
1770 case 'c': count_only = TRUE; break;
1771 case 'F': process_options |= PO_FIXED_STRINGS; break;
1772 case 'H': filenames = FN_FORCE; break;
1773 case 'h': filenames = FN_NONE; break;
1774 case 'i': options |= PCRE_CASELESS; break;
1775 case 'l': omit_zero_count = TRUE; filenames = FN_MATCH_ONLY; break;
1776 case 'L': filenames = FN_NOMATCH_ONLY; break;
1777 case 'M': multiline = TRUE; options |= PCRE_MULTILINE|PCRE_FIRSTLINE; break;
1778 case 'n': number = TRUE; break;
1779 case 'o': only_matching = TRUE; break;
1780 case 'q': quiet = TRUE; break;
1781 case 'r': dee_action = dee_RECURSE; break;
1782 case 's': silent = TRUE; break;
1783 case 'u': options |= PCRE_UTF8; utf8 = TRUE; break;
1784 case 'v': invert = TRUE; break;
1785 case 'w': process_options |= PO_WORD_MATCH; break;
1786 case 'x': process_options |= PO_LINE_MATCH; break;
1789 fprintf(stderr, "pcregrep version %s\n", pcre_version());
1794 fprintf(stderr, "pcregrep: Unknown option -%c\n", letter);
1804 /*************************************************
1805 * Construct printed ordinal *
1806 *************************************************/
1808 /* This turns a number into "1st", "3rd", etc. The text is built in a static
buffer, so each call overwrites the result of the previous one. The buffer has
8 bytes and the suffix plus terminator needs 3, which leaves room for 5
characters of number, assuming p starts at the beginning of the buffer (that
line is not part of this listing). NOTE(review): the switch expression is not
visible here; if it tests only the last digit, then 11, 12 and 13 are given
"st", "nd" and "rd" - confirm against the full source. */
1813 static char buffer[8];
1815 sprintf(p, "%d", n);
1816 while (*p != 0) p++;
1819 case 1: strcpy(p, "st"); break;
1820 case 2: strcpy(p, "nd"); break;
1821 case 3: strcpy(p, "rd"); break;
1822 default: strcpy(p, "th"); break;
1829 /*************************************************
1830 * Compile a single pattern *
1831 *************************************************/
1833 /* When the -F option has been used, this is called for each substring.
1834 Otherwise it's called for each supplied pattern.
The pattern is wrapped in the prefix and suffix selected by process_options
(at most MBUFTHIRD characters of the pattern itself are copied) and the wrapped
text is passed to pcre_compile(). The result is stored in
pattern_list[pattern_count]. A request made when pattern_count has already
reached MAX_PATTERN_COUNT is reported on stderr.
1837 pattern the pattern string
1838 options the PCRE options
1839 filename the file name, or NULL for a command-line pattern
1840 count 0 if this is the only command line pattern, or
1841 number of the command line pattern, or
1842 linenumber for a pattern from a file
1844 Returns: TRUE on success, FALSE after an error
1848 compile_single_pattern(char *pattern, int options, char *filename, int count)
1850 char buffer[MBUFTHIRD + 16];
1854 if (pattern_count >= MAX_PATTERN_COUNT)
1856 fprintf(stderr, "pcregrep: Too many %spatterns (max %d)\n",
1857 (filename == NULL)? "command-line " : "", MAX_PATTERN_COUNT);
1861 sprintf(buffer, "%s%.*s%s", prefix[process_options], MBUFTHIRD, pattern,
1862 suffix[process_options]);
1863 pattern_list[pattern_count] =
1864 pcre_compile(buffer, options, &error, &errptr, pcretables);
1865 if (pattern_list[pattern_count] != NULL)
1871 /* Handle compile errors */
/* The offset from pcre_compile() refers to the wrapped text. Subtract the
length of the prefix, and limit the result to the length of the pattern as
supplied, so that the reported offset refers to what the user wrote. */
1873 errptr -= (int)strlen(prefix[process_options]);
1874 if (errptr > (int)strlen(pattern)) errptr = (int)strlen(pattern);
1876 if (filename == NULL)
1879 fprintf(stderr, "pcregrep: Error in command-line regex "
1880 "at offset %d: %s\n", errptr, error);
1882 fprintf(stderr, "pcregrep: Error in %s command-line regex "
1883 "at offset %d: %s\n", ordin(count), errptr, error);
1887 fprintf(stderr, "pcregrep: Error in regex in line %d of %s "
1888 "at offset %d: %s\n", count, filename, errptr, error);
1896 /*************************************************
1897 * Compile one supplied pattern *
1898 *************************************************/
1900 /* When the -F option has been used, each string may be a list of strings,
1901 separated by line breaks. They will be matched literally.
With -F (PO_FIXED_STRINGS set in process_options) the string is walked with
end_of_line(). Each piece is copied, without its line ending, into a local
buffer and passed to compile_single_pattern(), whose result is checked. Without
-F the pattern is passed to compile_single_pattern() unchanged.
NOTE(review): the local buffer holds MBUFTHIRD bytes but the copy length is
taken from the input with no upper limit, so a piece of MBUFTHIRD characters or
more would not fit - confirm whether callers can supply one.
1904 pattern the pattern string
1905 options the PCRE options
1906 filename the file name, or NULL for a command-line pattern
1907 count 0 if this is the only command line pattern, or
1908 number of the command line pattern, or
1909 linenumber for a pattern from a file
1911 Returns: TRUE on success, FALSE after an error
1915 compile_pattern(char *pattern, int options, char *filename, int count)
1917 if ((process_options & PO_FIXED_STRINGS) != 0)
1919 char *eop = pattern + strlen(pattern);
1920 char buffer[MBUFTHIRD];
1924 char *p = end_of_line(pattern, eop, &ellength);
1926 return compile_single_pattern(pattern, options, filename, count);
1927 sprintf(buffer, "%.*s", (int)(p - pattern - ellength), pattern);
1929 if (!compile_single_pattern(buffer, options, filename, count))
1933 else return compile_single_pattern(pattern, options, filename, count);
1938 /*************************************************
1940 *************************************************/
1942 /* Returns 0 if something matched, 1 if nothing matched, 2 after an error.
The work is done in this order: take the default newline type from the PCRE
library, decode the command-line options, settle the context, locale, colour,
newline and -d/-D settings, compile and study the patterns (from the command
line and/or a pattern file) together with any include/exclude patterns, and
then scan either the standard input or each remaining argument in turn. */
1945 main(int argc, char **argv)
1949 int pcre_options = 0;
1950 int cmd_pattern_count = 0;
1953 BOOL only_one_at_top;
1954 char *patterns[MAX_PATTERN_COUNT];
1955 const char *locale_from = "--locale";
1958 /* Set the default line ending value from the default in the PCRE library;
1959 "lf", "cr", "crlf", and "any" are supported. Anything else is treated as "lf".
1960 Note that the return values from pcre_config(), though derived from the ASCII
1961 codes, are the same in EBCDIC environments, so we must use the actual values
1962 rather than escapes such as '\r'. */
1964 (void)pcre_config(PCRE_CONFIG_NEWLINE, &i);
1967 default: newline = (char *)"lf"; break;
1968 case 13: newline = (char *)"cr"; break;
1969 case (13 << 8) | 10: newline = (char *)"crlf"; break;
1970 case -1: newline = (char *)"any"; break;
1971 case -2: newline = (char *)"anycrlf"; break;
1974 /* Process the options */
1976 for (i = 1; i < argc; i++)
1978 option_item *op = NULL;
1979 char *option_data = (char *)""; /* default to keep compiler happy */
1981 BOOL longopwasequals = FALSE;
1983 if (argv[i][0] != '-') break;
1985 /* If we hit an argument that is just "-", it may be a reference to STDIN,
1986 but only if we have previously had -e or -f to define the patterns. */
1988 if (argv[i][1] == 0)
1990 if (pattern_filename != NULL || pattern_count > 0) break;
1991 else exit(usage(2));
1994 /* Handle a long name option, or -- to terminate the options */
1996 if (argv[i][1] == '-')
1998 char *arg = argv[i] + 2;
1999 char *argequals = strchr(arg, '=');
2001 if (*arg == 0) /* -- terminates options */
2004 break; /* out of the options-handling loop */
2009 /* Some long options have data that follows after =, for example file=name.
2010 Some options have variations in the long name spelling: specifically, we
2011 allow "regexp" because GNU grep allows it, though I personally go along
2012 with Jeffrey Friedl and Larry Wall in preferring "regex" without the "p".
2013 These options are entered in the table as "regex(p)". Options can be in
2014 both these categories. */
2016 for (op = optionlist; op->one_char != 0; op++)
2018 char *opbra = strchr(op->long_name, '(');
2019 char *equals = strchr(op->long_name, '=');
2021 /* Handle options with only one spelling of the name */
2023 if (opbra == NULL) /* Does not contain '(' */
2025 if (equals == NULL) /* Not thing=data case */
2027 if (strcmp(arg, op->long_name) == 0) break;
2029 else /* Special case xxx=data */
2031 int oplen = (int)(equals - op->long_name);
2032 int arglen = (argequals == NULL)?
2033 (int)strlen(arg) : (int)(argequals - arg);
2034 if (oplen == arglen && strncmp(arg, op->long_name, oplen) == 0)
2036 option_data = arg + arglen;
2037 if (*option_data == '=')
2040 longopwasequals = TRUE;
2047 /* Handle options with an alternate spelling of the name */
2054 int baselen = (int)(opbra - op->long_name);
2055 int fulllen = (int)(strchr(op->long_name, ')') - op->long_name + 1);
2056 int arglen = (argequals == NULL || equals == NULL)?
2057 (int)strlen(arg) : (int)(argequals - arg);
2059 sprintf(buff1, "%.*s", baselen, op->long_name);
2060 sprintf(buff2, "%s%.*s", buff1, fulllen - baselen - 2, opbra + 1);
2062 if (strncmp(arg, buff1, arglen) == 0 ||
2063 strncmp(arg, buff2, arglen) == 0)
2065 if (equals != NULL && argequals != NULL)
2067 option_data = argequals;
2068 if (*option_data == '=')
2071 longopwasequals = TRUE;
2079 if (op->one_char == 0)
2081 fprintf(stderr, "pcregrep: Unknown option %s\n", argv[i]);
2086 /* Jeffrey Friedl's debugging harness uses these additional options which
2087 are not in the right form for putting in the option table because they use
2088 only one hyphen, yet are more than one character long. By putting them
2089 separately here, they will not get displayed as part of the help() output,
2090 but I don't think Jeffrey will care about that. */
2092 #ifdef JFRIEDL_DEBUG
2093 else if (strcmp(argv[i], "-pre") == 0) {
2094 jfriedl_prefix = argv[++i];
2096 } else if (strcmp(argv[i], "-post") == 0) {
2097 jfriedl_postfix = argv[++i];
2099 } else if (strcmp(argv[i], "-XT") == 0) {
2100 sscanf(argv[++i], "%d", &jfriedl_XT);
2102 } else if (strcmp(argv[i], "-XR") == 0) {
2103 sscanf(argv[++i], "%d", &jfriedl_XR);
2109 /* One-char options; many that have no data may be in a single argument; we
2110 continue till we hit the last one or one that needs data. */
2114 char *s = argv[i] + 1;
2118 for (op = optionlist; op->one_char != 0; op++)
2119 { if (*s == op->one_char) break; }
2120 if (op->one_char == 0)
2122 fprintf(stderr, "pcregrep: Unknown option letter '%c' in \"%s\"\n",
2126 if (op->type != OP_NODATA || s[1] == 0)
2131 pcre_options = handle_option(*s++, pcre_options);
2135 /* At this point we should have op pointing to a matched option. If the type
2136 is NO_DATA, it means that there is no data, and the option might set
2137 something in the PCRE options. */
2139 if (op->type == OP_NODATA)
2141 pcre_options = handle_option(op->one_char, pcre_options);
2145 /* If the option type is OP_OP_STRING or OP_OP_NUMBER, it's an option that
2146 either has a value or defaults to something. It cannot have data in a
2147 separate item. At the moment, the only such options are "colo(u)r" and
2148 Jeffrey Friedl's special -S debugging option. */
2150 if (*option_data == 0 &&
2151 (op->type == OP_OP_STRING || op->type == OP_OP_NUMBER))
2153 switch (op->one_char)
2156 colour_option = (char *)"auto";
2158 #ifdef JFRIEDL_DEBUG
2167 /* Otherwise, find the data string for the option. */
2169 if (*option_data == 0)
2171 if (i >= argc - 1 || longopwasequals)
2173 fprintf(stderr, "pcregrep: Data missing after %s\n", argv[i]);
2176 option_data = argv[++i];
2179 /* If the option type is OP_PATLIST, it's the -e option, which can be called
2180 multiple times to create a list of patterns. */
2182 if (op->type == OP_PATLIST)
2184 if (cmd_pattern_count >= MAX_PATTERN_COUNT)
2186 fprintf(stderr, "pcregrep: Too many command-line patterns (max %d)\n",
2190 patterns[cmd_pattern_count++] = option_data;
2193 /* Otherwise, deal with single string or numeric data values. */
2195 else if (op->type != OP_NUMBER && op->type != OP_OP_NUMBER)
2197 *((char **)op->dataptr) = option_data;
/* Numeric data is parsed as an unsigned decimal number and the result is held
in an int. NOTE(review): no range check is visible in this listing, so a value
too large for an int would not be stored faithfully - confirm. */
2202 int n = strtoul(option_data, &endptr, 10);
2207 char *equals = strchr(op->long_name, '=');
2208 int nlen = (equals == NULL)? (int)strlen(op->long_name) :
2209 (int)(equals - op->long_name);
2210 fprintf(stderr, "pcregrep: Malformed number \"%s\" after --%.*s\n",
2211 option_data, nlen, op->long_name);
2214 fprintf(stderr, "pcregrep: Malformed number \"%s\" after -%c\n",
2215 option_data, op->one_char);
2218 *((int *)op->dataptr) = n;
2222 /* Options have been decoded. If -C was used, its value is used as a default
for after_context and before_context (only where they are still zero).
2225 if (both_context > 0)
2227 if (after_context == 0) after_context = both_context;
2228 if (before_context == 0) before_context = both_context;
2231 /* Only one of --only-matching, --file-offsets, or --line-offsets is permitted.
2232 However, the latter two set the only_matching flag. */
2234 if ((only_matching && (file_offsets || line_offsets)) ||
2235 (file_offsets && line_offsets))
2237 fprintf(stderr, "pcregrep: Cannot mix --only-matching, --file-offsets "
2238 "and/or --line-offsets\n");
2242 if (file_offsets || line_offsets) only_matching = TRUE;
2244 /* If a locale has not been provided as an option, see if the LC_CTYPE or
2245 LC_ALL environment variable is set, and if so, use it. locale_from records
where the value came from, for use in the message printed if setlocale() fails.
NOTE(review): the name recorded for LC_ALL below is spelled "LCC_ALL", which
is what that message will show - this looks like a typo in the string. */
2249 locale = getenv("LC_ALL");
2250 locale_from = "LCC_ALL";
2255 locale = getenv("LC_CTYPE");
2256 locale_from = "LC_CTYPE";
2259 /* If a locale has been provided, set it, and generate the tables the PCRE
2260 needs. Otherwise, pcretables==NULL, which causes the use of default tables. */
2264 if (setlocale(LC_CTYPE, locale) == NULL)
2266 fprintf(stderr, "pcregrep: Failed to set locale %s (obtained from %s)\n",
2267 locale, locale_from);
2270 pcretables = pcre_maketables();
2273 /* Sort out colouring */
2275 if (colour_option != NULL && strcmp(colour_option, "never") != 0)
2277 if (strcmp(colour_option, "always") == 0) do_colour = TRUE;
2278 else if (strcmp(colour_option, "auto") == 0) do_colour = is_stdout_tty();
2281 fprintf(stderr, "pcregrep: Unknown colour setting \"%s\"\n",
2287 char *cs = getenv("PCREGREP_COLOUR");
2288 if (cs == NULL) cs = getenv("PCREGREP_COLOR");
2289 if (cs != NULL) colour_string = cs;
2293 /* Interpret the newline type; the default settings are Unix-like. */
2295 if (strcmp(newline, "cr") == 0 || strcmp(newline, "CR") == 0)
2297 pcre_options |= PCRE_NEWLINE_CR;
2298 endlinetype = EL_CR;
2300 else if (strcmp(newline, "lf") == 0 || strcmp(newline, "LF") == 0)
2302 pcre_options |= PCRE_NEWLINE_LF;
2303 endlinetype = EL_LF;
2305 else if (strcmp(newline, "crlf") == 0 || strcmp(newline, "CRLF") == 0)
2307 pcre_options |= PCRE_NEWLINE_CRLF;
2308 endlinetype = EL_CRLF;
2310 else if (strcmp(newline, "any") == 0 || strcmp(newline, "ANY") == 0)
2312 pcre_options |= PCRE_NEWLINE_ANY;
2313 endlinetype = EL_ANY;
2315 else if (strcmp(newline, "anycrlf") == 0 || strcmp(newline, "ANYCRLF") == 0)
2317 pcre_options |= PCRE_NEWLINE_ANYCRLF;
2318 endlinetype = EL_ANYCRLF;
2322 fprintf(stderr, "pcregrep: Invalid newline specifier \"%s\"\n", newline);
2326 /* Interpret the text values for -d and -D */
2328 if (dee_option != NULL)
2330 if (strcmp(dee_option, "read") == 0) dee_action = dee_READ;
2331 else if (strcmp(dee_option, "recurse") == 0) dee_action = dee_RECURSE;
2332 else if (strcmp(dee_option, "skip") == 0) dee_action = dee_SKIP;
2335 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -d\n", dee_option);
2340 if (DEE_option != NULL)
2342 if (strcmp(DEE_option, "read") == 0) DEE_action = DEE_READ;
2343 else if (strcmp(DEE_option, "skip") == 0) DEE_action = DEE_SKIP;
2346 fprintf(stderr, "pcregrep: Invalid value \"%s\" for -D\n", DEE_option);
2351 /* Check the values for Jeffrey Friedl's debugging options. */
2353 #ifdef JFRIEDL_DEBUG
2356 fprintf(stderr, "pcregrep: bad value for -S option\n");
2359 if (jfriedl_XT != 0 || jfriedl_XR != 0)
2361 if (jfriedl_XT == 0) jfriedl_XT = 1;
2362 if (jfriedl_XR == 0) jfriedl_XR = 1;
2366 /* Get memory to store the pattern and hints lists. */
2368 pattern_list = (pcre **)malloc(MAX_PATTERN_COUNT * sizeof(pcre *));
2369 hints_list = (pcre_extra **)malloc(MAX_PATTERN_COUNT * sizeof(pcre_extra *));
2371 if (pattern_list == NULL || hints_list == NULL)
2373 fprintf(stderr, "pcregrep: malloc failed\n");
2377 /* If no patterns were provided by -e, and there is no file provided by -f,
2378 the first argument is the one and only pattern, and it must exist. */
2380 if (cmd_pattern_count == 0 && pattern_filename == NULL)
2382 if (i >= argc) return usage(2);
2383 patterns[cmd_pattern_count++] = argv[i++];
2386 /* Compile the patterns that were provided on the command line, either by
2387 multiple uses of -e or as a single unkeyed pattern. */
2389 for (j = 0; j < cmd_pattern_count; j++)
2391 if (!compile_pattern(patterns[j], pcre_options, NULL,
2392 (j == 0 && cmd_pattern_count == 1)? 0 : j + 1))
2396 /* Compile the regular expressions that are provided in a file. */
2398 if (pattern_filename != NULL)
2403 char buffer[MBUFTHIRD];
2405 if (strcmp(pattern_filename, "-") == 0)
2408 filename = stdin_name;
2412 f = fopen(pattern_filename, "r");
2415 fprintf(stderr, "pcregrep: Failed to open %s: %s\n", pattern_filename,
2419 filename = pattern_filename;
2422 while (fgets(buffer, MBUFTHIRD, f) != NULL)
2424 char *s = buffer + (int)strlen(buffer);
2425 while (s > buffer && isspace((unsigned char)(s[-1]))) s--;
2428 if (buffer[0] == 0) continue; /* Skip blank lines */
2429 if (!compile_pattern(buffer, pcre_options, filename, linenumber))
2433 if (f != stdin) fclose(f);
2436 /* Study the regular expressions, as we will be running them many times */
2438 for (j = 0; j < pattern_count; j++)
2440 hints_list[j] = pcre_study(pattern_list[j], 0, &error);
2444 if (pattern_count == 1) s[0] = 0; else sprintf(s, " number %d", j);
2445 fprintf(stderr, "pcregrep: Error while studying regex%s: %s\n", s, error);
2451 /* If there are include or exclude patterns, compile them. */
2453 if (exclude_pattern != NULL)
2455 exclude_compiled = pcre_compile(exclude_pattern, 0, &error, &errptr,
2457 if (exclude_compiled == NULL)
2459 fprintf(stderr, "pcregrep: Error in 'exclude' regex at offset %d: %s\n",
2465 if (include_pattern != NULL)
2467 include_compiled = pcre_compile(include_pattern, 0, &error, &errptr,
2469 if (include_compiled == NULL)
2471 fprintf(stderr, "pcregrep: Error in 'include' regex at offset %d: %s\n",
2477 if (exclude_dir_pattern != NULL)
2479 exclude_dir_compiled = pcre_compile(exclude_dir_pattern, 0, &error, &errptr,
2481 if (exclude_dir_compiled == NULL)
2483 fprintf(stderr, "pcregrep: Error in 'exclude_dir' regex at offset %d: %s\n",
2489 if (include_dir_pattern != NULL)
2491 include_dir_compiled = pcre_compile(include_dir_pattern, 0, &error, &errptr,
2493 if (include_dir_compiled == NULL)
2495 fprintf(stderr, "pcregrep: Error in 'include_dir' regex at offset %d: %s\n",
2501 /* If there are no further arguments, do the business on stdin and exit. */
2505 rc = pcregrep(stdin, FR_PLAIN, (filenames > FN_DEFAULT)? stdin_name : NULL);
2509 /* Otherwise, work through the remaining arguments as files or directories.
2510 Pass in the fact that there is only one argument at top level - this suppresses
2511 the file name if the argument is not a directory and filenames are not
2512 otherwise forced. */
2514 only_one_at_top = i == argc - 1; /* Catch initial value of i */
2516 for (; i < argc; i++)
2518 int frc = grep_or_recurse(argv[i], dee_action == dee_RECURSE,
2520 if (frc > 1) rc = frc;
2521 else if (frc == 0 && rc == 1) rc = 0;
/* Release the compiled patterns and the study data. NOTE(review): the entries
come from pcre_compile() and pcre_study() but are released with free(); the
PCRE API documents pcre_free() for this, and the two are the same function only
while pcre_free has its default value - confirm. */
2525 if (pattern_list != NULL)
2527 for (i = 0; i < pattern_count; i++) free(pattern_list[i]);
2530 if (hints_list != NULL)
2532 for (i = 0; i < hint_count; i++) free(hints_list[i]);
2542 /* End of pcregrep */