"""
Behave like GNU grep but use python regexp syntax.  Note that the real
GNU grep program will be 10-20 times faster than this script.

Copyright (C) 2005 Don Peterson

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of
the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public
License along with this program; if not, write to the Free
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
MA 02111-1307  USA
"""

import getopt
import re
import string  # not used below; kept so nothing relying on it breaks
import sys

dbg = 0  # Set to a true value to make Dbg() emit trace output

# Global variables
return_status = 1        # Exit status: becomes 0 once a line is selected
ignore_case = 0          # -i
just_print_files = 0     # -l
quiet = 0                # -q
patterns_from_file = []  # -f
invert_match = 0         # -v
print_line_number = 0    # -n
print_count = 0          # -c
recursive = 0            # -r  (not implemented; the option is rejected)
fixed_string = 0         # -F  fgrep behavior
files_from_stdin = 0     # -@
get_lines_from_stdin = 0
regexps = []             # Compiled patterns, or plain strings when -F
files_to_process = []
files_with_match = {}    # Key is filename, value is number of selected lines


def Dbg(s, nonl=0):
    """Write the trace message s to stdout, prefixed with "+", when dbg is set.

    A newline is appended unless nonl is true.
    """
    if not dbg:
        return
    if not nonl:
        s += "\n"
    sys.stdout.write("+" + s)


def Error(s):
    """Write s to stderr and exit the process with status 2."""
    sys.stderr.write(s)
    sys.exit(2)


def Usage():
    """Print the help text via Error(), i.e. to stderr with exit status 2."""
    s = '''Usage:  %s [options] [regexp] [file1 [file2 ...]]
  Search for (python) regular expression pattern matches in each file.
Options:
  -F    The regexp is a fixed string (i.e., fgrep).
  -c    Print a count of matching lines per file.
  -f    Obtain pattern(s) from file, one per line.
  -h    Print this help.
  -i    Ignore case in the match.
  -l    Print filenames that have one or more matches.
  -n    Preface matching lines with line number.
  -q    No output; return status = 0 if match found, = 1 if not.
  -r    Descend directories recursively.
  -v    Print non-matching lines.
  -@    Read the list of files to process from stdin.
''' % (sys.argv[0])
    Error(s)


def GetFilesFromStdin():
    """Prepend whitespace-separated file names read from stdin to
    files_to_process."""
    global files_to_process
    names = []
    for line in sys.stdin:
        names.extend(line.split())
    files_to_process = names + files_to_process


def ProcessCommandLine():
    """Parse sys.argv and fill in the module-level option globals.

    On return, regexps holds compiled pattern objects (or the raw strings
    when -F was given) and files_to_process holds the files to search; an
    empty files_to_process means the lines come from stdin.

    Every command line problem (unknown option, missing or bad pattern)
    is reported through Error(), so it exits with status 2 and cannot be
    confused with the "no match" status 1.
    """
    global regexps, files_to_process, patterns_from_file
    global get_lines_from_stdin, print_count, fixed_string, ignore_case
    global just_print_files, print_line_number, quiet, invert_match
    global files_from_stdin
    try:
        optlist, args = getopt.getopt(sys.argv[1:], "cf:Fihlnqrv@")
    except getopt.error as err:
        Error("getopt error: %s\n" % err)
    for opt, value in optlist:
        if opt == "-c":
            print_count = 1
        elif opt == "-f":
            with open(value) as pattern_file:
                # Strip only the line terminator: a final line without a
                # newline must keep its last character.
                patterns_from_file = [x.rstrip("\n") for x in pattern_file]
            if not patterns_from_file:
                Error("Pattern file is empty\n")
        elif opt == "-F":
            fixed_string = 1
        elif opt == "-i":
            ignore_case = 1
        elif opt == "-h":
            Usage()
        elif opt == "-l":
            just_print_files = 1
        elif opt == "-n":
            print_line_number = 1
        elif opt == "-q":
            quiet = 1
        elif opt == "-r":
            Error("Recursive behavior not implemented yet\n")
        elif opt == "-v":
            invert_match = 1
        elif opt == "-@":
            files_from_stdin = 1
        Dbg(opt)
    Dbg("args = " + repr(args))
    if not args and not patterns_from_file:
        # No regexp or file on command line
        Error("Need at least one pattern (use -h for help)\n")
    # The first positional argument is the regexp, the rest are files.
    regexps = args[:1]
    files_to_process = args[1:]
    if len(args) < 2:
        # No files named on the command line
        if files_from_stdin:
            GetFilesFromStdin()
        else:
            get_lines_from_stdin = 1
    regexps = regexps + patterns_from_file
    if not regexps:
        Error("Need at least one regexp\n")
    Dbg("Files to process: " + repr(files_to_process))
    Dbg("Regexp(s): " + repr(regexps))
    if not fixed_string:
        # Compile all regexps
        flags = re.I if ignore_case else 0
        compiled = []
        for pattern in regexps:
            try:
                compiled.append(re.compile(pattern, flags))
            except re.error:
                Error("'%s' is a bad pattern\n" % pattern)
        regexps = compiled


def _LineMatches(line):
    """Return True if at least one entry of regexps matches line."""
    if not fixed_string:
        return any(regexp.search(line) for regexp in regexps)
    if ignore_case:
        line = line.lower()
        return any(text.lower() in line for text in regexps)
    return any(text in line for text in regexps)


def ProcessLines(lines, filename, print_filename=0):
    """Search lines and print or count the selected ones.

    lines           Iterable of lines, each including its line terminator.
    filename        Key used in files_with_match and as the output prefix.
    print_filename  If true and more than one file is being processed,
                    each printed line is prefixed with "filename:".

    A line is selected when at least one pattern matches it, or, with -v,
    when no pattern matches it.  Each selected line is counted exactly
    once in files_with_match and sets return_status to 0, whether or not
    anything is printed (-q, -c and -l suppress the line output).
    """
    global return_status
    show_name = print_filename and len(files_to_process) > 1
    want_match = not invert_match
    for line_number, line in enumerate(lines, 1):
        if _LineMatches(line) != want_match:
            continue
        return_status = 0
        files_with_match[filename] = files_with_match.get(filename, 0) + 1
        if quiet or print_count or just_print_files:
            continue
        prefix = ""
        if show_name:
            prefix += filename + ":"
        if print_line_number:
            prefix += "%d:" % line_number
        sys.stdout.write(prefix + line)


def main():
    """Run the search described by the command line, then exit.

    The exit status is return_status: 0 if any line was selected, else 1.
    """
    ProcessCommandLine()
    if files_to_process:
        for name in files_to_process:
            with open(name) as stream:
                ProcessLines(stream, name, print_filename=1)
    else:
        ProcessLines(sys.stdin, None)
    if print_count:
        for name in files_to_process:
            if name in files_with_match:
                sys.stdout.write("%s:%d\n" % (name, files_with_match[name]))
    if just_print_files:
        for name in files_to_process:
            if name in files_with_match:
                sys.stdout.write(name + "\n")
    sys.exit(return_status)


if __name__ == "__main__":
    main()