diff options
Diffstat (limited to 'sys/src/cmd/python/Demo/comparisons/regextest.py')
-rwxr-xr-x | sys/src/cmd/python/Demo/comparisons/regextest.py | 47 |
1 files changed, 47 insertions, 0 deletions
#! /usr/bin/env python
# Recovered from the diff hunk for
# sys/src/cmd/python/Demo/comparisons/regextest.py
# (new file mode 100755, index 000000000..b27d741d7).

# 1) Regular Expressions Test
#
# Read a file of (extended per egrep) regular expressions (one per line),
# and apply those to all files whose names are listed on the command line.
# Basically, an 'egrep -f' simulator.  Test it with 20 "vt100" patterns
# against five /etc/termcap files.  Tests using more elaborate patterns
# would also be interesting.  Your code should not break if given hundreds
# of regular expressions or binary files to scan.

# This implementation:
# - combines all patterns into a single one using ( ... | ... | ... )
# - reads patterns from stdin, scans files given as command line arguments
# - produces output in the format <file>:<lineno>:<line>
# - is only about 2.5 times as slow as egrep (though I couldn't run
#   Tom's test -- this system, a vanilla SGI, only has /etc/terminfo)

import string
import sys
import re


def main():
    """Scan the files named in sys.argv[1:] for the patterns read from stdin.

    Every line of stdin is one regular expression; all of them are joined
    into a single alternation and compiled once.  Each line of each file
    that matches is written to stdout as <file>:<lineno>:<line>.  A file
    that cannot be opened is reported on stdout as "<file>: <error>" and
    skipped.
    """
    pats = [chomp(raw) for raw in sys.stdin.readlines()]

    # With no patterns at all, '()' would match every line; an empty
    # pattern list must match nothing, as with 'egrep -f' on an empty file.
    prog = None
    if pats:
        prog = re.compile('(' + '|'.join(pats) + ')')

    for filename in sys.argv[1:]:
        try:
            fp = open(filename, 'r')
        except IOError:
            # sys.exc_info() rather than "except IOError, msg" so that the
            # source parses under both Python 2 and Python 3.
            sys.stdout.write("%s: %s\n" % (filename, sys.exc_info()[1]))
            continue
        try:
            if prog is not None:
                _scan(prog, filename, fp)
        finally:
            # Always release the handle, even if scanning raises.
            fp.close()


def _scan(prog, filename, fp):
    """Write every line of fp that prog matches to stdout.

    Each hit is written as <filename>:<lineno>:<line>, with line numbers
    starting at 1.
    """
    lineno = 0
    for line in fp:
        lineno = lineno + 1
        if prog.search(line):
            # A final line without a newline would otherwise run into the
            # next record written to stdout.
            if not line.endswith('\n'):
                line = line + '\n'
            sys.stdout.write("%s:%s:%s" % (filename, lineno, line))


def chomp(s):
    """Return s with any trailing newline characters removed."""
    return s.rstrip('\n')


if __name__ == '__main__':
    main()