#!/bin/tcsh -f
################################
# jtb 050606
# A rudimentary batch-oriented typo finder and spell-checker.
#
# Given a file containing a list of grep-style regexp search patterns,
# this script searches a bunch of files and reports any occurrences of
# those patterns.
#
# Invocation:  <this-script> file1 [file2...]
# The patterns are read from ~/bin/<this-script-name>.patterns.
################################

# FIXME: Should (optionally?) filter out HTML tags before searching
# FIXME: Should be able to use a pattern file other than the default

# Basename of this script; used in messages and to derive file names.
set me = $0:t
# The search patterns live in this file.
set patternFile = "~/bin/${me}.patterns"

goto Start

##############################
# Print a short help text, then jump to the Cleanup label.
Usage:
echo "${me}: file1 [file2...]"
echo "Searches the given files for occurrences of regexp patterns that are"
echo "listed in the pattern list file $patternFile."
# The closing double quote on the next line is required: without it tcsh
# stops with an unmatched-quote error and never reaches the goto below.
echo "See $patternFile for the pattern file format."
echo
goto Cleanup

##############################
# Entry point: validate the command line and the pattern file.
Start:

# At least one file to check must be named.
if ($#argv < 1) then
    echo "You forgot to name a file to be checked."
    goto Usage
endif

# The pattern file must exist before anything else is attempted.
if (! -e $patternFile) then
    echo "Can't find the pattern file '$patternFile'."
    goto Usage
endif

# Each non-commented line in the pattern file contains all the args needed for
# a grep command, except for the filename and the "grep" itself.  For example:
#
#   line in pattern file:    dog
#   actual grep invocation:  grep dog xxx        (where xxx is a filename)
#
#   line in pattern file:    -i the
#   actual grep invocation:  grep -i the xxx
#
#   line in pattern file:    -E "criticis(e|ing)"
#   actual grep invocation:  grep -E "criticis(e|ing)" xxx

# So the pattern file is turned into a shell script that runs grep on
# one file, for each pattern, one at a time.
# (Note: The '-f' option of grep cannot be used to take the patterns from
# the pattern file because grep options like '-i' or '-E' must be allowed
# in the pattern file.)
# Temporary files.
#
# The grep-results path is kept in two spellings:
#   tmpGrepResults - the plain path, for commands run by this script (rm);
#   edGrepResults  - the same path with every slash backslash-escaped, for
#                    use inside the ed "s" replacement text further down,
#                    where a bare slash would terminate the replacement.
# Passing the escaped spelling to rm names a file that does not exist, which
# would leave the real results file behind in /tmp.
set tmpGrepResults = "/tmp/${me}.GrepResults"
set edGrepResults  = "\/tmp\/${me}.GrepResults"
set tmpPatternFile = "/tmp/${me}.PatternFile"
set tmpGrepScript  = "/tmp/${me}.GrepScript"
/bin/rm -f $tmpGrepResults $tmpPatternFile $tmpGrepScript

# Reduce the pattern file to its active lines: drop everything from a "#" to
# the end of the line, strip trailing blanks AND tabs, then delete lines that
# ended up empty.  A line holding only whitespace must not survive, because
# it would become a grep command without a pattern.
sed -e 's/#.*//' -e 's/[[:space:]][[:space:]]*$//' -e '/^#/d' -e '/^$/d' $patternFile > $tmpPatternFile

# Use ed to rewrite the cleaned pattern list into a tcsh script that takes one
# file name as its only argument:
#   * the two leading g commands target the first "(" and ")" of each line
#     (NOTE(review): apparently meant to backslash-escape them - confirm the
#     effect with the ed in use);
#   * the third g command wraps every pattern line in a stanza that saves the
#     "grep -n" output, counts matching lines with "grep -c", and reports the
#     match when the status is zero;
#   * "1i" prepends the argument checks, "$a" appends the final summary;
#   * "w" writes the result to $tmpGrepScript and "Q" quits without saving the
#     edit buffer back to the pattern copy.
# The here-document is unquoted, so this shell expands $variables in it.
# Everything that has to reach ed or the generated script literally is
# backslash-escaped (dollar signs, backquotes and the backreference).  An
# unescaped backquote here would run its command now, at generation time.
ed -s $tmpPatternFile << END_INPUT
g/(/s//\\(/
g/)/s//\\)/
g/\(^.*\$\)/s//grep -n \\1 \$1 > $edGrepResults\
set nLines = \`grep -c \1 \$1\`\
if (! \$status) then\
  echo; echo "************ Match found (file '\$1'): |"\\1"| (\$nLines lines): ************"\
  set foundMatch\
  cat $edGrepResults\
endif/
1i
#!/bin/tcsh -f
# This tmp script was constructed automagically by ${me}
#
if (\$#argv != 1) then
  echo "*** Bad arg count (\$#argv). Aborting script."
  exit
else if (! -e \$1) then
  echo "*** Can\'t find file \'\$1\'. Aborting script."
  exit
endif
echo -n "Testing \$1 ..."
.
\$a
if (\${?foundMatch}) then
  echo "************************************"; echo
else
  echo " passed."
endif
.
w $tmpGrepScript
Q
END_INPUT

chmod +x $tmpGrepScript

# tmpGrepScript is now an executable tcsh script; run it once per named file.
foreach f ($argv)
    # Missing files are reported and skipped rather than aborting the run.
    if (! -e $f) then
        echo "File '$f' not found. Skipping."
        continue
    endif
    # Check this file against every pattern.
    $tmpGrepScript $f
end

# Common exit path (also reached from the usage text): remove the temp files.
Cleanup:
/bin/rm -f $tmpGrepResults $tmpPatternFile $tmpGrepScript
exit