####################################################
# This is a pattern file for 'typosniff'
# It contains regexp words and word fragments to hunt
# down in a batch of text files.
#
# Each line contains an optional grep option (-i, -E, etc.) followed by
# a grep-style search pattern.
# Hash-marks (#) introduce comments. Comments and blank lines are ignored.
# Patterns that include spaces must be quoted.
# Quotemarks and periods must be backslash-escaped to be taken literally.
#
# W10 = Webster's Collegiate Dictionary (10th ed.)
# CMS15 = Chicago Manual of Style (15th ed.)
####################################################

####################################################
# UK English words and word fragments
####################################################

# Words ending in "our"
# NB: we can't do a regex search like "[blvnid]our", because although it
# would indeed catch the British words below, it would also catch "bourn,"
# "flourish," "devour," "dour," etc. So we have to list the words
# one by one.
-i arbour           # US: arbor, harbor
-i behaviour        # US: behavior
-i colour           # US: color
-i endeavour        # US: endeavor
-i favour           # US: favor, favorite, etc.
-i fervour          # US: fervor
-i flavour          # US: flavor
-i glamor           # W10: prefers glamour !
-i honour           # US: honor
-i humour           # US: humor
-i labour           # US: labor
-i neighbour        # US: neighbor
-i odour            # US: odor
-i savour           # US: savor
-i rancour          # US: rancor
-i saviour          # W10 prefers savior
-i splendour        # US: splendor
-i vigour           # US: vigor

###############################
# -ize or -ise? -yze or -yse?
-i 'analyse[^s]'    # US: analyze, analyzed, analyzer -- but analyses may be OK
-i alyse            # US: analyze, paralyze, catalyze
-i lysing           # US: analyzing, paralyzing, catalyzing
-i nise             # US: recognize, cognize, scrutinize, organize
-i nisa             # US: organization
-i criticise        # US: criticize
-i utilis           # US: utilize, utilization
-i practis          # US: practice, practicing
-i orise            # UK: authorise, categorise, memorise, etc.
-i sise             # US: emphasize
-i 'realis[ae]'     # US: realize, realization

###############################
# c or s?
-i defence          # US: defense
-i licence          # US: license

###############################
# How many l's?
-i 'equall[^y]'     # UK: "equalling" (the [^y] keeps "equally" from matching)
-i 'fulfil[^l]'     # UK: fulfilment; US: fulfillment
-i counsell         # US: "counselor, counseling"; UK: "counsellor, counselling"
-i modell           # UK: modeller, modelling
-i quarrell         # US: quarreled
-i signall          # US: signaled, signaling, etc.
-i skilful          # US: skillful
-i travell          # US: traveler, traveling, traveled
-i vell             # US: reveler
-i enrolment        # US: enrollment

###############################
# Miscellaneous
-i ageing           # US: aging
-i centre           # US: center
-i centring         # US: centering
-i cheque           # US: check
-i '[^e]cosy'       # US: cozy (but ignore "ecosystem")
-i draught          # US: draft
-i fibre            # US: fiber
-i focuss           # US: focused, focusing, etc.
-i grey             # US: gray
-i jewellery        # US: jewelry
-i judgement        # US: judgment
-i learnt           # US: learned
-i lustre           # US: luster
-i meagre           # US: meager
-i metre            # US: meter
-i oeu              # UK: manoeuvre; US: maneuver
-i offence          # W10 prefers "offense"
-i pae              # UK: encyclopaedia, paediatric, etc.
                    # US: encyclopedia, pediatric, etc.
-i faec             # US: feces, fecal
-i plough           # US: plow
-i pyjama           # US: pajama
-i catalogue        # W10 prefers "catalog" over "catalogue"
-i 'dialog[^u]'     # W10 prefers "dialogue" over "dialog"
-i mould            # US: mold
-i co-exist         # US: coexist

###############################
# vocabulary quirks
-i lorry            # US: truck
-i bonnet           # US: hood (of automobile)
-i roundabout       # US: rotary (traffic circle)
-i pavement         # US: sidewalk
-i coach            # US: bus
#-i carriage        # US: car (of train)
-i tyre             # US: tire
-i warders          # US: wardens
-i aluminium        # US: aluminum
-i polythene        # US: polyethylene
-i tonne
# Strictly speaking, a "tonne" is a "metric ton".
# Depending on context, "ton" or "tonne" may be correct. For example, "Many
# tonnes of steel were unloaded" may as well be "Many tons of steel...".
# But a more precise statement, such as "The boulder weighed 17.5 tonnes,"
# should be either left as-is or changed to "The boulder weighed 17.5 metric
# tons."

####################################################
# Basic style rules -- mostly from CMS 15
####################################################
-i 'i\.e\.[^,]'     # i.e. should always be followed by a comma (CMS15 5.202)
-i 'e\.g\.[^,]'     # e.g. should always be followed by a comma (CMS15 5.202)
-i '[knopr]wards'   # afterward, backward, toward, forward,
                    # upward, downward, etc. should not be followed
                    # by s (CMS15 5.202)

####################################################
# ATI style idiosyncrasies
####################################################
# SUSPECT WORD      # ATI PREFERS
-i brahmin          # brahman
Theravadin          # Theravadan
buddhist            # Buddhist (no -i: only the lowercase form is suspect)
# The suspect spelling is the one-word form "lovingkindness"; the pattern
# previously read "lovingkindess" (missing "n") and so could never match it.
lovingkindness      # loving-kindness
-i email            # e-mail (W10; but W10 prefers E-mail, which I don't)
-i online           # on-line (W10)
-i offline          # off-line (W10)
-i web-site         # website
-i sub-commentar    # subcommentary, etc.
-i ajahn            # ajaan
-i achaan           # ajaan
-i acharn           # ajaan
-i non-return       # nonreturn (based on W10: nonreturnable)
-i arhat            # arahant
-i 'arahat[^ot]'    # arahant (but arahatta, arahato are OK)
'[a-z]"m'           # Pali m-dot-over is DEPRECATED in favor of m-dot-under
# NOTE(review): the "ATI PREFERS" column for the dash, parenthesis and bracket
# entries below presumably named HTML entities originally; in this copy they
# appear already decoded to the literal character -- confirm against upstream.
' -- '              # —
'[^!\]--[^>]'       # —
# NOTE(review): the two italics patterns had lost their closing tag (the second
# had degenerated into a bare bracket expression matching ANY punctuation).
# Restored as </i>; confirm that </i> (rather than e.g. </em>) is the tag meant.
'</i> [],\.:!;?)]'  # punct goes inside the italics
'</i>[],\.:!;?)]'   # punct goes inside the italics
'('                 # (
'\['                # [

####################################################
# Punctuation
####################################################
'[^=]\"\.[^.]'      # period should go inside double quote
                    # (Exception: 'href="../foo.html"')
                    # (Exception: when the "period" is part of an ellipsis)
"'\.[^.]"           # period should go inside single quote
                    # (Exception: when the "period" is part of an ellipsis)
\",                 # comma should go inside double quote
\',                 # comma should go inside single quote
' \.\.\.'
# ellipsis should not be preceded by a space
# (Note: this is not necessarily wrong; sometimes we want a space
# in certain complex situations. For example:
# "I, too, develop mindfulness of death." ... "I think, 'O, that I
# Here the space gives the eye a welcome rest from all that punctuation.
# These cases are rare enough that it's no great hardship to check for all
# space+ellipsis patterns.)
'\.\.\.\.'          # four dots are not allowed
'[a-z]- '           # space after hyphen
' -[a-z]'           # space before hyphen
';"'                # semicolon should NOT go inside quotes
";'"                # semicolon should NOT go inside quotes

# Some puncts should never be preceded by white space
'[ ],'
'[ ];'
'[ ]:'
'[ ]!'
'[ ]?'

# Some puncts should never be followed by white space
# NOTE(review): as a grep bracket expression, [(\] contains "(" and "\" only,
# so "[ " is not caught by this pattern -- confirm whether '[([][ ]' was meant.
'[(\][ ]'

####################################################
# HTML inefficiencies (byte-wasters)
####################################################
# NOTE(review): the markup in this section had been stripped from this copy,
# leaving ' ' (matches every space) and ']+> +' (the tail of a tag pattern).
# The tag pattern is restored from its surviving tail; the <p> below is the
# most likely reading of the broken "Example:" comment -- confirm upstream.
'<p> '              # Example: <p> followed by space
-E '<[^>]+> +'      # any tag followed by space(s). Example: blah blah blah
-E ' {2,}'          # multiple spaces (two or more; a single space is fine)

####################################################
# More typos and shady characters
####################################################
-i mediation        # Double-check that we haven't misspelled "meditation"
-i mediator         # ditto for "meditator"
-i '[^rm]mediate'   # ditto for "meditate" -- but not immediate, intermediate
-i " adn "          # mistyped "and"
-i " hte "          # mistyped "the"
-i "wan't"          # mistyped "want"
-i " the the "      # typo
-i togehter         # typo
" - "               # badly formed em-dash