##### strip_egroups_ad.rc
#
# Resource file for procmail. Run with:
#       INCLUDERC=yourpath/strip_egroups_ad.rc
# in your $HOME/.procmailrc
#
# Removes the first ad in eGroups list emails.
# First and last line of ad are matched by $start and $end, and some string
# inside the ad by $admatch. The whole ad is replaced by $note.
#
# The usual headache with bare-bones Unix-rubbish: the sed solution never(!)
# works under Solaris 2.7 because -e handles neither newlines nor nested
# '{ }'-lists. Solaris awk is also too dumb - nawk is required.
# Needless to say, the GNU tools never have a problem. Long live the Penguin!
#
# The latest version is always available from:
#       http://volker.orcon.net.nz/soft/procmail/strip_egroups_ad.rc
#
# In the public domain.
# Volker Kuhlmann
#       31 Aug 2000; 12 Jan; 2 Feb; 4, 29 Mar 2001; 10, 11, 14 Apr 2002
#

# With awk (change to nawk, gawk etc. if necessary, e.g. solaris awk is bad):
# 10Apr02: Arrrgh, with nawk/gawk one has to use "\\." to get a literal "."!
# The replacement string can now be empty, i.e. note=''.
#
# How the awk filter below works:
#   - Lines are passed through unchanged until one matches $start; from there
#     on they are buffered in "text" instead of being printed.
#   - When a buffered block is closed by a line matching $end, the block is
#     replaced by $note if it contains $admatch (and everything after it is
#     passed through untouched); otherwise the buffered block is printed as-is.
#   - If the message ends while a block is still open (no $end line was seen),
#     the END rule prints the buffered lines so that no body text is lost.
#
# Only messages carrying both eGroups/Yahoo Groups headers enter the block.
:0
* ^Delivered-To:.*@(yahoogroups|egroups)\.com
* ^Mailing-List:.*@(yahoogroups|egroups)\.com
{
    # detect version of awk to use, in order: gawk, nawk, awk
    # (only runs when AWK is still empty; the probe reads /dev/null, never
    # the mail, and its output is discarded)
    :0 i
    * AWK ?? ^^^^
    AWK=| sh -c 'for awk in gawk nawk awk; do \
        ($awk 1 </dev/null) >/dev/null 2>&1 \
        && { echo $awk; break; }; done'
    # or set variable here and comment out the 5 lines above:
    #AWK=gawk

    end='...-----------------.*-----------------([-~>=_|e]*[~>=_|e]|[^-].*-)$'
    start="^$end"
    admatch='http://.*\\.(yahoo|egroups)\\.com/.*\\/'
    note='[obnoxious Yahoo/eGroups ad removed]\n'

    # Filter the body through awk, but only if a usable awk was found.
    :0 fbw
    * ! AWK ?? ^^^^
    | $AWK "\
        BEGIN { ad=0; done=0 }\
        done { print; next }\
        ad && \$0 ~ \"$end\" { \
            ad=0; \
            if (match(text,\"$admatch\")) {\
                printf \"%s\", \"$note\"; done=1\
            } else {\
                print text \$0; text=\"\" }\
            next\
        }\
        \$0 ~ \"$start\" { ad=1 }\
        ad { text=text \$0 \"\n\" }\
        !ad { print }\
        END { if (ad) printf \"%s\", text }\
        "
}

# With sed:
#
# Write $start and $end to match the whole line, but do not(!) anchor $end at
# the start of the line using "^".
# The /$admatch/! condition is necessary to remove the first ad line, in case
# $end also matches the start line.
# The conditions should be reasonably broad, and still catch if egroups changes
# some characters in the lines.
#
# Adopted from the sed FAQ:
#   :t
#   /BLOCK_TOP/,/BLOCK_END/ {
#      /BLOCK_END/! { N; b t; }
#      /regex/s/^.*BLOCK_END//
#   }
# Suppose the beginning of the block is indicated by 'BLOCK_TOP' and
# the end of the block is indicated by 'BLOCK_END'. If the expression
# 'regex' appears anywhere within the block, the entire block should
# be deleted.
# The most difficult part was to get the quoting right for procmail...
# Note: this doesn't trigger on old email with egroups.com URL in the ad any
# more.
#
#{
#    end='...-\{20,\}.*-\{20,\}[-~>=_|e]*$'
#    start="^$end"
#    admatch='http:\/\/.*\.yahoo\.com\/.*\/'
#    note='\[obnoxious Yahoo\/eGroups ad removed\]'
#    :0 fbw
#    | sed \
#       -e ':t' \
#       -e "/$start/,/^$end/ { \
#           /^.*$end/! { N; b t; }; \
#           /$admatch/! { N; b t; }; \
#           /$admatch/ { \
#               s/^.*$end/$note/ ; \
#               :tt; \
#               n; b tt; \
#           }; \
#       }"
#}

##### EOF strip_egroups_ad.rc