#!/bin/sh
# @(#)filter_update.sh 2001/07/18 Fredrich P. Maney <maney@maney.org>
#      Based on procmail_mindspring 1999/07/14 Anne Bennett
#
# Download procmail filter data from a remote site, reformat it for our
# site, and incorporate it into our filters. If there are any changes,
# report the differences.
#
# Last modified: 2001/07/18 Fredrich P. Maney
#                  (generalized for multiple providers)
#                2000/02/18 Anne Bennett
#                  (removed entries with no "@" sign or ".")
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=#
# Environment Variables

# Restrict the search path to the system directories; everything outside
# them (lynx) is called by absolute path.
PATH=/bin:/usr/bin:/sbin:/usr/sbin
export PATH

# Fixed program name, used in the log tag and in temp-file names.
thisprog="filter_update.sh"

# The provider is chosen by the name the script is invoked under (the
# basename of $0), not by a command-line argument.
provider=$(basename "$0")

# provider_url NAME
#   Print the download URL for provider NAME on stdout.
#   Returns non-zero and prints nothing when NAME is not a known provider.
provider_url() {
   case "$1" in
      concordia )
         echo "http://alcor.concordia.ca/topics/email/auto/procmail/spam/tag.html"
         ;;
      mindspring )
         echo "http://cgi.mindspring.com/cgi-bin/spamlist.pl"
         ;;
      panix )
         echo "http://www.panix.com/rc.shared"
         ;;
      * )
         return 1
         ;;
   esac
}

# Left empty for an unrecognised provider name.
url=$(provider_url "${provider}")

# Command prefix for syslog messages; it is expanded unquoted at the call
# sites so that it splits into the command and its arguments.
logger="logger -p mail.info ${thisprog}:${provider}:"

mailto="maney"
lynx="/usr/local/bin/lynx"
# No trailing slash here: the paths below add their own separator.
filter_dir="/etc/mail/filters/tmp"
cur_filter="${filter_dir}/procmailrc-${provider}"
new_filter="${filter_dir}/procmailrc-${provider}.NEW"

# Scratch files, made unique per run by the shell's process id.
# NOTE(review): these names are predictable; consider mktemp if /tmp is
# shared with untrusted users.
tmp_new=/tmp/${thisprog}.$$.tmp_new
tmp_old=/tmp/${thisprog}.$$.tmp_old
tmp_diff=/tmp/${thisprog}.$$.tmp_diff
tmp_raw=/tmp/${thisprog}.$$.tmp_raw

# Space-separated list of every scratch file, for cleanup with "rm -f".
all_tmp="${tmp_new} ${tmp_old} ${tmp_diff} ${tmp_raw}"

#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=#
# Start.
#
${logger} starting.

# Download the provider material; note the date and time.
#
# With no URL (unrecognised provider name) the download is skipped and an
# empty file is left for the check below to report.
# NOTE(review): lynx started without a URL is believed to load its default
# start page rather than fail -- confirm against the lynx manual.
if [ -n "${url}" ]
then
  "${lynx}" -dump -source "${url}" > "${tmp_raw}"
else
  ${logger} no URL known for this provider.
  : > "${tmp_raw}"
fi
down_date=$(date '+%Y/%m/%d %H:%M:%S %Z')

# If we got nothing, stop now.
#
if [ ! -s "${tmp_raw}" ]
then
  echo "Empty or failed download" | \
    mailx -s "New ${provider} procmail data FAILURE" "${mailto}"
  # ${all_tmp} is a space-separated list and must be split into words.
  rm -f ${all_tmp}
  ${logger} end of run, failed.
  # Report the failure to the caller instead of the logger's exit status.
  exit 1
fi

# Format the provider material.
#
# Start from an empty output file; each section below appends to it.
: > "${tmp_new}"

# format_section TITLE CONDITION KIND
#   Read ${tmp_raw}, take the lines from the "<h3>TITLE</h3>" heading up to
#   the next "</pre>", and print one procmail recipe per entry on stdout.
#     TITLE      heading text of the section to extract
#     CONDITION  start of the procmail condition; the entry is appended
#                to it between \< and \>
#     KIND       description placed in the header added by formail
format_section() {
  # Steps: drop lines starting with "<" and blank lines; backslash-escape
  # ".", "[" and "]" so entries are literal in a regex; de-duplicate; keep
  # only entries containing "@" or "."; then write the recipes.
  sed -n "/<h3>$1<\\/h3>/,/<\\/pre>/p" "${tmp_raw}" \
    | sed -e '/^</d' \
          -e '/^[[:blank:]]*$/d' \
          -e 's%\.%\\.%g' \
          -e 's%\[%\\[%g' \
          -e 's%\]%\\]%g' \
    | sort -u \
    | grep -E '@|\.' \
    | gawk -v cond="$2" -v kind="$3" -v provider="${provider}" '
        {
          # $trash_header is deliberately left unexpanded in the recipe;
          # the provider name is supplied through -v so that it IS
          # filled in here.
          printf(":0 Hf\n* %s\\<%s\\>\n", cond, $1)
          printf("| formail -b -f -A \"$trash_header siteban per-%s %s\"\n\n", provider, kind)
        }'
}

# (Pick out envelope senders.)
format_section "Envelope Senders" "^Received: from" "envelope sender" \
  >> "${tmp_new}"

# (Pick out header senders.)
format_section "Header Senders" "^From:.*" "header sender" \
  >> "${tmp_new}"

# If the result is empty, report the problem and stop.
#
if [ ! -s "${tmp_new}" ]
then
  echo "Empty after formatting" | \
    mailx -s "New ${provider} procmail data FAILURE" "${mailto}"
  # ${all_tmp} is a space-separated list and must be split into words.
  rm -f ${all_tmp}
  ${logger} end of run, failed.
  exit 1
fi

#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=#
# Ok, now that we have the new provider material and have it in the proper
# format, let's compare the new material to the old and look for differences.

# Isolate our current provider-derived material.
#
# Everything after the marker line in the current filter is generated
# material; everything up to and including it is kept as-is.
# NOTE(review): the marker used to be matched from a single-quoted sed
# script, i.e. with the literal text "${provider}" in it. Confirm that the
# installed filter files carry the provider's name on this line.
marker="# ========== ${provider}-derived material below =========="

# Without the marker the header/body split below would copy the whole old
# file and append the new material to it, so treat that as a failure.
if ! grep -q -x -F -- "${marker}" "${cur_filter}"
then
  echo "Marker line not found in ${cur_filter}" | \
    mailx -s "New ${provider} procmail data FAILURE" "${mailto}"
  # ${all_tmp} is a space-separated list and must be split into words.
  rm -f ${all_tmp}
  ${logger} end of run, failed.
  exit 1
fi

# Print from the marker to end of file, then drop the marker line itself.
sed -n "/^${marker}\$/,\$p" "${cur_filter}" \
   | tail -n +2 \
   > "${tmp_old}"

# Are they different?
# If no, clean up and exit quietly.
# If yes, prepare the new file and notify the maintainer of the changes.
diff "${tmp_old}" "${tmp_new}" > "${tmp_diff}"
if [ ! -s "${tmp_diff}" ]; then
   # No changes.
   ${logger} no changes.
   rm -f ${all_tmp}
   ${logger} end of run.
   exit 0
fi

${logger} changes found.

# New filter = old header (through the marker line) with its
# "Last download on" stamp refreshed, followed by the new recipes.
sed -n "1,/^${marker}\$/p" "${cur_filter}" \
   | sed -e "s%^# Last download on .*%# Last download on ${down_date}%" \
   > "${new_filter}"
cat "${tmp_new}" >> "${new_filter}"

${logger} notifying maintainer.
{
  echo ""
  echo "New ${provider} procmail data is in ${new_filter}"
  echo ""
  echo "Differences follow:"
  echo ""
  echo "===================================="
  echo ""
  cat "${tmp_diff}"
} | mailx -s "New ${provider} procmail data ready" "${mailto}"


# Clean up and leave
rm -f ${all_tmp}
${logger} end of run.
exit 0

#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=#
# EOF

