Spam Prefiltering

From CodeCodex

This Python script connects to a specified POP3 server (for example, the one provided by your ISP) and does prefiltering of mail messages, deleting all those that match certain criteria. After running this script, you can then launch your regular MUA to download the remaining messages.

The script checks the spam indicator set by my ISP's spam filter (an "X-Spam-Status: yes" header; your ISP may use a different one) and applies a few other simple built-in rules that I personally found useful for filtering out a lot of spam. Its real power, however, comes from invoking SpamAssassin to check the message content more thoroughly. Commonly you would run SpamAssassin on your own mail server, but this script shows how you can make use of it on a desktop machine.

Example invocation:

popselfilter --spamassassin pop3.myisp.com myusername

After prompting for the password for myusername, this will log on to the server named pop3.myisp.com and filter the messages it finds there.

import sys
import os
import re
import subprocess
import tempfile
import poplib
import getopt
import getpass

#+
# Mainline
#-

# Command-line syntax:
#     [options] <host> <username> [<password>]
# Every option is a long option; the ones listed with a trailing "=" take
# an integer value.
LongOptions = ["debug=", "maxmessages=", "port=", "spamassassin", "startfrom="]
Opts, Args = getopt.getopt(sys.argv[1:], "", LongOptions)
if not 2 <= len(Args) <= 3 :
    raise getopt.error("usage:\n\t%s <host> <username> [<password>]\n" % sys.argv[0])
#end if
Host, Username = Args[:2]

# Integer-valued options and their defaults, keyed by the option name as
# getopt reports it. None means the option was not given.
IntOptions = \
    {
        "--debug" : 0,
        "--maxmessages" : None,
        "--port" : 110,
        "--startfrom" : None,
    }
UseSpamAssassin = False
for OptName, OptValue in Opts :
    if OptName in IntOptions :
        IntOptions[OptName] = int(OptValue)
    elif OptName == "--spamassassin" :
        UseSpamAssassin = True
    #end if
#end for
Verbosity = IntOptions["--debug"]
MaxMessages = IntOptions["--maxmessages"]
Port = IntOptions["--port"]
StartFrom = IntOptions["--startfrom"]

# The password is the optional third argument; prompt for it only when it
# was not supplied, and only after the options have been validated.
if len(Args) == 3 :
    Password = Args[2]
else :
    Password = getpass.getpass()
#end if

# Mail session: log on to the POP3 server, look at the headers of each
# message (and, with --spamassassin, at SpamAssassin's verdict on the full
# message), mark the spam for deletion, then log off.
#
# poplib raises poplib.error_proto when the server answers a command with
# -ERR, so the result tuples below never need to be checked for failure.

SpamcPath = "/usr/bin/spamc"
  # SpamAssassin client: fed a message on stdin, its stdout is scanned
  # below for the spam-status header

SpamHeader = re.compile(r"X-Spam-Status:\s*yes", re.IGNORECASE)
  # header line marking a message as spam, whether it was added by the
  # ISP's filter or by SpamAssassin
RubbishHeaders = \
    [
        re.compile(r"Subject:\s+\[WARNING: VIRUS REMOVED\]", re.IGNORECASE),
        re.compile(r"To:.*?daemon[a-z0-9]*@geek-central", re.IGNORECASE),
        re.compile(r"To:.*?thisisjusttestletter@geek-central", re.IGNORECASE),
    ]
  # site-specific header lines that identify a message as junk
HeaderPatterns = RubbishHeaders + [SpamHeader]

def ToText(Data) :
    """Return Data as a native string.

    poplib and subprocess pipes deliver byte strings on Python 3 but
    ordinary strings on Python 2. Decoding only when necessary lets the
    rest of the script concatenate and regex-match with normal string
    literals on either version. Undecodable bytes are replaced rather
    than raising, since the text is only displayed and pattern-matched."""
    if not isinstance(Data, str) :
        Data = Data.decode("utf-8", "replace")
    #end if
    return Data
#end ToText

def Report(Response) :
    """Write a server response line to stdout if --debug was given."""
    if Verbosity > 0 :
        sys.stdout.write(ToText(Response) + "\n")
    #end if
#end Report

def SpamAssassinSaysSpam(MessageLines) :
    """Pipe a message through spamc and return True if it is flagged as spam.

    MessageLines is the list of message lines (without line terminators)
    as returned by POP3.retr. The message is passed to spamc over a pipe,
    so nothing is written to disk. Only the header section of spamc's
    output is examined; each examined line is echoed to stdout.

    Raises OSError if spamc cannot be started."""
    Filter = subprocess.Popen \
      (
        args = [SpamcPath],
        stdin = subprocess.PIPE,
        stdout = subprocess.PIPE
      )
    # communicate() writes and reads concurrently, so a large message
    # cannot deadlock the two pipes, and it waits for spamc to exit
    (Filtered, _) = Filter.communicate \
      (
        b"".join([Line + b"\n" for Line in MessageLines])
      )
    for Line in ToText(Filtered).split("\n") :
        sys.stdout.write(Line + "\n") # debug
        if Line.rstrip("\r") == "" : # end of headers
            break
        #end if
        if SpamHeader.match(Line) is not None :
            return True
        #end if
    #end for
    return False
#end SpamAssassinSaysSpam

Conn = poplib.POP3(Host, Port)
if Verbosity > 0 :
    sys.stdout.write(ToText(Conn.getwelcome()) + "\n")
    Conn.set_debuglevel(Verbosity)
#end if
Report(Conn.user(Username))
Report(Conn.pass_(Password))
if Verbosity > 0 :
    (MessageCount, MailboxSize) = Conn.stat()
    sys.stdout.write \
      (
            "Message count: %u, mailbox size: %u\n"
        %
            (MessageCount, MailboxSize)
      )
#end if

(ListResponse, MessageList) = Conn.list()[:2]
sys.stdout.write(ToText(ListResponse) + "\n")
if StartFrom is not None :
    # clamp so that --startfrom=0 (or a negative value) means "from the
    # first message" instead of wrapping round to the end of the list
    MessageList = MessageList[max(StartFrom - 1, 0):]
#end if
if MaxMessages is not None :
    MessageList = MessageList[:MaxMessages]
#end if

NrMessages = 0
NrDeleted = 0
for MessageItem in MessageList :
    # each listing entry is "<message number> <size in octets>"
    MessageNr = int(ToText(MessageItem).split()[0])
    # TOP with 0 body lines fetches just the headers
    (TopResponse, HeaderLines) = Conn.top(MessageNr, 0)[:2]
    NrMessages += 1
    sys.stdout.write \
      (
            "%u/%u: %s\n"
        %
            (NrMessages, len(MessageList), ToText(TopResponse))
      )
      # debug
    IsSpam = False
    for HeaderLine in HeaderLines :
        HeaderLine = ToText(HeaderLine)
        sys.stdout.write(HeaderLine + "\n") # debug
        if not IsSpam :
            for ThisPattern in HeaderPatterns :
                if ThisPattern.match(HeaderLine) is not None :
                    IsSpam = True
                    break
                #end if
            #end for
        #end if
    #end for
    if not IsSpam and UseSpamAssassin :
        # headers look clean: download the whole message for a content check
        IsSpam = SpamAssassinSaysSpam(Conn.retr(MessageNr)[1])
    #end if
    if IsSpam :
        sys.stdout.write("DELETE %u\n\n" % MessageNr) # debug
        Conn.dele(MessageNr)
        NrDeleted += 1
    #end if
#end for

# Messages are only flagged for deletion above; on most servers they are
# actually removed when the session ends with QUIT. If an exception ends the
# script before this point, the flagged messages are therefore normally kept.
Conn.quit()
sys.stdout.write \
  (
    "Messages deleted/processed: %u/%u\n" % (NrDeleted, NrMessages)
  )