#!/bin/ksh
#
# CLPOST is a ksh script that performs various Craigslist posting functions.
# Minimally, it can add or remove postings from the Craigslist website at
# http://www.craigslist.org/. The script automates the process and makes
# managing Craigslist postings easier.
#
# IDEAL STEPS TO POST ON CRAIGSLIST
# 1. Post the messages using the interface  @ http://name.domain.org/cl/
# 2. When CL confirmation mail arrives, use: $ clpost -a
# 3. Check that posts have been published : $ clpost -p
# 4. Finally, remove the postings from CL : $ clpost -d
# 5. Repeat.
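#
# Example session (illustrative; the actual lines depend on the posts):
#    $ clpost -a
#    POSTED: <subject of each newly published post>
#    $ clpost -p
#    PUBLISHED: <subject of each live post>
#    $ clpost -d
#    DELETED: <subject of each removed post>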
#
# Copyright (c) Yee Hsu 2004. All rights reserved.
# ===========================================================================

# some global variables
CLURL="http://www.craigslist.org"                       # Craigslist URL
TEMP="/tmp/.tmp_cle"                                    # temporary CL file
CLE="$HOME/bin/CL/cl_url"                               # temporary CL file
RESUME="$HOME/private/Job/resume.htm"                   # resume path
PHPPROG="$HOME/bin/CL/EHTML/htmlemail.php"              # PHP program path
FROM="name@domain.org"                                  # reply address

# HEADER
# header displays the header information
# ===========================================================================

header() {
        /usr/bin/clear
        /usr/bin/printf "CLPOST is a script to perform various Craigslist posting functions.\n"
        /usr/bin/printf "This script is compatible with the site $CLURL/.\n"
        /usr/bin/printf "Copyright (c) Yee Hsu 2004. All rights reserved.\n\n"
}

# USAGE
# usage displays the usage information to the user
# ===========================================================================

usage() {
        header;         # display header information
        /usr/bin/printf "Usage: clpost [-option]\n"
        /usr/bin/printf "<options>\n"
        /usr/bin/printf "\t-a\tadd and publish the CL posts\n"
        /usr/bin/printf "\t-p\tdisplay posts that is published\n"
        /usr/bin/printf "\t-d\tdelete all CL posts\n"
        /usr/bin/printf "\t-m\tlogs CL metrics in XML\n"
        /usr/bin/printf "\t-r\tauto send resumes to CL job postings\n\n"
}

# ADDPOST
# addpost is a function that adds and publishes the pending postings on Craigslist.
# ===========================================================================

addpost() {
TEMP1="$HOME/bin/CL/.cl_tmp1"                           # temporary CL file
TEMP2="$HOME/bin/CL/.cl_tmp2"                           # temporary CL file

header;         # display header information
grep "$CLURL" /mail/$USER | grep -v "<br>" > $TEMP1     # extract posting URLs

# Test that the file exists and has size > 0
if [[ ! -s "$TEMP1" ]] ; then
        /bin/rm -f $TEMP1
        /bin/echo "No CL postings need to be added."
        exit 0;
fi

while read LINE
do
        # Compose the Craigslist URL Query String
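        # The confirmation URL is assumed to have this shape (illustrative):
        #   http://www.craigslist.org/<section>/<page>/<postingID>/<check>
        # Split on "/", the posting ID is field 6 and the check ID is field 7.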
        URL=`echo $LINE | awk '{print $1}'`             # URL
        POSTINGID=`echo $URL | awk -F/ '{print $6}'`    # posting ID
        CHECK=`echo $URL | awk -F/ '{print $7}'`        # check ID

        # Build the publish URL and perform an HTTP GET request;
        # as a result, the post is published on Craigslist
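        # e.g. (made-up IDs for illustration):
        #   http://www.craigslist.org/cgi-bin/smp?postingID=12345678&check=a1b2c3&button=publish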
        POSTURL="$CLURL/cgi-bin/smp?postingID=$POSTINGID&check=$CHECK&button=publish"
        /usr/pkg/bin/wget -q -O $TEMP "$POSTURL"        # CL HIT

        # The request above returns an HTML page; extract the subject from it
        SUBJECT=`grep '<h2>' $TEMP`             # select the subject line
        SUBJECT="${SUBJECT#????????????}"       # strip the first 12 characters
        /bin/echo "POSTED: $SUBJECT"            # display subject

        /bin/cat /dev/null > $TEMP              # flush the temporary file
done < $TEMP1

/bin/cat "$CLE" "$TEMP1" > "$TEMP2"             # concatenate the CL files
/bin/mv -f "$TEMP2" "$CLE"                      # rename it back
/bin/rm -f "$TEMP1"                             # remove temporary CL file
}

# DISPOST
# dispost is a function that displays the postings currently on Craigslist.
# ===========================================================================

dispost() {
header;         # display header information

if [[ ! -s "$CLE" ]] ; then             # Test that file exist and size > 0
        /bin/echo "No posting on Craigslist at this time."
        exit 0;
fi

while read LINE
do
        # Get the URL of the post and perform an HTTP GET request
        URL=`echo $LINE | awk '{print $1}'`             # URL
        /usr/pkg/bin/wget -q -O $TEMP "$URL"            # CL HIT

        # The request above returns an HTML page; extract the subject from it
        SUBJECT=`grep '<h2>' $TEMP`             # select the subject line
        SUBJECT="${SUBJECT#????????????}"       # strip the first 12 characters
        /bin/echo "PUBLISHED: $SUBJECT"         # display subject

        /bin/cat /dev/null > $TEMP      # flush the temporary file
done < $CLE
}

# RMPOST
# rmpost is a function that removes the postings from Craigslist.
# ===========================================================================

rmpost() {
header;         # display header information

if [[ ! -s "$CLE" ]] ; then             # Test that file exist and size > 0
        /bin/echo "All CL postings have been removed."
        exit 0;
fi

while read LINE
do
        # Compose the Craigslist URL Query String
        URL=`echo $LINE | awk '{print $1}'`             # URL
        POSTINGID=`echo $URL | awk -F/ '{print $6}'`    # posting ID
        CHECK=`echo $URL | awk -F/ '{print $7}'`        # check ID

        # Build the delete URL and perform an HTTP GET request;
        # as a result, the post is removed from Craigslist
        POSTURL="$CLURL/cgi-bin/smp?postingID=$POSTINGID&check=$CHECK&button=delete"
        /usr/pkg/bin/wget -q -O $TEMP "$POSTURL"        # CL HIT

        # The request above returns an HTML page; extract the subject from it
        SUBJECT=`grep 'Your posting' $TEMP`             # extract title
        SUBJECT=`echo $SUBJECT | awk -F\" '{print $2}'` # extract subject
        /bin/echo "DELETED: $SUBJECT"                   # display subject

        /bin/cat /dev/null > $TEMP              # flush the temporary file
done < $CLE

/bin/cat /dev/null > $CLE                       # flush the local CL URL file
}

# CLMETRICS
# clmetrics reads the web server log files and extracts information on
# Craigslist hits and visits. The information is stored in an XML file
# that is presented using an XSL stylesheet.
# This function should be run once a day, preferably from a cron job.
# The results can be seen @ http://yeehsu.freeshell.org/cl/
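#
# Example crontab entry (illustrative; the schedule and the install path of
# clpost are assumptions for the local setup):
#   0 6 * * * /path/to/clpost -m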
# ===========================================================================

clmetrics() {
CLLOGDATE="$HOME/bin/CL/cldate.log"             # serves as a LOCK file
CLLOGTEMP="$HOME/bin/CL/cltemp.log"             # temporary XML file
CLXMLFILE="$HOME/public_html/cl/clmetrics.xml"  # public XML file
ACCESSLOG="/sys/httplogs/ukato-accesslog.o"     # web server access log
DATE=`date '+%m/%d/%y'`                         # return date in: "MM/DD/YY"

if [[ `cat $CLLOGDATE` = $DATE ]] ; then        # ensure the function runs at
        exit 0;                                 # most once a day by comparing
fi                                              # today's date to the lock file

if [[ ! -f "$ACCESSLOG" ]] ; then               # access log does not exist
        exit 0;                                 # thus, exit
fi

# Read the access log and return the hit counts for the following image files.
HITS01=`grep $USER $ACCESSLOG | grep "normal.gif"  | wc -l`
HITS02=`grep $USER $ACCESSLOG | grep "bald.gif"    | wc -l`
HITS03=`grep $USER $ACCESSLOG | grep "gothic.gif"  | wc -l`
HITS04=`grep $USER $ACCESSLOG | grep "date.gif"    | wc -l`
HITS05=`grep $USER $ACCESSLOG | grep "friend.gif"  | wc -l`
HITS06=`grep $USER $ACCESSLOG | grep "tvb.gif"     | wc -l`
HITS07=`grep $USER $ACCESSLOG | grep "hiking.gif"  | wc -l`
HITS08=`grep $USER $ACCESSLOG | grep "buylist.gif" | wc -l`
HITS09=`grep $USER $ACCESSLOG | grep "sex.gif"     | wc -l`
HITS10=`grep $USER $ACCESSLOG | grep "gfexp.gif"   | wc -l`
HITS11=`grep $USER $ACCESSLOG | grep "resume.gif"  | wc -l`
HITS12=`grep $USER $ACCESSLOG | grep "contract.gif"| wc -l`
HITS13=`grep $USER $ACCESSLOG | grep "hacker.gif"  | wc -l`

# Create an XML file to store the hit-count data, which is presented with a
# corresponding XSL stylesheet. The results can be seen at the site:
# http://yeehsu.freeshell.org/cl/
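# Illustrative shape of one record written below (hit values are made up):
#   <record><date>01/15/04</date><hits01>3</hits01> ... <hits13>0</hits13></record>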
# ############# BEGIN XML FILE                  ###########################
/usr/bin/printf "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>\n" > $CLLOGTEMP
/usr/bin/printf "<?xml-stylesheet type=\"text/xsl\" href=\"clmetrics.xsl\"?>\n\n" >> $CLLOGTEMP
/usr/bin/printf "<clmetrics>\n"                 >> $CLLOGTEMP   
/usr/bin/printf "<record>"                      >> $CLLOGTEMP   # REC element
/usr/bin/printf "<date>$DATE</date>"            >> $CLLOGTEMP   # date
/usr/bin/printf "<hits01>$HITS01</hits01>"      >> $CLLOGTEMP   # all HITs
/usr/bin/printf "<hits02>$HITS02</hits02>"      >> $CLLOGTEMP
/usr/bin/printf "<hits03>$HITS03</hits03>"      >> $CLLOGTEMP
/usr/bin/printf "<hits04>$HITS04</hits04>"      >> $CLLOGTEMP
/usr/bin/printf "<hits05>$HITS05</hits05>"      >> $CLLOGTEMP
/usr/bin/printf "<hits06>$HITS06</hits06>"      >> $CLLOGTEMP
/usr/bin/printf "<hits07>$HITS07</hits07>"      >> $CLLOGTEMP
/usr/bin/printf "<hits08>$HITS08</hits08>"      >> $CLLOGTEMP
/usr/bin/printf "<hits09>$HITS09</hits09>"      >> $CLLOGTEMP
/usr/bin/printf "<hits10>$HITS10</hits10>"      >> $CLLOGTEMP
/usr/bin/printf "<hits11>$HITS11</hits11>"      >> $CLLOGTEMP
/usr/bin/printf "<hits12>$HITS12</hits12>"      >> $CLLOGTEMP
/usr/bin/printf "<hits13>$HITS13</hits13>"      >> $CLLOGTEMP
/usr/bin/printf "</record>\n"                   >> $CLLOGTEMP   # end REC
/usr/bin/grep   "<record>" $CLXMLFILE           >> $CLLOGTEMP   # old records
/usr/bin/printf "</clmetrics>\n"                >> $CLLOGTEMP   # end XML file

/bin/mv -f $CLLOGTEMP $CLXMLFILE                # replace public XML with temp file
/bin/chmod 0644 $CLXMLFILE                      # give web access permission
/bin/echo $DATE > $CLLOGDATE                    # update lockfile
}

# RESUME
# resume is a function that auto-sends my resume to CL job postings.
# This function should be used SPARINGLY since I usually send resumes manually.
# This is the resume driver; it calls resumed() once for each job-listing URL.
# ===========================================================================

resume() {
header;         # display header information

# Precautionary measure to prevent accidentally spamming resumes.
/bin/echo "Are you sure you want to send resume to ALL Craigslist jobs?"
/bin/echo -n "ANS (y/n): "                      # ask for confirmation
read INPUT                                      # read user input

if [[ "$INPUT" != "y" ]] ; then                 # conditional testing
        /usr/bin/printf "\nExit.\n"             # exit program if no
        return;                                 # otherwise send resumes out!
fi

TEMP1="$HOME/bin/CL/.cl_tmp1"                   # temporary CL file
TEMP2="$HOME/bin/CL/.cl_tmp2"                   # temporary CL file

# Array of CL job URLs to auto-send resumes to.
# Additional URLs can be added as extra array elements (see the example below).

# For Software / QA / Programming jobs
CL_JOB[1]="http://www.craigslist.org/sof/"
CL_JOB[2]="http://www.craigslist.org/sof/index100.html"
CL_JOB[3]="http://www.craigslist.org/sof/index200.html"
CL_JOB[4]="http://www.craigslist.org/sof/index300.html"

# For Web development / Design jobs
CL_JOB[5]="http://www.craigslist.org/art/"
CL_JOB[6]="http://www.craigslist.org/art/index100.html"
CL_JOB[7]="http://www.craigslist.org/art/index200.html"
CL_JOB[8]="http://www.craigslist.org/art/index300.html"
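
# Example of adding another category (illustrative; "eng" is hypothetical here):
# CL_JOB[9]="http://www.craigslist.org/eng/"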

# loop through the array and send a resume for every CL job URL
for CL_JOBS_URL in "${CL_JOB[@]}" ; do
        resumed;                                # send the email!
done

/bin/rm -f $TEMP1                               # remove temporary CL file
/bin/rm -f $TEMP2                               # remove temporary CL file
}

# RESUMED
# resumed sends my resume to every job posting listed on the page $CL_JOBS_URL.
# This function should be used SPARINGLY since I usually send resumes manually.
# ===========================================================================

resumed() {
# download the job listings on CL
/usr/bin/printf "\nURL: $CL_JOBS_URL\n"
/usr/pkg/bin/wget -q -O $TEMP1 "$CL_JOBS_URL"
grep "href" $TEMP1 | grep "nbsp" | grep -v "table" > $TEMP2

while read LINE
do
        # extract the URL from the downloaded file
        URL=`echo $LINE | awk -F\" '{print $2}'`        # GET href URL
        URL="$CLURL$URL"                                # concatenate CL URL

        # extract the job subject
        SUBJECT="${LINE#??????????????????????????????????????????}"    # >> 43
        SUBJECT=`echo $SUBJECT | awk -F\< '{print $1}'` # select subject

        # extract the posted job email address
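        # The "Reply to" line is assumed to contain an anchor of roughly this
        # form (the address is illustrative):
        #   Reply to: <a href="mailto:job-12345678@craigslist.org?subject=...">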
        /bin/cat /dev/null > $TEMP1                     # flush temporary file
        /usr/pkg/bin/wget -q -O $TEMP1 "$URL"           # GET job posting
        EMAIL=`grep "<hr>" $TEMP1 | grep "Reply to" | grep "mailto"`
        EMAIL=`echo $EMAIL | awk -F\" '{print $2}'`     # get mailto URL
        EMAIL=`echo $EMAIL | awk -F\? '{print $1}'`     # strip QUERY_STRING
        EMAIL="${EMAIL#???????}"                        # strip mailto string
        EMAIL=`$HOME/bin/CL/clemaildecoder "$EMAIL"`    # decode email addr

        if [[ "$EMAIL" != "" ]] ; then                  # email the resume!
                # Uncomment the next line of code to auto-send resumes out.
                # This is commented out to prevent accidental SPAMMING
                # /usr/bin/mail -s "$SUBJECT" "$EMAIL" < "$RESUME"      # ASCII
                # /usr/pkg/bin/php "$PHPPROG" "$EMAIL" "$FROM" "$SUBJECT" "$RESUME"     # HTML EMAIL
                /bin/echo "RESUMED: $SUBJECT"
        fi
done < $TEMP2
}


# MAIN
# this is where the script actually starts executing.
# ===========================================================================

# make sure argument is not empty
if [[ "$1" = "" ]] ; then
        usage;
        exit 0;
fi

# check the script argument to determine which function to perform;
# each branch falls through to the temp-file cleanup below
case "$1" in
        -a) addpost;;           # add and publish the CL posts
        -p) dispost;;           # display posts that are published
        -d) rmpost;;            # delete all CL postings
        -m) clmetrics;;         # log CL hits/visits for metrics
        -r) resume;;            # auto-send resumes to CL job postings
         *) usage;;             # usage information
esac

/bin/rm -f $TEMP        # delete the external CL temporary file

exit 0;