#!/usr/bin/env python
# -*- coding: UTF-8 -*-
#Author: Roy L Zuo (roylzuo at gmail dot com)
#Last Change: Wed Nov 26 12:37:24 2008 EST
#Description: Mass-mail a resume to the contacts found through yingjiesheng
#             keyword searches, and save the pages of the positions that
#             were applied for into the given folder.
import os
import re
import shelve
import sys
import time
import urllib2

# Each entry is one search; its keywords are joined with '+' in the query.
searches = [['linux', 'python'], ['unix', 'python'], ['linux', '金融'],
            ['unix', '金融'], ['linux', 'finance'], ['unix', 'finance'],
            ]
# Root folder for the saved job pages and the two shelve databases.
savepath = '%s/workspace/career/buster' % os.environ['HOME']

# Marker on the result page that carries the total number of records.
TOTAL_PATTERN = "共找到(.*)条记录"
# NOTE(review): this pattern looks like it lost its HTML markup (and its
# capture group for the link) somewhere along the way; only the newlines and
# the lazy wildcard are left.  It is kept with the same runtime value --
# restore the real pattern before relying on the search results.
RESULT_LINK_PATTERN = '\n\n.*?\n\n'
# First thing on a page that looks like an e-mail address.
EMAIL_PATTERN = r"(\w+(?:[-+.]\w+)*@\w+(?:[-.]\w+)*\.\w+(?:[-.]\w+)*)"
# Step between the "start" offsets of consecutive result pages.
RESULTS_PER_PAGE = 10


def _fetch(url):
    '''Return the raw body of url, closing the connection afterwards.'''
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def getLatestJobs(keywords):
    '''Search for the latest job postings matching keywords.

    keywords is a list of strings that is joined with '+' into the query.
    Returns the list of RESULT_LINK_PATTERN matches collected from every
    result page, or None when the first page has no record-count marker.
    Filtering against already-submitted jobs is left to the caller.
    '''
    #TODO: compare with saved pages
    url0 = "http://s.yingjiesheng.com/result.jsp?keyword=%s&period=3&sort=&jobtype=1" % '+'.join(keywords)
    page = _fetch(url0 + "&start=0")
    match = re.search(TOTAL_PATTERN, page)
    if not match:
        return None
    results = re.findall(RESULT_LINK_PATTERN, page)
    total = int(match.group(1))
    # Offsets 10, 20, ... strictly below the total: unlike total/10 extra
    # pages, this does not request an empty page when the total is an exact
    # multiple of the page size.
    for offset in range(RESULTS_PER_PAGE, total, RESULTS_PER_PAGE):
        npage = _fetch(url0 + "&start=%d" % offset)
        results.extend(re.findall(RESULT_LINK_PATTERN, npage))
    return results


def getEmailAddress(url, savepath):
    '''Fetch url and return the first e-mail address found in it.

    When an address is found, the page is also saved under
    savepath/<yy-mm-dd>/<last path component of url>.  Returns None, and
    saves nothing, when the page contains no address.
    '''
    page = _fetch(url)
    match = re.search(EMAIL_PATTERN, page)
    if not match:
        return None
    # Keep a copy of the posting, grouped by the day it was processed.
    savedir = '%s/%s' % (savepath, time.strftime("%y-%m-%d"))
    if not os.path.exists(savedir):
        # makedirs also creates savepath itself on a first run.
        os.makedirs(savedir)
    with open("%s/%s" % (savedir, url.split("/")[-1]), 'w') as saved:
        saved.write(page)
    return match.group(1)


def main():
    '''Run every search, harvest contact addresses and mail the resume.'''
    # A threaded fetch through a local WorkerManager helper was sketched
    # here at some point but is disabled; searches run sequentially.
    joblist = set()
    for item in searches:
        # Query once and reuse the result; a second call would repeat every
        # HTTP request of the search.
        links = getLatestJobs(item)
        if links:
            joblist.update(links)

    emails = set()
    submitted = shelve.open("%s/submitted" % savepath)
    try:
        for url in joblist:
            if url in submitted:
                continue
            e = getEmailAddress(url, savepath)
            if e:
                emails.add(e)
            # NOTE(review): the original layout of this statement was lost;
            # it is assumed to sit outside the "if", so every visited URL is
            # remembered (with its address or None) -- confirm.
            submitted[url] = e
    finally:
        # Always flush the shelf, even if a fetch failed half-way through.
        submitted.close()

    sender = "Le Zuo (Roy) "
    attachment = "/home/roylez/workspace/career/doc/resume.pdf"
    subject = "应聘"
    mutt = "mutt -s'%s' -e'set from=\"%s\"' -a'%s' %s <$HOME/doc/letter.txt"
    subemails = shelve.open("%s/emails" % savepath)
    try:
        for e in emails:
            # Send the resume with mutt: the body comes from the letter
            # template file and the attachment is added automatically.
            if e in subemails:
                continue
            print("Submitting to %s ..." % e)
            os.system(mutt % (subject, sender, attachment, e))
            subemails[e] = ''
    finally:
        subemails.close()


if __name__ == '__main__':
    main()