# Python 版本,需要 PIL 跟 pycurl,将就用吧。
#!coding=utf-8
import calendar
import math
import operator
import os
import time
from StringIO import StringIO

import pycurl
from PIL import Image
# Histogram RMS at or below this value is treated as "same picture as no.jpg".
# The original author's note says a larger number means a larger difference
# and that a 100% match comes out close to 860 -- NOTE(review): empirical
# value, confirm against the live server before changing it.
_PLACEHOLDER_RMS_LIMIT = 870


def _histogram_rms(hist_a, hist_b):
    """Return the root-mean-square difference of two equal-length histograms.

    Both arguments are sequences of integer bin counts as produced by
    ``Image.histogram()``.  The mean is floored to an integer before the
    square root is taken, which is what the original Python 2 ``/`` on ints
    did; ``//`` keeps the threshold calibration identical.
    """
    squared = sum((a - b) ** 2 for a, b in zip(hist_a, hist_b))
    return math.sqrt(squared // len(hist_a))


def _fetch(url):
    """Download *url* with pycurl and return the body as a rewound buffer.

    Raises ``pycurl.error`` when the transfer fails.  The curl handle is
    closed on every path so a failed transfer does not leak it.
    """
    body = StringIO()
    handle = pycurl.Curl()
    try:
        handle.setopt(handle.URL, url)
        handle.setopt(handle.WRITEFUNCTION, body.write)
        handle.perform()
    finally:
        handle.close()
    body.seek(0)
    return body


def _save_unless_placeholder(body, placeholder_hist, savefile):
    """Save the image in *body* to *savefile* unless it matches the placeholder.

    Returns True when a file was written, False when the data could not be
    decoded, matched the placeholder histogram, or could not be saved.
    """
    try:
        im = Image.open(body)
        hist = im.histogram()
    except Exception:
        # Best effort, as in the original: anything that is not a decodable
        # image is skipped.  ``Exception`` (not a bare except) so that Ctrl-C
        # still stops the run.
        return False

    # Image.histogram() concatenates one histogram per band, so a length
    # mismatch means a different band layout than no.jpg and therefore not
    # the placeholder.  Only equal-length histograms are compared.
    if len(hist) == len(placeholder_hist):
        if _histogram_rms(placeholder_hist, hist) <= _PLACEHOLDER_RMS_LIMIT:
            return False

    try:
        im.save(savefile, 'JPEG')
    except Exception:
        # Do not leave a partial file behind: the resume check in main()
        # treats any existing file as already downloaded.
        if os.path.isfile(savefile):
            try:
                os.remove(savefile)
            except OSError:
                pass
        return False
    return True


def main():
    """Download every daily picture from 1951-09 through 2014-12.

    Pictures are written as ``<YYYYMMDD>.jpg`` into a ``pic`` directory next
    to this script (created if missing).  Dates whose file already exists
    are skipped, so an interrupted run can be resumed.  Responses whose
    histogram matches ``no.jpg`` (also next to this script) are not saved.

    Progress messages are printed with the single-argument ``print(...)``
    form, which behaves the same as the Python 2 print statement.
    """
    pic_url = r"https://event.franklin.com.tw/C2014_11_TGF/showimg.aspx?date="
    # abspath first: with a bare relative __file__, dirname() is "" and the
    # old dirname + "/pic" resolved to the filesystem root.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    path = os.path.join(base_dir, "pic")
    placeholder_hist = Image.open(os.path.join(base_dir, "no.jpg")).histogram()

    if not os.path.isdir(path):
        os.makedirs(path)
        print("目录 " + path + " 不存在,产生新目录.")
    print("图片将储存于 " + path + " 目录.")

    for year in range(1951, 2015):
        y = str(year)
        # range stops are exclusive: 13 is needed to include December.
        for month in range(1, 13):
            # Per the original note, no data exists before 1951-09-16, so
            # the months before September 1951 are skipped entirely.
            if year == 1951 and month < 9:
                continue
            m = "%02d" % month
            print("开始撷取 " + y + "年" + m + "月.")

            # Only request days that exist in this month (covers the 31st
            # and leap-year February, and skips impossible dates).
            days_in_month = calendar.monthrange(year, month)[1]
            for day in range(1, days_in_month + 1):
                date = "%s%s%02d" % (y, m, day)
                savefile = os.path.join(path, date + ".jpg")

                # Already downloaded on a previous run.
                if os.path.isfile(savefile):
                    print(savefile + "已存在.")
                    continue

                try:
                    body = _fetch(pic_url + date)
                except pycurl.error:
                    # Transfer failed; move on to the next date.
                    continue

                if _save_unless_placeholder(body, placeholder_hist, savefile):
                    print(time.strftime("%Y-%m-%d %H:%M:%S",
                                        time.localtime())
                          + " 储存 " + savefile + " 成功.")
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()