最近在练习写爬虫，又被一些基本观念的问题给卡住了，以下是威秀影城的爬虫程式码。
# Ticketing page for the Avengers movie. The literal must stay on a single
# line: a line break inside the quotes leaves the string unterminated.
MOVIE_URL = 'http://www.vscinemas.com.tw/vsTicketing/ticketing/ticket.aspx?cinema=1|TP&movie=HO00006836'
# Fetch a page and confirm that the URL answers successfully.
def get_web_page(url, timeout=10):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response text, or ``None`` when the request fails or the
        server answers with a status code other than 200.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        resp = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Report network failures the same way as a bad status code so that
        # callers only have to check for None.
        print('无效的URL:', url, err)
        return None
    if resp.status_code != 200:
        print('无效的URL:', resp.url)
        return None
    return resp.text
# Extract the movie's showtime information from the page.
def get_movie(dom):
    """Parse the ticketing page HTML into a list of showtime records.

    Args:
        dom: HTML source of the page as a string.

    Returns:
        A list of dicts with the keys ``'name'``, ``'date'`` and ``'time'``,
        one per ``<li>`` found inside any ``<ul class="bookList">``. The list
        is empty when the page contains no such element.
    """
    soup = BeautifulSoup(dom, 'html5lib')

    # The title is the same for every showtime, so look it up once instead
    # of searching the whole document again for each list item.
    describe = soup.find('div', 'movieDescribe')
    title_tag = describe.h1 if describe is not None else None
    name = title_tag.text if title_tag is not None else ''

    movies = []
    # find_all() visits every showtime list; find() would stop at the first
    # one and drop the showtimes listed under later date headings.
    # NOTE(review): assumes each date has its own <h4> followed by a
    # <ul class="bookList"> -- confirm against the live page.
    for book_list in soup.find_all('ul', 'bookList'):
        for item in book_list.find_all('li'):
            # The date is read from the nearest <h4> that precedes the
            # item's parent element.
            date_tag = item.parent.find_previous_sibling('h4')
            movies.append({
                'name': name,
                'date': date_tag.text if date_tag is not None else '',
                'time': item.text,
            })
    return movies
# Script entry point.
def main():
    """Fetch the movie page and print every showtime found on it.

    Nothing is printed when the page could not be downloaded.
    """
    html = get_web_page(MOVIE_URL)
    if not html:
        return

    showtimes = get_movie(html)
    # Dump the raw records first, then one line per showtime.
    print(showtimes)
    for entry in showtimes:
        print(entry['name'], entry['date'], entry['time'])
# Run the scraper only when the file is executed as a script, not when it is
# imported. The call has to be indented to form the body of the if statement.
if __name__ == '__main__':
    main()