各位先进
最近初接触爬虫
想用 BeautifulSoup 抓类似下面网站的内容
http://propaccess.trueautomation.com/clientdb/?cid=81
但送了 POST 以后却无回传值，看起来是没有让服务器收到 form data
想向各位求救 帮忙看看 code 哪里需要做修正
目前想依照 Owner name 搭配 Advanced 里面的显示条件来爬虫
import requests
from bs4 import BeautifulSoup
import re
from decimal import Decimal
import pandas as pd
import urllib
# Search page of the property database. The same URL is fetched to obtain the
# form (GET) and is the target of the search submission (POST).
index_url = 'http://propaccess.trueautomation.com/clientdb/?cid=81'
session = requests.Session()

# Load the form through the session so that the session cookies and the
# hidden tokens parsed below originate from the same response. Fetching the
# page a second time outside the session (e.g. with urllib) would yield
# tokens that are not tied to this session's cookies.
index_request = session.get(index_url)
index_request.raise_for_status()
soup = BeautifulSoup(index_request.text, 'lxml')

# Hidden state fields that must be echoed back with the POST.
viewstate = soup.find_all(
    "input", {"type": "hidden", "name": "__VIEWSTATE"})
viewstategenerator = soup.find_all(
    "input", {"type": "hidden", "name": "__VIEWSTATEGENERATOR"})
eventvalidation = soup.find_all(
    "input", {"type": "hidden", "name": "__EVENTVALIDATION"})

# Fail with a clear message instead of an IndexError further down when the
# page does not contain one of the expected hidden inputs.
for field_name, matches in (
    ("__VIEWSTATE", viewstate),
    ("__VIEWSTATEGENERATOR", viewstategenerator),
    ("__EVENTVALIDATION", eventvalidation),
):
    if not matches:
        raise RuntimeError(
            "hidden form field %r not found at %s" % (field_name, index_url))

# Keys are the plain (decoded) field names. requests percent-encodes form
# data itself, so a pre-encoded key such as 'a%3Ab' would reach the server as
# 'a%253Ab' and not match any form field. The stray trailing ':' that was on
# the searchType key is removed for the same reason.
# NOTE(review): field names and option values are taken from the encoded keys
# of the original request; confirm them against the <input>/<select> elements
# of the live page.
formdata = {
    "propertySearchOptions:searchType": "Owner Name",
    "propertySearchOptions:ownerName": 'smith',
    "propertySearchOptions:taxyear": "2016",
    "propertySearchOptions:propertyType": 'Mineral',
    "propertySearchOptions:orderResultsBy": "Owner Name",
    "propertySearchOptions:recordsPerPage": "250",
    "__EVENTVALIDATION": eventvalidation[0]['value'],
    "__VIEWSTATE": viewstate[0]['value'],
    "__VIEWSTATEGENERATOR": viewstategenerator[0]['value'],
    "propertySearchOptions:search": "Search",
}

# Submit the search with the same session (cookies) and parse the result page.
response_post = session.post(index_url, data=formdata)
response_post.raise_for_status()
soup_post = BeautifulSoup(response_post.text, 'lxml')
感谢大神们