Python3-Scrapy笔记4 Cookie 使用

Scrapy cookie 使用：模拟登录

使用写死 cookie 的方式来爬取数据

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
class OschinaSpider(scrapy.Spider):
    """Spider that requests its start URL with a hard-coded browser cookie.

    The cookie header is pasted in verbatim, converted to a dict and attached
    to the first request so the site treats the request as a logged-in
    session.
    """

    name = 'oschina'
    allowed_domains = ['oschina.net']
    start_urls = ['https://my.oschina.net/gavinnie']

    def start_requests(self):
        """Yield the initial request for ``start_urls[0]`` with cookies set.

        Yields:
            scrapy.Request: a request whose ``cookies`` argument is the dict
            parsed from the hard-coded cookie header, handled by ``parse``.
        """
        # Raw ``Cookie`` header value copied from a browser session.
        # NOTE(review): this embeds session credentials in source code;
        # consider loading it from settings or the environment instead.
        raw_cookies = "_user_behavior_=xxxxxxxxxxxxxx; _ga=GA1.2.xxxxxxx.1552645009; bad_id8387c580-a888-11e5-bc38-bb63a4ea0854=73796a22-470b-11e9-a0c5-9766216bb6aa; visit-detail-banner-ad-0320-szych=1; Hm_lvt_cb47adfe0fabd7059a2a90a495077efe=1554200836,1554256670,1554813667,1554813698; Hm_lvt_a411c4d1664dd70048ee98afe7b28f0b=1555308407,1555318569,1555319447,1555338465; aliyungf_tc=AQAAANO5GWRlewQAsORuccmERgN2ipPj; gr_user_id=39a8b44b-fae8-43ff-966e-374816c37b9a; 89b266b986554bc7_gr_session_id=93c4167a-6670-4bbc-a52c-21ae2c8d48d7; grwng_uid=17517646-cbd9-43dd-bbff-0f4b5bf69fe0; 89b266b986554bc7_gr_session_id_93c4167a-6670-4bbc-a52c-21ae2c8d48d7=true; _reg_key_=Y0IxI5rVHdjoBrXab2fR; oscid=89E3wV7GHESCHLDqjKKdQYapYTwu6wBbSEObMTaT8TnrGyo%2BeRuOaVU7xdj4S6RyHS%2F%2FjOyoeno%2BMURY4CGyfrWKBtf7pJ9CGHOq%2Fmw%2Baz2M%2BsLIy1iP%2Fjl0T7qX9n2kh3DfkVrBP3o6jNXfwqRPPHd5x43%2F3ZreafGbbRnFcaQ%3D; Hm_lpvt_a411c4d1664dd70048ee98afe7b28f0b=xxxxxxxxxxx"

        # Split each "name=value" pair on the FIRST "=" only: a cookie value
        # may itself contain "=", and a plain split("=") would truncate it.
        # Fragments without any "=" are skipped instead of raising.
        cookies = {}
        for pair in raw_cookies.split("; "):
            cookie_name, separator, cookie_value = pair.partition("=")
            if separator:
                cookies[cookie_name] = cookie_value
        print(cookies)

        first_url = self.start_urls[0]
        print(first_url)
        yield scrapy.Request(
            first_url,
            callback=self.parse,
            cookies=cookies,
        )

    def parse(self, response):
        """Print the HTTP status code of the downloaded response."""
        print(response.status)

使用 POST 请求

1
2
3
4
5
6
7
8
9
10

# Option 1: build the POST request directly.
scrapy.FormRequest  # constructs a POST request

# Option 2: let Scrapy locate the <form> in an already-downloaded response
# and submit it with the given fields filled in.
scrapy.FormRequest.from_response(
    response,  # the form element is found automatically in this response
    formdata={"login": '账号', "pwd": '密码'},
    callback=self.parse,
)