Skip to content

Commit fdd69cf

Browse files
committed
豆瓣登陆修复
1 parent 8cafe1e commit fdd69cf

File tree

4 files changed

+62
-115
lines changed

4 files changed

+62
-115
lines changed

douban/douban.py

Lines changed: 54 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -1,107 +1,61 @@
11
#!/usr/bin/env python3
22
# -*- coding: utf-8 -*-
3-
4-
'''
5-
Required
6-
- requests (必须)
7-
- bs4 (必选)
8-
- pillow (可选)
9-
'''
10-
11-
from urllib.request import urlretrieve
123
import requests
13-
from bs4 import BeautifulSoup
14-
from os import remove
15-
16-
try:
17-
import cookielib
18-
except:
19-
import http.cookiejar as cookielib
20-
try:
21-
from PIL import Image
22-
except:
23-
pass
24-
25-
url = 'https://accounts.douban.com/login'
26-
27-
datas = {'source': 'index_nav',
28-
'remember': 'on'}
29-
30-
headers = {'Host': 'www.douban.com',
31-
'Referer': 'https://www.douban.com/',
32-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0',
33-
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
34-
'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
35-
'Accept-Encoding': 'gzip, deflate, br'}
36-
37-
# 尝试使用cookie信息
38-
session = requests.session()
39-
session.cookies = cookielib.LWPCookieJar(filename='cookies')
40-
try:
41-
session.cookies.load(ignore_discard=True)
42-
except:
43-
print("Cookies未能加载")
44-
# cookies加载不成功,则输入账号密码信息
45-
datas['form_email'] = input('Please input your account:')
46-
datas['form_password'] = input('Please input your password:')
47-
48-
49-
def get_captcha():
    """Fetch the login page, display its captcha and ask the user to solve it.

    Posts the module-level ``datas`` to ``url``, looks for the
    ``captcha_image`` element in the returned HTML, downloads that image to
    ``captcha.jpg`` in the working directory and prompts on stdin for the
    solution.  The temporary image is removed afterwards.

    Returns:
        tuple: ``(captcha, captcha_id)``.  Both items are ``None`` when the
        page carries no captcha image; ``captcha_id`` alone is ``None`` when
        the hidden ``captcha-id`` field is missing.
    """
    # NOTE(review): this request bypasses the shared ``session`` object, as in
    # the original code -- confirm whether that is intentional.
    r = requests.post(url, data=datas, headers=headers)
    soup = BeautifulSoup(r.text, "html.parser")

    # Use bs4 to locate the captcha image; the page may not contain one, in
    # which case there is nothing to solve (previously an AttributeError).
    img = soup.find('img', {'id': 'captcha_image'})
    if img is None:
        return None, None

    urlretrieve(img.get('src'), 'captcha.jpg')
    try:
        try:
            im = Image.open('captcha.jpg')
            im.show()
            im.close()
        except Exception:
            # Pillow is optional (``Image`` may be undefined) and the viewer
            # may fail; fall back to asking the user to open the file.
            print('到本地目录打开captcha.jpg获取验证码')
        captcha = input('please input the captcha:')
    finally:
        # Always clean up the downloaded image, even if the prompt is aborted.
        remove('captcha.jpg')

    id_field = soup.find('input', {'type': 'hidden', 'name': 'captcha-id'})
    captcha_id = id_field.get('value') if id_field is not None else None
    return captcha, captcha_id
71-
72-
73-
def isLogin():
    """Report whether the shared session is already logged in.

    Requests the account page through the module-level ``session`` without
    following redirects and treats a plain HTTP 200 as "logged in".

    Returns:
        bool: ``True`` if the status code is 200, otherwise ``False``.
    """
    account_url = "https://www.douban.com/accounts/"
    response = session.get(account_url, headers=headers,
                           allow_redirects=False)
    return response.status_code == 200
84-
854

86-
def login():
    """Submit the login form with a solved captcha and persist the cookies.

    Obtains the captcha solution and id from ``get_captcha()``, adds them to
    the module-level ``datas`` form, posts it through the shared ``session``,
    prints the link text of every ``div.title`` element found in the response
    page and finally saves the session cookies to the cookie jar's file.
    """
    captcha, captcha_id = get_captcha()
    # Extend the form data with the captcha fields.
    datas['captcha-solution'] = captcha
    datas['captcha-id'] = captcha_id

    login_page = session.post(url, data=datas, headers=headers)
    soup = BeautifulSoup(login_page.text, "html.parser")

    # Print the titles shown on the page returned after the login request.
    for item in soup.find_all('div', attrs={'class': 'title'}):
        link = item.find('a')
        # A title block without an anchor used to raise AttributeError.
        if link is not None:
            print(link.get_text())

    # Save cookies so the next run can log in without account and password.
    # ignore_discard=True mirrors the flag used when loading the jar, so that
    # session-only cookies are written out as well.
    session.cookies.save(ignore_discard=True)
5+
"""
6+
info:
7+
author:CriseLYJ
8+
github:https://github.com/CriseLYJ/
9+
update_time:2019-04-04
10+
"""
11+
12+
"""
13+
模拟登陆豆瓣
14+
"""
15+
16+
17+
class DouBanLogin(object):
    """Simulate a Douban login through the JSON login endpoint.

    The instance keeps one ``requests`` session so cookies obtained by
    ``get_cookie()`` are reused by ``get_user_data()``.
    """

    # Network timeout (seconds) applied to every request made by this class.
    TIMEOUT = 10

    def __init__(self, account, password, session=None):
        """Prepare the request data.

        Args:
            account: Account name sent as the ``name`` form field.
            password: Password sent as the ``password`` form field.
            session: Optional pre-built session object to use instead of a
                new ``requests.Session()``.
        """
        self.url = "https://accounts.douban.com/j/mobile/login/basic"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
        }
        # Form fields posted to the login endpoint.
        self.data = {
            "ck": "",
            "name": account,
            "password": password,
            "remember": "true",
            "ticket": ""
        }
        self.session = requests.Session() if session is None else session

    def get_cookie(self):
        """Post the login form so the session receives the login cookies.

        Returns:
            bool: ``True`` when the JSON reply has ``status == "success"``,
            ``False`` for any other reply (including non-JSON bodies).
        """
        response = self.session.post(
            url=self.url,
            headers=self.headers,
            data=self.data,
            timeout=self.TIMEOUT
        )
        try:
            result = response.json()
        except ValueError:
            # The body was not JSON (e.g. an error or block page).
            print("登陆失败: 响应不是JSON")
            return False

        if isinstance(result, dict) and result.get("status") == "success":
            print("恭喜你,登陆成功")
            return True

        print("登陆失败:", result)
        return False

    def get_user_data(self, url=None):
        """Fetch a page with the logged-in session and print its HTML.

        Args:
            url: Page to fetch, typically the user's home page.  When omitted
                the placeholder below is used, which must be replaced first.

        Raises:
            ValueError: If ``url`` is not an http(s) URL (for example the
                unreplaced placeholder).
        """
        if url is None:
            # TODO: fill in the URL of your user home page here.
            url = "这里填写你用户主页的url"
        if not url.startswith(("http://", "https://")):
            raise ValueError(
                "get_user_data needs a real http(s) URL, got: %r" % (url,)
            )
        # Send the same headers as the login request.
        # NOTE(review): presumably the site rejects the default client
        # User-Agent -- confirm.
        html = self.session.get(
            url, headers=self.headers, timeout=self.TIMEOUT
        ).text
        print(html)

    def run(self):
        """Log in and, only if that succeeded, print the user page."""
        if self.get_cookie():
            self.get_user_data()
10155

10256

10357
if __name__ == '__main__':
104-
if isLogin():
105-
print('Login successfully')
106-
else:
107-
login()
58+
account = input("请输入你的账号:")
59+
password = input("请输入你的密码:")
60+
login = DouBanLogin(account, password)
61+
login.run()

douban/douban_spider.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
# 定义请求头
1111
headers = {
12-
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36"
12+
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36"
1313
}
1414
# 循环构建请求参数并且发送请求
1515
for page_start in range(0, 100, 20):

facebook/facebook.py

Lines changed: 6 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,19 @@
66

77

88
def login(session, email, password):
9-
'''
10-
Attempt to login to Facebook. Returns user ID, xs token and
11-
fb_dtsg token. All 3 are required to make requests to
12-
Facebook endpoints as a logged in user. Returns False if
13-
login failed.
14-
'''
15-
16-
# Navigate to Facebook's homepage to load Facebook's cookies.
9+
"""
10+
获取cookie
11+
"""
1712
response = session.get('https://m.facebook.com')
1813

19-
# Attempt to login to Facebook
14+
# 尝试登陆
2015
response = session.post('https://m.facebook.com/login.php', data={
2116
'email': email,
2217
'pass': password
2318
}, allow_redirects=False)
2419

25-
# If c_user cookie is present, login was successful
2620
if 'c_user' in response.cookies:
27-
28-
# Make a request to homepage to get fb_dtsg token
21+
# 说明登陆成功
2922
homepage_resp = session.get('https://m.facebook.com/home.php')
3023

3124
dom = pyquery.PyQuery(homepage_resp.text.encode('utf8'))
@@ -46,7 +39,7 @@ def login(session, email, password):
4639

4740
session = requests.session()
4841
session.headers.update({
49-
'User-Agent': 'Mozilla/5.0 (X11; Linux i686; rv:39.0) Gecko/20100101 Firefox/39.0'
42+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
5043
})
5144

5245
fb_dtsg, user_id, xs = login(session, args.email, args.password)

webWeixin/webWeixin.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
session = requests.session()
1818
headers = {
19-
'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:33.0) Gecko/20100101 Firefox/33.0'
19+
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'
2020
}
2121

2222
QRImgPath = os.path.split(os.path.realpath(__file__))[0] + os.sep + 'webWeixinQr.jpg'

0 commit comments

Comments
 (0)