风晓

前言

今天打开发现文泉课堂需要登录才能访问;登录之后发现,相比昨天频繁出现的 502 错误,情况明显好多了。今天附上所有源码,供大家学习和参考。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
#!/usr/bin/env python
# -*- coding:utf-8 -*-
# Author:风晓
import requests
import time
from pathlib import Path
import os
import jwt
import json
from tqdm import trange
# Empty module-level mapping; none of the functions visible in this file read it.
data = dict()

# Browser-like default request headers (desktop Chrome user agent).
headers = dict([
    (
        'user-agent',
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    ),
])

# Shared secret handed to jwt.encode() when page tokens are signed.
JWT_SECRET = 'g0NnWdSE8qEjdMD8a1aq12qEYphwErKctvfd3IktWHWiOBpVsgkecur38aBRPn2w'

# Base address of the reading service; endpoint paths are appended to it.
URL = 'https://lib-nuanxin.wqxuetang.com'
def gen_jwt_key(bookid, cooike):
    """Fetch the per-book key that is embedded in page-image JWT tokens.

    Sends a GET request to ``{URL}/v1/read/k?bid=<bookid>`` carrying the
    caller's cookie and returns the ``data`` member of the JSON reply.

    Args:
        bookid: Book identifier; interpolated into the query string as-is.
        cooike: Raw ``Cookie`` header value to send with the request (the
            parameter keeps the file's original spelling).

    Returns:
        Whatever the service stored under ``"data"`` in its JSON response.

    Raises:
        Exception: If the request failed, the body was not a JSON object,
            or the reply carried no ``data`` member.
    """
    url = f'{URL}/v1/read/k?bid={bookid}'
    print(url)
    head = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': cooike,
        'Host': 'lib-nuanxin.wqxuetang.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
    }

    # Without a timeout a stalled connection would block the script forever.
    timeout_seconds = 30

    # ``resp`` stays None when the request itself fails, so the JSON step
    # below never touches an unbound name.
    resp = None
    try:
        resp = requests.get(url, headers=head, timeout=timeout_seconds)
        resp.raise_for_status()
    except requests.RequestException as exc:
        # Best effort, as before: report the problem and still try to parse
        # whatever body (if any) came back with an error status.
        print(exc)

    jdata = {}
    if resp is not None:
        try:
            jdata = resp.json()
        except ValueError as exc:
            print(exc)
    if not isinstance(jdata, dict):
        # A JSON array/scalar has no ``data`` member; treat it as missing.
        jdata = {}

    res = jdata.get('data')
    if res is None:
        raise Exception('returned None, something is not right...')

    return res
def gen_jwt_token(bookid, page, cooike):
    """Build the signed JWT that authorises fetching one page image.

    The payload holds the page (``p``), the current time in milliseconds
    truncated to a whole second (``t``), the book id as a string (``b``),
    the constant ``1000`` (``w``), the JSON-encoded key returned by
    ``gen_jwt_key`` (``k``) and the issue time in seconds (``iat``). It is
    signed with ``JWT_SECRET`` using HS256.

    Args:
        bookid: Book identifier.
        page: Page number placed in the token payload.
        cooike: Raw ``Cookie`` header value, forwarded to ``gen_jwt_key``.

    Returns:
        The encoded token as ``str``.

    Raises:
        Exception: Propagated from ``gen_jwt_key`` when no key is returned.
    """
    cur_time = time.time()
    jwtkey = gen_jwt_key(bookid, cooike)
    print(jwtkey)
    jwttoken = jwt.encode(
        {
            "p": page,
            "t": int(cur_time) * 1000,
            "b": str(bookid),
            "w": 1000,
            "k": json.dumps(jwtkey),
            "iat": int(cur_time),
        },
        JWT_SECRET,
        algorithm='HS256',
    )
    # PyJWT < 2.0 returns bytes while PyJWT >= 2.0 returns str; calling
    # .decode() unconditionally raises AttributeError on the newer versions.
    if isinstance(jwttoken, bytes):
        jwttoken = jwttoken.decode('ascii')
    print(jwttoken)
    return jwttoken
def bookinfo(bookid, cooike):
    """Look up a book's name and its ``canreadpages`` value.

    Sends a GET request to ``{URL}/v1/read/initread?bid=<bookid>`` with the
    caller's cookie. On success the full JSON reply is also stored on the
    function object as ``bookinfo.jdata``.

    Args:
        bookid: Book identifier; must be convertible to an ``int`` >= 1.
        cooike: Raw ``Cookie`` header value to send with the request (the
            parameter keeps the file's original spelling).

    Returns:
        A ``(name, canreadpages)`` tuple taken from the reply's ``data``
        member.

    Raises:
        TypeError, ValueError: If ``bookid`` cannot be converted to ``int``.
        Exception: If ``bookid`` is smaller than 1, or the reply carries no
            ``data`` member (request failure, non-JSON body, unknown book).
    """
    # The URL is built from the value exactly as passed in, before validation.
    url = f'{URL}/v1/read/initread?bid={bookid}'
    head = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Connection': 'keep-alive',
        'Cookie': cooike,
        'Host': 'lib-nuanxin.wqxuetang.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
    }
    try:
        bookid = int(bookid)
    except (TypeError, ValueError) as exc:
        print(exc)
        raise
    if bookid < 1:
        raise Exception(' bookid must be bigger than zero')

    # Without a timeout a stalled connection would block the script forever.
    timeout_seconds = 30

    # ``resp`` stays None when the request itself fails, so the JSON step
    # below never touches an unbound name.
    resp = None
    try:
        print(url)
        resp = requests.get(url, headers=head, timeout=timeout_seconds)
        resp.raise_for_status()
    except requests.RequestException as exc:
        # Best effort, as before: report and still try to parse the body.
        print(exc)

    jdata = {}
    if resp is not None:
        try:
            jdata = resp.json()
        except ValueError as exc:
            print(exc)
    if not isinstance(jdata, dict):
        # A JSON array/scalar has no ``data`` member; treat it as missing.
        jdata = {}

    data = jdata.get('data')
    print(data)
    if data is None:
        raise Exception(
            'returned None, something is not right...可能无此书号,也有可能是网络有问题或IP被限制……'  # noqa
        )
    # Keep the raw reply reachable for callers that inspect bookinfo.jdata.
    bookinfo.jdata = jdata
    print(data.get('name'), data.get('canreadpages'))
    return data.get('name'), data.get('canreadpages')
def fetch_png(bookid, page=1, cooike=None):
    """Download one page image and save it as ``<bookid>/<page + 1000>.jpg``.

    A fresh token is obtained from ``gen_jwt_token`` and appended to the
    image URL as the ``k`` query parameter. The output directory is created
    when missing. Nothing is written when the server answers with an HTTP
    error status or an empty body, so failed pages do not leave junk files
    that a later run would mistake for finished downloads.

    Args:
        bookid: Book identifier; also used as the output directory name.
        page: Page number to fetch. The file name is ``page + 1000``.
        cooike: Raw ``Cookie`` header value. When omitted, the module-level
            ``cooike`` variable is used, which is what the original
            implementation relied on implicitly.

    Returns:
        The response body as ``bytes``.

    Raises:
        ValueError: If no cookie was passed and no module-level ``cooike``
            exists.
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        Exception: If the response body is empty, or propagated from
            ``gen_jwt_token``.
    """
    if cooike is None:
        # Backward compatibility with callers that set a global ``cooike``.
        cooike = globals().get('cooike')
        if cooike is None:
            raise ValueError('no cookie given and no module-level "cooike" is defined')

    token = gen_jwt_token(bookid, page, cooike)
    print(token)
    url = f'{URL}/page/img/{bookid}/{page}?k={token}'
    print(url)
    # NOTE(review): the 'accept' value contains spaces inside the media types;
    # it is kept byte-for-byte because the server's handling of it is unknown.
    headers = {
        'accept': 'image / webp, image / *, * / *;q = 0.8',
        'referer': f'{URL}/read/pdf/{bookid}',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',
    }
    # Without a timeout a stalled connection would block the script forever.
    req = requests.get(url, headers=headers, timeout=30)
    # Refuse to store an error page under a .jpg name.
    req.raise_for_status()
    res = req.content
    if not res:
        raise Exception(f'empty response body for page {page}')

    out_dir = Path(str(bookid))
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / (str(page + 1000) + '.jpg')).write_bytes(res)

    return res
if __name__ == '__main__':
    # Usage: change ``bookid`` below and replace ``cooike`` with your own cookie.
    # To obtain it, open https://lib-nuanxin.wqxuetang.com/v1/read/k?bid=3187886
    # in a logged-in browser, open the developer tools (F12) and copy the
    # request's Cookie header.
    bookid = 3187886
    # Sample session cookie from the original post; replace it with your own.
    cooike = '_gid=377254409517; _gidv=6699d59c633414a0287f63aa61c56d4e; PHPSESSID=ngeq25ktm1cp2ipd54kabr2eii; Hm_lvt_a84b27ffd87daa3273555205ef60df89=1580866455,1580876574,1580883745,1580883761; Hm_lpvt_a84b27ffd87daa3273555205ef60df89=1580883767; acw_tc=3ccdc15315808917923211688e5e6c7e01cc28622fbf0190a83bc6ce183541'
    try:
        page = bookinfo(bookid, cooike)[1]
        print(page)
        os.makedirs(str(bookid), exist_ok=True)
        # NOTE(review): range() excludes its upper bound, so the page numbered
        # ``canreadpages`` itself is never fetched - confirm whether the
        # service counts pages 1..canreadpages inclusive.
        for i in range(1, int(page)):
            # Pages that already exist on disk are skipped, so re-running the
            # script only retries the missing ones.
            if os.path.exists(str(bookid) + '/' + str(i + 1000) + '.jpg'):
                continue
            try:
                print(i)
                # fetch_png() generates its own token; the extra
                # gen_jwt_token() call that used to precede it only cost an
                # additional key request whose result was thrown away.
                fetch_png(bookid, i)
                # Pause between downloads.
                time.sleep(6)
            except Exception as e:
                # Keep going: one failed page must not abort the whole run.
                print(e)
    except Exception as e:
        print(e)

若大量遇到下面这种情况:

输出为空列表,则说明你的 cookie(即代码中的 cooike 变量)可能已经过期,及时更换即可。

跑完一遍后,打开图片的保存路径,按文件大小排序,然后翻到最下面;如果看到的和下图一样,说明这几张没有下载成功。把它们删掉后重新运行一遍即可:重新运行不会覆盖之前已下载的图片,只会抓取没有成功的页面:

最后推荐一个好用的小工具,图片合成pdf工具:
pic2pdf

有了这些,就可以成功得到自己的电子书了。今天就到这儿吧。


已经到底了!:

 评论

<!--动态线条背景-->