import json
import time

import requests
import tesserocr
from PIL import Image
# Module-wide HTTP session used by get_verify_code() for its request.
# requests.Session() is the supported constructor; requests.session() is a
# legacy alias kept only for backwards compatibility.
requests_session = requests.Session()
def get_verify_code():
    """Download the CNKI registration captcha image and OCR it.

    The image is fetched once through the module-level ``requests_session``,
    saved to ``code.jpg`` in the current working directory, converted to
    greyscale, binarised with a fixed threshold, displayed, and passed to
    ``tesserocr.image_to_text``. The recognised text is printed.

    Returns:
        The text produced by ``tesserocr.image_to_text``, or ``None`` when
        the server does not answer with HTTP 200.

    Raises:
        requests.exceptions.RequestException: if the HTTP request fails or
            exceeds the timeout.
    """
    url = "http://my.cnki.net/elibregister/CheckCode.aspx"
    # Millisecond timestamp sent as the ``id`` query parameter.
    querystring = {"id": int(time.time() * 1000)}
    # NOTE(review): the Cookie and Postman-Token values are hard-coded and
    # look like they were captured from one browser/Postman session --
    # confirm they are still accepted by the server.
    headers = {
        'Accept': "image/webp,image/apng,image/*,*/*;q=0.8",
        'Accept-Encoding': "gzip, deflate",
        'Accept-Language': "zh-CN,zh;q=0.9,en;q=0.8",
        'Cache-Control': "no-cache",
        'Connection': "keep-alive",
        'Cookie': "Ecp_ClientId=8180828132605418310; Ecp_IpLoginFail=18082859.111.198.102; ASP.NET_SessionId=lmkarndj230wvzo2n5ntzge0; SID=020102; ImageV=2QPV",
        'DNT': "1",
        'Host': "my.cnki.net",
        'Pragma': "no-cache",
        'Referer': "http://my.cnki.net/elibregister/commonRegister.aspx",
        'User-Agent': "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36",
        'Postman-Token': "7e66b937-40bd-4de8-a221-f8a2ef8db407",
    }

    # One request only, with the query parameters passed as a mapping so that
    # requests encodes them as ``?id=...``. A timeout keeps the call from
    # hanging indefinitely on an unresponsive server.
    response = requests_session.get(
        url, headers=headers, params=querystring, timeout=10
    )
    if response.status_code != 200:
        return None

    # Write and re-open the same path; the ``with`` block closes (and thereby
    # flushes) the file before it is read back, so no delay is needed.
    image_path = 'code.jpg'
    with open(image_path, 'wb') as file:
        file.write(response.content)

    image = Image.open(image_path).convert('L')

    # Lookup table for binarisation: grey levels below the threshold map to
    # 0, all others to 1.
    threshold = 127
    table = [0 if level < threshold else 1 for level in range(256)]
    image = image.point(table, '1')
    image.show()

    result = tesserocr.image_to_text(image)
    print(result)
    return result
if __name__ == '__main__':
    # Fetch and OCR one captcha when the file is executed as a script.
    get_verify_code()