刷青少年普法网的Python脚本

2023-10-21 约 2771 字预计阅读 6 分钟

青少年普法网的要求真是麻烦。乡下学校要求家长去做其实还是很有难度的。利用信息课让学生自己刷，教了半节课还有连网址都打不开的，完成率很低。那就用Python技术解决吧。

先是打算用Requests库直接登录。这个破网站又是rsa加密，又是后台blob生成二维码。真是有毒。只好用selenium模拟登录，然后获取Cookie。再用requests加载这个Cookie。后面的练习考试就直接用requests来进行。毕竟速度要快很多。用selenium打开界面还要等待加载。后端速度就嗖嗖的了。验证码就靠ddddocr这个开源的识别库了。毕竟百度云ocr要收费。识别成功率也就Soso了。

废话不多，直接上使用方法：

找一个文本文件放入用户名、姓名和密码，每行一个学生，注意要另存为utf-8编码格式。否则默认的ANSI编码那些中文名字读不出来。第150行（读取名单的 `open("用户名密码文件", ...)` 处）修改路径。

用户名1,姓名1,密码1
用户名2,姓名2,密码2
用户名3,姓名3,密码3
……

第28行（`beilv = 1.25`）根据屏幕缩放填入数字，没缩放就填1。屏幕比较大、缩放到150%就填1.5。
根据开头import里那些库，自己pip安装。这都不会的话，这个脚本不适合你，换个会的人来。
下载安装好 Firefox 和 geckodriver.exe ，在第25行（`webdriver.Firefox(...)` 那一行）设置好路径。
如果要速度稍微快一点就开启headless模式：把第22和23行（两行 `options.add_argument`）的注释去掉。headless模式屏幕缩放倍率设置为1。
开刷……

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180


#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2023-10-18 23:29:28
# @Author  : Xzap (xzap@163.com)
# @Link    : http://xzap.gitee.io
# @Version : 1.3

import os
import requests
import re
from pathlib import Path
from seleniumwire import webdriver 
import time
from selenium.webdriver.common.by import By
from PIL import Image
import ddddocr
import random
ocr = ddddocr.DdddOcr()  # shared OCR engine; shuashua() uses it to read the login captcha
# ocr.load_model()

options = webdriver.FirefoxOptions()
# options.add_argument('-headless')  # uncomment this and the next line to run without a visible window
# options.add_argument('--window-size=1600x1200')
# driver = webdriver.Firefox()
driver = webdriver.Firefox(firefox_binary="F:/Mozilla Firefox/firefox.exe", executable_path="F:/Mozilla Firefox/geckodriver.exe", options=options)  # NOTE(review): paths are machine-specific; these two keywords are presumably rejected by newer Selenium releases - confirm installed version

def _normalize_question(text):
    """Strip *text* and drop every newline and space so question texts compare equal."""
    return text.strip().replace("\n", "").replace(" ", "")


def shuashua(uid, name, passwd):
    """Log one student in with Selenium, then finish practice and exam via ``requests``.

    The login form is filled in the shared Firefox ``driver``; the captcha is
    cropped out of a full-page screenshot and read by the shared ``ocr``
    engine.  The browser cookies are then copied into a ``requests.Session``
    which studies the first three practice columns (collecting their
    question/answer pairs), fetches the exam paper, submits the answers,
    prints the result and logs out.

    Args:
        uid: Login account (user name).
        name: Student's name as entered in the login form.
        passwd: Account password.

    Raises:
        KeyError: If no ``SESSION`` cookie is present after the form was
            submitted (presumably a failed login), or a response lacks an
            expected key.
        Exception: Selenium, PIL and network errors propagate unchanged so the
            caller can retry.
    """
    beilv = 1.25  # screenshot scale factor: 1 without display scaling, 1.5 for 150 %
    taskid = 24
    login_url = "https://static.qspfw.moe.gov.cn/user/#/user/login"
    auth_api = "https://service-r07xmosl-1251413566.sh.apigw.tencentcs.com/userAuthApi/user"
    practice_api = "https://service-6xrqld9h-1251413566.sh.apigw.tencentcs.com/practice"
    paper_api = "https://service-pw82n1db-1251413566.sh.apigw.tencentcs.com/paper"
    form = '/html/body/div/div/div[2]/div/div[2]/form'

    # --- browser part: fill in the login form -------------------------------
    driver.get(login_url)
    driver.implicitly_wait(0.5)
    p_uid = driver.find_element(By.XPATH, f'{form}/div[2]/div/div/span/span/input')
    p_name = driver.find_element(By.XPATH, f'{form}/div[3]/div/div/span/span/input')
    p_passwd = driver.find_element(By.XPATH, f'{form}/div[4]/div/div/span/span/input')
    p_captcha = driver.find_element(By.XPATH, f'{form}/div[5]/div/div/span/span/input')
    p_img = driver.find_element(By.XPATH, f'{form}/div[5]/div/div/span/img')
    p_bt = driver.find_element(By.XPATH, f'{form}/div[8]/div/div/span/button')
    p_check = driver.find_element(By.XPATH, '//*[@id="formLogin"]/div[7]/div/div/span/div/label/span[1]/input')
    p_check.click()
    for field in (p_captcha, p_uid, p_name, p_passwd):
        field.clear()
    p_uid.send_keys(uid)
    p_name.send_keys(name)
    p_passwd.send_keys(passwd)

    # Captcha bounding box in page coordinates; the left edge is moved 25 px
    # inwards while the right edge stays at x + width.
    left = p_img.location['x'] + 25
    top = p_img.location['y']
    right = p_img.size['width'] + left - 25
    down = p_img.size['height'] + top
    time.sleep(2)

    # Crop the captcha out of a screenshot of the whole login page; screenshot
    # pixels are page coordinates multiplied by the scale factor.
    driver.get_screenshot_as_file("code.png")
    with Image.open('code.png') as page_shot:
        code_image = page_shot.crop((left * beilv, top * beilv, right * beilv, down * beilv))
        code_image.save('code_new.png')
        code_image.close()
    with open("code_new.png", "rb") as f:
        image = f.read()

    # Keep only digits and lower-case letters from the OCR result.
    txts = '0123456789abcdefghijklmnopqrstuvwxyz'
    res2 = "".join(ch for ch in ocr.classification(image) if ch in txts)
    print(f"验证码: {res2}")
    # Archive the captcha under its recognised text; create the folder on
    # first use instead of failing when it is missing.
    os.makedirs("cap", exist_ok=True)
    with open(f"cap/{res2}.png", "wb") as f:
        f.write(image)

    p_captcha.send_keys(res2)
    p_bt.click()
    time.sleep(3)

    # --- HTTP part: reuse the browser session with requests -----------------
    s = requests.Session()
    s.headers.update({
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
        "source": "1",
    })
    for cookie in driver.get_cookies():
        s.cookies.set(cookie['name'], cookie['value'])
    # A missing SESSION cookie raises KeyError here, which the caller treats
    # as a failed attempt.
    s.headers["Access-Token"] = s.cookies['SESSION']
    s.get(url=f"{auth_api}/info")
    s.headers["sourceId"] = "2"

    # Study the first three practice columns and remember every answer, keyed
    # by the normalised question text.
    tiku_d = {}
    columns = s.get(f"{practice_api}/getColumnList?taskId={taskid}").json()['data']['columnList']
    for column in columns[:3]:
        column_id = column['columnId']
        s.get(url=f"{practice_api}/studyByColumnId?columnId={column_id}&taskId={taskid}")
        c = s.get(url=f"{practice_api}/practice?columnId={column_id}&taskId={taskid}")
        for question in c.json()['data']['questionBankList']:
            tiku_d[_normalize_question(question['content'])] = question['answer']

    # Answer the exam paper from the collected bank; guess "D" when a question
    # cannot be matched and dump the details for later inspection.
    c = s.get(url=f"{paper_api}/getPaper?taskId={taskid}").json()
    parts = []
    for question in c['data']["paper"]["paperInfo"]:
        try:
            choice = tiku_d[_normalize_question(question["content"])]
        except (KeyError, AttributeError):
            print(tiku_d)
            print(question)
            choice = "D"
        parts.append(f'{question["id"]}_{choice}@!@')
    answer = "".join(parts)
    resultid = c['data']["resultId"]

    r_url = f"{paper_api}/saveResult?taskId={taskid}&paperId={c['data']['paper']['id']}&resultId={resultid}&answers={answer}&takeTime={random.randint(60,100)}"
    c = s.get(r_url)
    s_point = c.json()['data']['result']['testscore']
    c = s.get(url=f"{paper_api}/getResultInfo?taskId={taskid}")
    info1 = c.json()['data']
    print(f"学校：{info1['schoolName']} 年级：{info1['gradeName']} 姓名：{info1['userName']}")
    print(f"{info1['medal']} 得分：{s_point} 最高分：{info1['maxScore']}")
    print(f"剩余考试次数：{info1['leftExamCount']}")

    # Log out and put the browser back on the login page for the next student.
    c = s.get(url=f"{auth_api}/logout")
    print(c.json()["data"])
    driver.get(login_url)
    time.sleep(2)

# Read the roster: one "account,name,password" record per line.
# "utf-8-sig" accepts plain UTF-8 and also drops the byte-order mark some
# editors prepend, which would otherwise become part of the first account.
students = []
with open("用户名密码文件", "r", encoding="utf-8-sig") as f:
    for line in f:
        fields = line.strip().split(",")
        if len(fields) < 3:
            # Blank or incomplete line: nothing to log in with.
            continue
        students.append(fields)

allnum = len(students)
max_attempts = 3
try:
    for num, student in enumerate(students):
        stime = time.time()
        print("=" * 50)
        print(f"{num+1} / {allnum} {student[1]}")
        # Every failure (wrong captcha, timeout, ...) surfaces as an exception
        # from shuashua(); retry up to max_attempts times per student.
        for att in range(1, max_attempts + 1):
            try:
                shuashua(student[0], student[1], student[2])
                break
            except Exception as e:
                print(e)
                driver.refresh()
                time.sleep(4)
                if att == max_attempts:
                    # Record the student so the failures can be re-run later.
                    with open("fail.txt", "a", encoding="utf-8") as f:
                        f.write(",".join(student) + "\n")
                    print(f"{num+1} {student[1]} fail!")
                else:
                    print("Retry...")
        endtime = time.time()
        ttime = endtime - stime
        print(f"耗时{ttime:.2f}秒。")
finally:
    # Always shut the browser down, even when the loop aborts.
    driver.quit()

Selenium 还是太老了，改用Playwright重新改写了一下。好像速度提高不多，但是好像稳定了一丢丢。

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145


#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2023-10-21 21:03:11
# @Author  : Xzap (xzap@163.com)
# @Link    : http://xzap.gitee.io
# @Version : 2.0

from playwright.sync_api import Playwright, sync_playwright, expect
import time
import requests
import ddddocr
import random

ocr = ddddocr.DdddOcr()  # shared OCR engine; shuashua() uses it to read the login captcha

def _normalize_question(text):
    """Strip *text* and drop every newline and space so question texts compare equal."""
    return text.strip().replace("\n", "").replace(" ", "")


def shuashua(uid, name, passwd):
    """Log one student in with Playwright, then finish practice and exam via ``requests``.

    A new page is opened in the module-level browser ``context``, the login
    form is filled in and the captcha image is read by the shared ``ocr``
    engine.  The context cookies are then copied into a ``requests`` session
    which studies the first three practice columns (collecting their
    question/answer pairs), fetches the exam paper, submits the answers,
    prints the result and logs out.  The page is closed on every exit path so
    pages do not pile up across students and retries.

    Args:
        uid: Account entered into the account field of the login form.
        name: Student's name as entered in the login form.
        passwd: Account password.

    Raises:
        KeyError: If no ``SESSION`` cookie is present after the form was
            submitted (presumably a failed login), or a response lacks an
            expected key.
        Exception: Playwright and network errors propagate unchanged so the
            caller can retry.
    """
    taskid = 24
    login_url = "https://static.qspfw.moe.gov.cn/user/#/user/login"
    auth_api = "https://service-r07xmosl-1251413566.sh.apigw.tencentcs.com/userAuthApi/user"
    practice_api = "https://service-6xrqld9h-1251413566.sh.apigw.tencentcs.com/practice"
    paper_api = "https://service-pw82n1db-1251413566.sh.apigw.tencentcs.com/paper"

    page = context.new_page()
    try:
        # --- browser part: fill in the login form ---------------------------
        page.goto(login_url)
        page.get_by_placeholder("请输入学校下发账号").fill(uid)
        page.get_by_placeholder("请输入姓名").fill(name)
        page.get_by_placeholder("默认密码为账号后六位").fill(passwd)
        page.get_by_label("我已阅读并同意").check()
        page.get_by_role("img", name="换一张").screenshot(animations="disabled", path="pw_cap.png")
        with open("pw_cap.png", "rb") as f:
            image = f.read()

        # Keep only digits and lower-case letters from the OCR result.
        txts = '0123456789abcdefghijklmnopqrstuvwxyz'
        res2 = "".join(ch for ch in ocr.classification(image) if ch in txts)
        if len(res2) == 6:
            # A six-character reading is cut down to its last five characters.
            res2 = res2[1:]
        print(f"验证码: {res2}")
        page.get_by_placeholder("请输入验证码").fill(res2)
        page.get_by_role("button", name="登 录").click()
        time.sleep(2)

        # --- HTTP part: reuse the browser session with requests -------------
        s = requests.session()
        s.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36",
            "source": "1",
        })
        for cookie in page.context.cookies():
            s.cookies.set(cookie['name'], cookie['value'])
        # A missing SESSION cookie raises KeyError here, which the caller
        # treats as a failed attempt.
        s.headers["Access-Token"] = s.cookies['SESSION']
        s.get(url=f"{auth_api}/info")
        s.headers["sourceId"] = "2"

        # Study the first three practice columns and remember every answer,
        # keyed by the normalised question text.
        tiku_d = {}
        columns = s.get(f"{practice_api}/getColumnList?taskId={taskid}").json()['data']['columnList']
        for column in columns[:3]:
            column_id = column['columnId']
            s.get(url=f"{practice_api}/studyByColumnId?columnId={column_id}&taskId={taskid}")
            c = s.get(url=f"{practice_api}/practice?columnId={column_id}&taskId={taskid}")
            for question in c.json()['data']['questionBankList']:
                tiku_d[_normalize_question(question['content'])] = question['answer']

        # Answer the exam paper from the collected bank; guess "D" when a
        # question cannot be matched and dump the details for inspection.
        c = s.get(url=f"{paper_api}/getPaper?taskId={taskid}").json()
        parts = []
        for question in c['data']["paper"]["paperInfo"]:
            try:
                choice = tiku_d[_normalize_question(question["content"])]
            except (KeyError, AttributeError):
                print(tiku_d)
                print(question)
                choice = "D"
            parts.append(f'{question["id"]}_{choice}@!@')
        answer = "".join(parts)
        resultid = c['data']["resultId"]

        r_url = f"{paper_api}/saveResult?taskId={taskid}&paperId={c['data']['paper']['id']}&resultId={resultid}&answers={answer}&takeTime={random.randint(60,100)}"
        c = s.get(r_url)
        s_point = c.json()['data']['result']['testscore']
        c = s.get(url=f"{paper_api}/getResultInfo?taskId={taskid}")
        info1 = c.json()['data']
        print(f"学校：{info1['schoolName']} 年级：{info1['gradeName']} 姓名：{info1['userName']}")
        print(f"{info1['medal']} 得分：{s_point} 最高分：{info1['maxScore']}")
        print(f"剩余考试次数：{info1['leftExamCount']}")

        # Log out and send the browser back to the login page.
        c = s.get(url=f"{auth_api}/logout")
        print(c.json()["data"])
        page.goto(login_url)
        time.sleep(1)
    finally:
        # One page per call: release it whether the attempt succeeded or not.
        page.close()



with sync_playwright() as playwright:
    # Engine may be 'chromium', 'firefox' or 'webkit'.
    browser = playwright.chromium.launch(headless=True)
    # shuashua() opens its pages in this module-level context.
    context = browser.new_context()

    # Read the roster: one "account,name,password" record per line.
    # "utf-8-sig" accepts plain UTF-8 and also drops the byte-order mark some
    # editors prepend, which would otherwise become part of the first account.
    students = []
    with open("321.csv", "r", encoding="utf-8-sig") as f:
        for line in f:
            fields = line.strip().split(",")
            if len(fields) < 3:
                # Blank or incomplete line: nothing to log in with.
                continue
            students.append(fields)

    allnum = len(students)
    max_attempts = 3
    for num, ss in enumerate(students):
        stime = time.time()
        print("=" * 50)
        print(f"{num+1} / {allnum} {ss[1]}")
        # Every failure (wrong captcha, timeout, ...) surfaces as an exception
        # from shuashua(); retry up to max_attempts times per student.
        for att in range(1, max_attempts + 1):
            try:
                shuashua(ss[0], ss[1], ss[2])
                break
            except Exception as e:
                print(e)
                time.sleep(3)
                if att == max_attempts:
                    # Record the current student (`ss`) so failures can be
                    # re-run later; the original referenced an undefined `s`.
                    with open("fail.txt", "a", encoding="utf-8") as f:
                        f.write(",".join(ss) + "\n")
                    print(f"{num+1} {ss[1]} fail!")
                else:
                    print("Retry...")
        endtime = time.time()
        ttime = endtime - stime
        print(f"耗时{ttime:.2f}秒。")
    context.close()
    browser.close()