某课堂字体逆向以及自动答题

起因：一门课老师在某课堂给出了刷题题库，可以无限次提交，直至答对
想法：
- 手动做题然后通过导出响应生成题库，考试时直接搜索（选修课，且在手机上考试）
- 自动做题，自动试错，直至答对
未来展望
- 实现登录自动获取cookie（目前需要手动提取）
- 实现自动获取所有课程已做过的题目，生成题库
该文章中所有代码均已提交至 Github
- qingkong9579/yuketang_reverse

已答题的题库提取

手动保存响应，观察响应格式（类似该格式的请求 https://www.yuketang.cn/mooc-api/v1/lms/exercise/get_exercise_list/5554113/）
Body为题目，Options为选项，当答题后可用user下的is_right判断当前答案是否为正确答案

本文主题：雨课堂字体逆向。参考文章：【小记】探探学习平台的字体混淆 - SomeBottle - 博客园

核心思想：同一个字在加密字体和原字体中的字形相同，因此字形的哈希值也相同，可以借助哈希把加密后的 Unicode 码对应回原始 Unicode 码
$\mathrm{unicode}_{encrypt} \leftarrow \mathrm{hash}(encrypt\_font) = \mathrm{hash}(source\_font) \rightarrow \mathrm{unicode}_{source}$
首先从请求中获取加密后的字体（原文章从网页中正则匹配加密字体url）
双击打开查看字体信息（左边为加密字体，右边为原字体），从此处也可以看出，只对中文进行了混淆，英文和数字无变化，原字体可从GitHub下载（adobe-fonts/source-han-sans: Source Han Sans | 思源黑体 | 思源黑體 | 思源黑體香港 | 源ノ角ゴシック | 본고딕）

首先分别生成原字体和加密字体的“字形哈希 → Unicode”映射

1
def extract_glyph_mapping(font_path: str) -> Dict[str, int]:
    """
    Build a mapping from glyph-outline hash to Unicode code point for a font.

    Every code point listed in the font's best cmap is drawn into a
    ``PathCollector``; the recorded drawing commands are hashed with
    ``hash_glyph`` and the hash becomes the dictionary key.

    Args:
        font_path: Path of the font file, handed to ``load_font``.

    Returns:
        Dictionary keyed by glyph hash whose values are code points. When
        several code points produce the same hash, the first one encountered
        is kept. Empty when the font exposes no usable cmap.
    """
    font = load_font(font_path)

    # NOTE(review): assumes load_font returns a fontTools TTFont, whose
    # getBestCmap() yields None when no Unicode cmap subtable exists.
    cmap = font.getBestCmap()
    if not cmap:
        return {}

    # The glyph set does not change while iterating, so fetch it once
    # instead of once per code point.
    glyph_set = font.getGlyphSet()

    glyphs_to_uni = {}
    for unicode_val, glyph_name in cmap.items():
        glyph = glyph_set.get(glyph_name)
        if glyph and hasattr(glyph, 'draw'):
            # Capture the drawing commands with a custom pen-like object.
            path_collector = PathCollector()
            glyph.draw(path_collector)

            glyph_hash = hash_glyph(path_collector.commands)

            # Keep the first code point seen for a hash (handles collisions).
            glyphs_to_uni.setdefault(glyph_hash, unicode_val)

    return glyphs_to_uni
32

33
class PathCollector:
    """Pen-like object that records every drawing call it receives.

    Each call is appended to ``commands`` as a tuple whose first element is
    the method name, followed by the coordinates that were passed in.
    """

    def __init__(self):
        self.commands = []

    @staticmethod
    def _point(p):
        """Return ``p`` as an ``(x, y)`` tuple, passing ``None`` through."""
        return None if p is None else (p[0], p[1])

    def moveTo(self, p):
        self.commands.append(('moveTo', p[0], p[1]))

    def lineTo(self, p):
        self.commands.append(('lineTo', p[0], p[1]))

    def curveTo(self, *points):
        self.commands.append(('curveTo', *[self._point(p) for p in points]))

    def qCurveTo(self, *points):
        # The pen protocol allows the last point to be None (a TrueType
        # contour without on-curve points); keep it as None instead of
        # failing on p[0].
        self.commands.append(('qCurveTo', *[self._point(p) for p in points]))

    def closePath(self):
        self.commands.append(('closePath',))

    def endPath(self):
        # Open contours are finished with endPath rather than closePath.
        self.commands.append(('endPath',))

    def addComponent(self, glyphName, transformation):
        # Composite glyphs reference other glyphs by name. The name is part
        # of the record, so such glyphs only compare equal across fonts that
        # use the same glyph names.
        self.commands.append(('addComponent', glyphName, tuple(transformation)))

生成完毕后，将两者hash进行比对，若相同，则将加密字体hash所对应的unicode码与正确unicode码建立映射

1
def create_unicode_mapping(original_mapping_path: str, encrypted_mapping_path: str, output_path: str = './unicode_mapping.json') -> Dict:
    """
    Build the table that translates encrypted code points to original ones.

    Both input files are JSON objects keyed by glyph hash. For every hash
    present in both, the encrypted font's code point (as a string key) is
    mapped to the original font's code point. The table is also written to
    ``output_path`` as indented JSON.

    Args:
        original_mapping_path: JSON file mapping glyph hash -> code point
            for the original font.
        encrypted_mapping_path: JSON file mapping glyph hash -> code point
            for the encrypted font.
        output_path: Where the resulting table is written.

    Returns:
        Dictionary ``{str(encrypted code point): original code point}``.
        An empty dictionary is returned (and a message printed) when
        anything goes wrong.
    """
    try:
        with open(original_mapping_path, 'r', encoding='utf-8') as f:
            hash_to_original_unicode = json.load(f)

        with open(encrypted_mapping_path, 'r', encoding='utf-8') as f:
            hash_to_encrypted_unicode = json.load(f)

        # Glyph hashes shared by both fonts link an encrypted code point to
        # the code point it really stands for.
        unicode_mapping = {
            str(hash_to_encrypted_unicode[glyph_hash]): original_unicode
            for glyph_hash, original_unicode in hash_to_original_unicode.items()
            if glyph_hash in hash_to_encrypted_unicode
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(unicode_mapping, f, ensure_ascii=False, indent=2)

        print(f"成功创建Unicode映射表，共 {len(unicode_mapping)} 个映射")
        return unicode_mapping

    except Exception as e:
        # Deliberate boundary: report the failure and hand back an empty
        # table instead of propagating.
        print(f"创建Unicode映射表失败: {e}")
        return {}

生成的正确映射如图（只适用于当前请求，每次请求所使用的加密字体都会变化）。此时只需读取此次请求中被加密字体混淆的字符的 Unicode 码，将其替换为映射表中对应的原始 Unicode 码，即可还原出正确的文本
最终结果

自动答题实现

建议先通过先前的请求，获取题目的类型以及选项

1
# Load the saved exercise-list response. The file contains non-ASCII text,
# so decode it as UTF-8 explicitly instead of relying on the platform
# default encoding.
with open('data.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
data = data['data']
font_url = data['font']
problems = data['problems']
# To resume from a later question, slice the list first, e.g. problems[80:].
for problem in problems:
    index = problem.get('index', None)
    content = problem.get('content', None)
    ProblemID = content.get('ProblemID', None)
    Type = content.get('Type', None)

    # Collect the key of every available option.
    Options = content.get('Options', None)
    options_keys = []
    if Options:
        print("Problem Options:")
        options_keys = [option.get('key', '') for option in Options]

    print(f"\n处理题目 {index}, ID: {ProblemID}, 类型: {Type}")
    print(f"可用选项: {options_keys}")

拼接请求体

1
# Example request body
json_data = {
    "classroom_id": 25012730,
    "problem_id": 54710288,
    "answer": ["A", "B"],
}

请求体由三个字段组成：classroom_id、problem_id 和 answer。其中 answer 需要按题目类型（Type 字段）区别处理：单选题或判断题依次尝试每个选项，直至响应中的 is_correct 字段为 True，即为正确答案；多选题则先提交包含全部选项的答案数组，响应中的 my_answers 会标出所提交的每个选项正确与否，据此即可在下一次提交中给出正确答案

1
# Answers one problem. NOTE(review): meant to run once per problem (inside
# the ``for problem in problems`` loop) and relies on Type, ProblemID, index,
# options_keys, post_url and headers having been set beforehand.
found_answer = False
if Type in ('SingleChoice', 'Judgement'):
    # Exactly one option is right: submit the options one at a time until
    # the server reports is_correct.
    for option_key in options_keys:
        answer = [option_key]
        print(f"尝试答案: {answer}")

        json_data = {
            "classroom_id": 25012730,
            "problem_id": ProblemID,
            "answer": answer,
        }

        response = make_request_with_rate_limit(post_url, json_data, headers)

        if response.status_code != 200:
            print(f"请求失败，状态码: {response.status_code}")
            continue

        response_json = response.json()
        print(response_json)
        print(response_json['data'])

        if response_json['data']['is_correct']:
            print(f"✓ 找到正确答案: {answer}")
            found_answer = True
            break

        print(f"✗ 答案错误: {answer}")
        time.sleep(1)  # brief pause before trying the next option
elif Type == 'MultipleChoice':
    # First submit every option; the response marks each submitted option
    # as right or wrong.
    answer = options_keys
    print(f"尝试答案: {answer}")
    json_data = {
        "classroom_id": 25012730,
        "problem_id": ProblemID,
        "answer": answer,
    }

    response = make_request_with_rate_limit(post_url, json_data, headers)

    if response.status_code == 200:
        response_json = response.json()
        print(response_json)
        my_answers = response_json['data']['my_answers']
        # Keep only the options the server flagged as correct.
        correct_answers = [key for key, value in my_answers.items() if value]

        # Submit again with just the correct options.
        json_data = {
            "classroom_id": 25012730,
            "problem_id": ProblemID,
            "answer": correct_answers,
        }
        response = make_request_with_rate_limit(post_url, json_data, headers)

        # Only parse the body when the resubmission itself succeeded.
        if response.status_code == 200:
            response_json = response.json()
            if response_json['data']['is_correct']:
                print(f"✓ 找到正确答案: {correct_answers}")
                found_answer = True
            else:
                print(f"✗ 答案错误: {correct_answers}")
        else:
            print(f"请求失败，状态码: {response.status_code}")
    else:
        print(f"请求失败，状态码: {response.status_code}")

# Reported for every problem type, including types not handled above.
if not found_answer:
    print(f"警告：未能找到题目 {index} (ID: {ProblemID}) 的正确答案")

关于 make_request_with_rate_limit 函数：提交过快会触发风控，此时响应状态码为 429，响应体中含有需要等待的时间，可以用正则表达式提取该时间，等待后再重试

1
def make_request_with_rate_limit(url, json_data, headers, max_retries=3):
    """POST ``json_data`` to ``url``, waiting and retrying on HTTP 429.

    Args:
        url: Target URL.
        json_data: Object sent as the JSON request body.
        headers: Request headers.
        max_retries: Maximum number of requests to send. Values below 1 are
            treated as 1 so that a response is always available to return.

    Returns:
        The first response whose status is not 429, or the last 429
        response once all attempts are used up.
    """
    attempts = max(1, max_retries)
    for retry in range(attempts):
        response = requests.post(url, json=json_data, headers=headers)
        print(f"状态码: {response.status_code}")

        if response.status_code != 429:
            return response

        # Rate limited. The body normally carries a 'detail' message, but
        # tolerate a body that is not JSON or not an object.
        try:
            payload = response.json()
        except ValueError:
            payload = {}
        detail = payload.get('detail', '') if isinstance(payload, dict) else ''
        if not isinstance(detail, str):
            detail = str(detail)
        print(f"速率限制: {detail}")

        # No attempt is left: waiting would only delay returning the 429.
        if retry == attempts - 1:
            break

        wait_time = 5  # default wait in seconds

        # Prefer the wait time announced in the error message.
        match = re.search(r'Expected available in (\d+\.?\d*) seconds', detail)
        if match:
            wait_time = float(match.group(1)) + 2  # 2 extra seconds as buffer

        print(f"等待 {wait_time:.1f} 秒后重试... (尝试 {retry+1}/{attempts})")

        # Countdown display, one tick per whole second.
        total_seconds = int(wait_time)
        for remaining in range(total_seconds, 0, -1):
            sys.stdout.write(f"\r剩余等待时间: {remaining} 秒...")
            sys.stdout.flush()
            time.sleep(1)
        # Sleep the fractional remainder so the real wait matches wait_time.
        time.sleep(wait_time - total_seconds)

        # Overwrite the countdown line and announce the retry.
        sys.stdout.write("\r等待完成，正在重试请求...                \n")
        sys.stdout.flush()

    # Every attempt was rate limited: return the last response.
    return response

‍