LLM测试题

本页面分享各类大语言模型测试题

测试题生成器

测试题生成器 | 作者：ZETA

保险箱密码测试题.py | 24点组合测试题.py

import random

def check_guess_condition(password, guess):
    """Tell whether a guess hits the password at exactly two non-adjacent spots.

    Only the first seven characters of each string are compared, position
    by position.

    Args:
        password: The secret code (at least seven characters).
        guess: The candidate code (at least seven characters).

    Returns:
        True when exactly two positions agree and those positions are not
        next to each other; False otherwise.
    """
    hit_positions = []
    for pos in range(7):
        if password[pos] == guess[pos]:
            hit_positions.append(pos)

    if len(hit_positions) != 2:
        return False

    # With exactly two hits, "non-adjacent" means a gap of more than one.
    first_hit, second_hit = hit_positions
    return second_hit - first_hit > 1

def puzzle_matches_password(password, guesses):
    """Return True when every guess satisfies the clue against this password.

    Args:
        password: Candidate secret code.
        guesses: Iterable of guess strings.

    Returns:
        False as soon as one guess fails check_guess_condition, True if
        none fail (including when there are no guesses).
    """
    for single_guess in guesses:
        if not check_guess_condition(password, single_guess):
            return False
    return True

def is_unique_solution(password, guesses):
    """Check that exactly one 7-distinct-digit code fits all the guesses.

    Every arrangement of seven different digits is tested; the scan stops
    early once a second fitting code turns up.

    Args:
        password: The intended answer (not consulted by the search itself).
        guesses: Iterable of guess strings that define the puzzle.

    Returns:
        True when precisely one candidate code satisfies all guesses.
    """
    already_found_one = False
    for candidate in all_combinations_of_7_distinct('0123456789'):
        if not puzzle_matches_password(candidate, guesses):
            continue
        if already_found_one:
            # A second fitting code means the puzzle is ambiguous.
            return False
        already_found_one = True
    return already_found_one

def all_combinations_of_7_distinct(digits):
    """Lazily yield every ordered selection of seven items from digits.

    Args:
        digits: Sequence of characters to draw from.

    Yields:
        Each 7-character string formed by one ordered selection, in the
        order itertools.permutations produces them.
    """
    from itertools import permutations

    yield from map(''.join, permutations(digits, 7))

def is_valid_guess(password, guess):
    """Decide whether a guess is usable as a clue for the given password.

    Args:
        password: The secret code.
        guess: Candidate guess string.

    Returns:
        True when the guess contains seven distinct characters and
        check_guess_condition accepts it for this password.
    """
    has_seven_distinct = len(set(guess)) == 7
    return has_seven_distinct and check_guess_condition(password, guess)

def generate_random_guess(password):
    """Draw random 7-digit guesses until one is a valid clue for password.

    Args:
        password: The secret code the guess must be valid against.

    Returns:
        The first randomly sampled string of seven distinct digits that
        is_valid_guess accepts. The loop has no attempt limit.
    """
    candidate = ''.join(random.sample('0123456789', 7))
    while not is_valid_guess(password, candidate):
        candidate = ''.join(random.sample('0123456789', 7))
    return candidate

def generate_puzzles(n=5):
    """Build up to n puzzles, each a password plus four clue guesses.

    A puzzle is kept only when its four guesses pin down the password
    uniquely. At most 100000 passwords are tried in total.

    Args:
        n: Number of puzzles wanted.

    Returns:
        A list of (password, guesses) tuples; it may hold fewer than n
        entries if the attempt budget runs out.
    """
    digit_pool = '0123456789'
    collected = []

    for _ in range(100000):
        if len(collected) >= n:
            break

        secret = ''.join(random.sample(digit_pool, 7))

        # Gather four different clue guesses for this secret.
        clues = []
        for _ in range(10000):
            if len(clues) >= 4:
                break
            candidate = generate_random_guess(secret)
            if candidate not in clues:
                clues.append(candidate)

        # Keep the puzzle only if the clues admit a single answer.
        if len(clues) == 4 and is_unique_solution(secret, clues):
            collected.append((secret, clues))

    return collected

if __name__ == "__main__":
    # Interactive entry point for the safe-password puzzle tool.
    #   mode 1: the user types four guesses and the matching password is searched
    #   mode 2: a random puzzle (password + four guesses) is generated
    #   mode 3: the user types a password and four guesses are generated for it
    #
    # sys.exit() is used rather than the bare exit() helper: exit() is injected
    # by the site module for interactive use and is not guaranteed to exist
    # (e.g. under "python -S" or in frozen executables).
    import sys

    all_digits = '0123456789'

    def _is_seven_distinct_digits(text):
        """Return True when text consists of exactly seven different ASCII digits."""
        return (
            len(text) == 7
            and len(set(text)) == 7
            and set(text) <= set(all_digits)
        )

    mode = input("請選擇模式 (1: 手動輸入, 2: 隨機生成, 3: 反推題目): ")
    if mode == "1":
        # Manual input mode
        guesses = []
        print("\n請輸入四個7位數的猜測，每個數字不能重複：")
        for i in range(4):
            guess = input(f"請輸入第{i+1}個猜測：")
            if _is_seven_distinct_digits(guess):
                guesses.append(guess)
            else:
                print("輸入不合法，請輸入一個由7個不同數字組成的數字。")
                sys.exit()

        # Search for the password(s) consistent with all four guesses
        found_password = None
        for possible_password in all_combinations_of_7_distinct(all_digits):
            if puzzle_matches_password(possible_password, guesses):
                if found_password is None:  # first solution found
                    found_password = possible_password
                else:  # a second solution means the answer is not unique
                    print("這些猜測無法得出唯一解！")
                    sys.exit()

        if found_password:
            print("\n根據您的猜測：", guesses)
            print("唯一可能的密碼是:", found_password)
        else:
            print("\n找不到符合條件的密碼！")

    elif mode == "2":
        # Random generation mode
        puzzles = generate_puzzles(1)  # generate a single puzzle
        if puzzles:
            password, guesses = puzzles[0]
            print("\nSroan 的私人保險箱密碼為 7 個不同的數字。", end='')
            print(f"現有四人猜測密碼，A 猜 {guesses[0]}，B 猜 {guesses[1]}，C 猜 {guesses[2]}，D 猜 {guesses[3]}。")
            print("Sroan 說：\"你們每個人都猜對了兩個數字，且這兩個數字的位置不相鄰。\"")
            print("假設四人足夠聰明，請問他們能否推理出正確的密碼？密碼是多少？")
            print("\n答案:", password)
        else:
            print("無法生成符合條件的謎題")

    elif mode == "3":
        # Reverse mode: build a puzzle for a password chosen by the user
        print("\n請輸入一個7位數密碼（每個數字不能重複）：")
        target_password = input("密碼：")
        # Non-digit characters must be rejected here: generate_random_guess
        # only samples digits and retries without limit, so a password that
        # can never be matched would make it loop forever.
        if not _is_seven_distinct_digits(target_password):
            print("密碼格式不正確！需要7個不同的數字。")
            sys.exit()

        # Try to generate a set of guesses that identifies the password
        max_attempts = 100000
        attempts = 0
        best_guesses = None

        while attempts < max_attempts:
            guesses = []
            guess_attempts = 0
            # Try to collect four different guesses
            while len(guesses) < 4 and guess_attempts < 1000:
                guess = generate_random_guess(target_password)
                if guess not in guesses:
                    guesses.append(guess)
                guess_attempts += 1

            # Accept only four guesses whose solution is unique
            if len(guesses) == 4 and is_unique_solution(target_password, guesses):
                best_guesses = guesses
                break

            attempts += 1

        if best_guesses:
            print("\nSroan 的私人保險箱密碼為 7 個不同的數字。", end='')
            print(f"現有四人猜測密碼，A 猜 {best_guesses[0]}，B 猜 {best_guesses[1]}，C 猜 {best_guesses[2]}，D 猜 {best_guesses[3]}。")
            print("Sroan 說：\"你們每個人都猜對了兩個數字，且這兩個數字的位置不相鄰。\"")
            print("假設四人足夠聰明，請問他們能否推理出正確的密碼？密碼是多少？")
            print("\n答案:", target_password)
        else:
            print("無法為此密碼生成合適的題目")
    else:
        print("無效選擇，請輸入1、2或3。")

from itertools import permutations, product, combinations
import random

def _apply_operator(symbol, left, right):
    """Apply one of + - * / to two operands; None marks an undefined value.

    A None operand (an earlier division by zero) propagates as None, and a
    division by zero here also yields None instead of raising.
    """
    if left is None or right is None:
        return None
    if symbol == '+':
        return left + right
    if symbol == '-':
        return left - right
    if symbol == '*':
        return left * right
    try:
        return left / right
    except ZeroDivisionError:
        return None


def calculate_target(numbers, target):
    """Find arithmetic expressions over four numbers that evaluate to target.

    Every ordering of the numbers is combined with every choice of three
    operators from + - * / and every way of bracketing four operands:

        (a.b).c.d      evaluated with normal operator precedence
        ((a.b).c).d
        (a.b).(c.d)    includes the fraction form (a-b)/(c-d)
        (a.(b.c)).d
        a.((b.c).d)
        a.(b.(c.d))

    The last three shapes are required for puzzles such as 1, 5, 5, 5 -> 24,
    whose only solution is 5*(5-(1/5)).

    Args:
        numbers: Sequence of numbers; orderings of four of them are tried.
            With fewer than four numbers no expression can be formed.
        target: Value the expression must reach.

    Returns:
        A set of expression strings whose value is within 0.0001 of target.
        The set is empty when there is no solution.
    """
    operators = ['+', '-', '*', '/']
    solutions = set()

    # A set removes orderings that repeat because of duplicate numbers.
    for a, b, c, d in set(permutations(numbers, 4)):
        for o1, o2, o3 in product(operators, repeat=3):
            ab = _apply_operator(o1, a, b)
            bc = _apply_operator(o2, b, c)
            cd = _apply_operator(o3, c, d)

            chained = _apply_operator(o3, _apply_operator(o2, ab, c), d)
            paired = _apply_operator(o2, ab, cd)
            # "(a.b).c.d" follows precedence: a trailing * or / binds tighter
            # than a preceding + or -, otherwise evaluation is left to right.
            flat = paired if (o2 in '+-' and o3 in '*/') else chained

            candidates = (
                ("({a}{o1}{b}){o2}{c}{o3}{d}", flat),
                ("(({a}{o1}{b}){o2}{c}){o3}{d}", chained),
                ("({a}{o1}{b}){o2}({c}{o3}{d})", paired),
                ("({a}{o1}({b}{o2}{c})){o3}{d}",
                 _apply_operator(o3, _apply_operator(o1, a, bc), d)),
                ("{a}{o1}(({b}{o2}{c}){o3}{d})",
                 _apply_operator(o1, a, _apply_operator(o3, bc, d))),
                ("{a}{o1}({b}{o2}({c}{o3}{d}))",
                 _apply_operator(o1, a, _apply_operator(o2, b, cd))),
            )
            for template, value in candidates:
                # Division produces floats, so compare with a tolerance.
                if value is not None and abs(value - target) < 0.0001:
                    solutions.add(template.format(
                        a=a, b=b, c=c, d=d, o1=o1, o2=o2, o3=o3))

    return solutions

def generate_valid_numbers(target):
    """Randomly pick four numbers in 1..13 that can be combined into target.

    Up to 1000 random hands are tried.

    Args:
        target: Value the four numbers must be able to reach.

    Returns:
        (numbers, solutions) for the first hand calculate_target can solve,
        or (None, None) when every attempt fails.
    """
    for _ in range(1000):
        hand = [random.randint(1, 13) for _ in range(4)]
        found = calculate_target(hand, target)
        if found:  # at least one expression reaches the target
            return hand, found
    return None, None

def find_all_possible_combinations(target):
    """Enumerate hands of four numbers from 1..13 that can make target.

    Hands are tried in three groups: two distinct values (each used one to
    three times), three distinct values with one of them doubled, and four
    distinct values. Every solvable hand is printed with its solutions.

    Args:
        target: Value each hand must be able to reach.

    Returns:
        A list of (numbers, solutions) pairs, one per solvable hand, in the
        order the hands were tried.
    """
    pool = list(range(1, 14))

    def candidate_hands():
        # Two distinct values split 1+3, 2+2 and 3+1.
        for low, high in combinations(pool, 2):
            for low_count in (1, 2, 3):
                yield sorted([low] * low_count + [high] * (4 - low_count))
        # Three distinct values, each one doubled in turn.
        for trio in combinations(pool, 3):
            for doubled in trio:
                yield sorted(trio + (doubled,))
        # Four distinct values.
        for quad in combinations(pool, 4):
            yield list(quad)

    already_tried = set()
    solvable = []
    for hand in candidate_hands():
        key = tuple(hand)
        if key in already_tried:
            continue
        already_tried.add(key)
        found = calculate_target(hand, target)
        if found:
            solvable.append((hand, found))

    if not solvable:
        print(f"\n找不到任何4個數字的組合可以得到 {target}。")
        print(f"Cannot find any combination of four numbers that can make {target}.")
        return solvable

    divider = "=" * 50
    print("\n=== 找到以下可能的組合 / Found following combinations ===\n")
    for hand, found in solvable:
        n1, n2, n3, n4 = hand
        print(divider)
        print(f"請使用數字 {hand} 計算出 {target}。")
        print("可以使用 +, -, *, / 運算符，每個數字只能使用一次。")
        print(f"\nSolve this how to get {target} from {n1}, {n2}, {n3}, {n4}")
        print("by simple addition, subtraction, multiplication and division, using each number once.")
        print(f"\n解答 / Solutions ({len(found)} solutions):")
        for expression in found:
            print(f"{expression} = {target}")
        print(divider + "\n")

    return solvable

def print_problem(numbers, target):
    """Print the bilingual problem statement for one hand of four numbers.

    Args:
        numbers: Sequence whose first four items are the numbers to use.
        target: Value the player has to reach.
    """
    chinese_rules = "可以使用 +, -, *, / 運算符，每個數字只能使用一次。"
    english_rules = "by simple addition, subtraction, multiplication and division, using each number once."

    print(f"\n請使用數字 {numbers} 計算出 {target}。")
    print(chinese_rules)
    listed = "{}, {}, {}, {}".format(*numbers[:4])
    print(f"\nSolve this how to get {target} from {listed}")
    print(english_rules)

def verify_numbers(numbers, target):
    """Report whether the given numbers can be combined into target.

    Prints either every solution calculate_target finds or a bilingual
    "cannot make" message.

    Args:
        numbers: The numbers to check.
        target: Value they should reach.
    """
    found = calculate_target(numbers, target)
    if not found:
        print(f"\n很抱歉，這組數字無法計算出 {target}。")
        print(f"Sorry, these numbers cannot make {target}.")
        return

    how_many = len(found)
    print(f"\n恭喜！這組數字確實可以計算出 {target}！")
    print(f"Congratulations! These numbers can make {target}!")
    print(f"\n共找到 {how_many} 種解法 / Found {how_many} solutions:")
    for expression in found:
        print(f"{expression} = {target}")

def main():
    """Run the interactive menu loop of the number game.

    Choices: 1 generates a solvable problem for a target, 2 verifies four
    user-supplied numbers against a target, 3 lists every solvable hand for
    a target, 4 exits. Invalid numeric input is reported and the menu is
    shown again.
    """
    while True:
        print("\n=== 數字運算遊戲 / Number Game ===")
        print("1. 產生題目 / Generate problem")
        print("2. 驗證解答 / Verify solution")
        print("3. 尋找所有可能組合 / Find all possible combinations")
        print("4. 離開遊戲 / Exit")

        choice = input("\n請選擇 / Please choose (1-4): ")

        if choice == '4':
            print("謝謝使用！/ Thank you for playing!")
            break

        elif choice == '1':
            try:
                target = int(input("\n請輸入目標數字 / Please input target number: "))
                numbers, solutions = generate_valid_numbers(target)

                if numbers and solutions:
                    print("="*50)
                    print(f"請使用數字 {numbers} 計算出 {target}。")
                    print("可以使用 +, -, *, / 運算符，每個數字只能使用一次。")
                    print(f"\nSolve this how to get {target} from {numbers[0]}, {numbers[1]}, {numbers[2]}, {numbers[3]}")
                    print("by simple addition, subtraction, multiplication and division, using each number once.")
                    print(f"\n解答 / Solutions ({len(solutions)} solutions):")
                    for solution in solutions:
                        print(f"{solution} = {target}")
                    print("="*50)
                else:
                    print(f"\n無法找到可以計算出 {target} 的4個數字組合。")
                    print(f"Cannot find four numbers that can make {target}.")

            except ValueError:
                print("錯誤：請輸入有效的數字！/ Error: Please input a valid number!")

        elif choice == '2':
            try:
                print("\n請輸入4個數字，用空格分隔 / Please input 4 numbers separated by spaces:")
                numbers = list(map(int, input().split()))
                if len(numbers) != 4:
                    print("錯誤：請確實輸入4個數字！/ Error: Please input exactly 4 numbers!")
                    continue

                target = int(input("請輸入目標數字 / Please input target number: "))
                verify_numbers(numbers, target)

            except ValueError:
                print("錯誤：請輸入有效的數字！/ Error: Please input valid numbers!")

        elif choice == '3':
            try:
                target = int(input("\n請輸入目標數字 / Please input target number: "))
                print(f"\n正在尋找所有可能的數字組合，請稍候... / Searching for all possible combinations...")
                # find_all_possible_combinations already prints every hand
                # with its solutions (or the "not found" message), so only a
                # summary is added here; printing the list again would show
                # each result twice. The result is not named "combinations"
                # to avoid shadowing the itertools function of that name.
                found_hands = find_all_possible_combinations(target)

                if found_hands:
                    print(f"\n找到 {len(found_hands)} 組不同的數字組合可以得到 {target}：")
                    print(f"Found {len(found_hands)} different number combinations that can make {target}:")

            except ValueError:
                print("錯誤：請輸入有效的數字！/ Error: Please input valid numbers!")

        else:
            print("無效的選擇！請重試。/ Invalid choice! Please try again.")

# Start the interactive number game only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

Sroan Puzzle 测试题生成器下载（.exe 文件）

模型检测题

经典鲁迅和周树人互殴事件

token燃烧题

SSRT-100 推理基准测试（Beta1.2） | 作者：YoRHa

问题答案

作者：YoRHa

SSRT-100 推理基准测试（Beta1.2）
以下是对两张实拍图像的描述，这些图像包含了所有关键线索，请你根据严密推理分析判断图中内容，排除不合理的推测，解释图中发生的事件，并回答几个问题。
信息：
第一张图像的内容：
图像下侧UI内容：
中央仪表：
时间：T+00:06:56
左侧仪表：
速度：0 km/h
高度：0 km
LOX：接近耗尽
CH4：有少量剩余
助推器的监控视图：助推器姿态垂直，内圈有三台引擎正在运作。
右侧仪表：
速度：17,535 km/h
高度：144 km
LOX：接近耗尽
CH4：有少量剩余
星舰的监控视图：星舰姿态水平，无引擎运作。
图像拍摄内容描述：超重助推器位于发射台上，内圈有三台发动机点火，火焰和烟雾非常明显。
右下角
“Starship飞行测试”。
观众人数：180万
第二张图像的内容：
图像下侧UI内容：
中央仪表：
时间：T+00:06:57
左侧仪表：
速度：0 km/h
高度：0 km
LOX：接近耗尽
CH4：有少量剩余
助推器的监控视图：助推器姿态垂直，内圈引擎关机。
右侧仪表：
速度：17,535 km/h
高度：144 km
LOX：接近耗尽
CH4：有少量剩余
星舰的监控视图：星舰保持水平，没有发动机点火。
图像拍摄内容描述：发射塔和助推器的高角度俯拍。
助推器结构完好，箭体与塔体两只机械臂接触，没有其他机械或管线连接，助推器底部可见少量明火和烟雾。
右下角
“Starship飞行测试”。
观众人数：180万

测试问题（仅作答）： 
第一部分：基础事实识别（20分）
1.此次任务的性质是（亚轨道/轨道）测试还是地面测试？（5分）
2.助推器内圈三台引擎点火的作用是什么？（5分）
3.机械臂的作用是什么？（5分）
4.图二中，助推器底侧的火情最可能是正常还是异常状态？（5分）
第二部分： 数据解读（30分）
1.结合两级火箭的燃料状态，推断任务进行到了哪个阶段？（15分）
2.根据时间戳和各项参数，这次任务经历了哪些关键节点？简要描述现有的任务剖面（15分）
第三部分：核心推理（50分）
1.两张图片记录的是什么“关键时刻”？为什么？（25分）
2.判断助推器是否达成预期目标，依据是什么。（25分）

参考评分标准：
第一部分：
1.亚轨道测试5分  轨道测试3分 大意相同或指出不达第一宇宙速度，得满分；地面测试或其余答案不得分
2.着陆阶段的姿态控制/减速（5分），大意相同得分，其余答案不得分
3.抓取并固定助推器（5分），其余答案不得分
4.引擎关机后正常的余燃现象（5分），其余答案（包括燃烧的正常表现）不得分
第二部分：
1.助推器回收。（15分），其余答案不得分。
2.主要阶段：点火起飞、级间分离、助推器回收（整体准确计15分）
第三部分：
1.助推器完成使命，返回发射场并被机械臂“抓取”或“固定” 的瞬间。（25分）大意相同得分，其余答案不得分
2.成功，助推器成功回收（必须根据助推器回收得出成功结论，其余答案均不得分）（25分）

StarShip高级推理能力基准测试（SSRT-100）
测试记录（取多次平均值）
Chatgpt模型：
O1系列：
Chatgpt o1：100分（推理模型并列第一）
Chatgpt o1-preview：95分
Chatgpt o1-pro：100分（推理模型并列第一）
Chatgpt o1-mini：10分
o3系列：
Chatgpt o3-mini：10分
Chatgpt o3-mini-high：10分
4o系列：
Chatgpt 4o-latest：5分
Chatgpt 4o-240513：5分
Chatgpt 4o-mini：5分

Claude系列模型：
3.5系列：
Claude 3.5 Sonnet 1022：68分
Claude 3.5 Sonnet 0620：98分（非推理模型最高分）
claude-3-5-haiku-20241022：5分
3.0系列：
claude-3-sonnet-20240229：5分
claude-3-opus-20240229：8分

谷歌模型：
实验系列：
Gemini 2.0 Flash Exp：45分
Gemini Exp 1206：0分
Gemini 2.0 Flash Thinking Exp 01-21：90分
常规系列：
Gemini 1.5 Pro：0分
Gemini 1.5 Flash：8分

中国国产模型（若无备注，均为App版本）：
深度求索 Deepseek R1：100分（推理模型并列第一）
深度求索 Deepseek V3：10分
通义千问：3分
文心一言：3分
讯飞星火：0分
kimi：8分
kimi 1.5长思考：8分
智谱清言（GLM4）：10分
智谱清言（GLM4-Plus）：10分

间谍题-字母反转 | 作者：ZETA

问题答案

有一天，美国中央情报局收到了一封可疑的匿名信。打开信件后，里面有一张字条，上面写着：“qs lHS lN”。美国专家翻译了很久都没有成功，只好请教中国专家。中国专家看了一眼就说：“你看反了。”

这个字条的意思是？

反转后：NI SHI sb
中文拼音：你是傻逼

四棱锥最大内接球半径求解 | 作者：ZETA

问题答案

已知四棱锥P−ABCD的侧面PAD为正三角形，底面ABCD为矩形，且面PAD与面ABCD垂直。
如果PA=4/√3，AB=2，求该四棱锥内可以放置的最大球的半径。

r = 2 − √2

星语者的宇宙坐标密钥解析 | 作者：ZETA

问题答案

在浩瀚的宇宙深处，一个古老而高度发达的文明——“星语者”曾以其精妙的数学和密码技术著称。他们将宇宙的奥秘编码隐藏在一系列看似随机的数字之中，期望后来的智慧文明能解读。

如今，我们偶然发现了一组由星语者遗留的神秘数字序列：

序列 A: 3, 15, 35, 77, 99, 143, 169, 221, 289, 323, ...
序列 B: 2, 6, 12, 30, 56, 90, 132, 182, 240, 306, ...
序列 C: 5, 25, 125, 625, ...
序列 D: 6, 24, 120, 720, ...

星语者的记录表明，这四个序列并非独立存在，它们共同指向一个隐藏的“宇宙坐标密钥”。为了找到这个密钥，你需要：

解析序列规律：仔细分析序列 A、B、C、D，找出每个序列背后的数学规律。

序列关联与转换：星语者暗示，这四个序列之间存在某种关联，你需要找到将序列 A 和序列 B 转换为序列 C 和序列 D 的方法。提示：考虑序列的“项”与“项”之间的运算关系，以及序列的“序号”所代表的意义。

密钥生成：一旦你理解了序列之间的关联，以及每个序列的本质规律，就能够推导出“宇宙坐标密钥”的生成方法。这个密钥由一个不超过 100 的正整数构成。

请求解：宇宙坐标密钥 = ___

正确答案：30
备注：GPT系列喜欢答 42

Sroan 的私人保险箱 | 作者：ZETA

问题答案

Sroan 的私人保险箱密码为 7 个不同的数字。现有四人猜测密码，A 猜 8395760，B 猜 9427601，C 猜 5783462，D 猜 7912840。
Sroan 说：“你们每个人都猜对了两个数字，且这两个数字的位置不相邻。”
假设四人足够聪明，请问他们能否推理出正确的密码？密码是多少？

火柴棒謎題 | 作者：报纸

问题答案

當前有兩個不正確的等式，這些等式中的數字和運算符號均以液晶數字（LCD）的形式由火柴棒組成：

1. **等式一**：9 + 2 = 5
2. **等式二**：5 - 2 = 7

## 火柴棒數量

- **數字與符號的火柴棒數量：**
- 9 : 6 根火柴棒
- + : 2 根火柴棒
- 2 : 5 根火柴棒
- = : 2 根火柴棒
- 5 : 5 根火柴棒
- - : 1 根火柴棒
- 7 : 3 根火柴棒

## 題目要求

1. **僅允許移動一根火柴棒。**
2. **移動後，兩個等式必須同時成立為正確的算式。**
3. **需要思考應該移動哪一根火柴棒，以達到上述目標。**

解1：3+2=5、9-2=7
解2：3+2=5、5+2=7