Greasy Fork

Greasy Fork is available in English.

鸿科经纬 题目爬虫 (v8.0 - 增加格式选择)

直接在当前页面循环抓取题目,支持选择JSON或表格(CSV)格式保存,可随时停止并保存已获取的数据。

// ==UserScript==
// @name         鸿科经纬 题目爬虫 (v8.0 - 增加格式选择)
// @namespace    http://tampermonkey.net/
// @version      8.0
// @description  直接在当前页面循环抓取题目,支持选择JSON或表格(CSV)格式保存,可随时停止并保存已获取的数据。
// @author       Gemini
// @match        http://yun.hotmatrix.cn/*
// @match        http://eshopcourse.hotmatrix.cn/*
// @grant        GM_addStyle
// @grant        GM_download
// @icon         https://www.google.com/s2/favicons?sz=64&domain=hotmatrix.cn
// ==/UserScript==

(function() {
    'use strict';

    // --- Shared crawl state ---
    // True while a crawl loop is running; set back to false to ask the loop to stop.
    let isCrawling = false;
    // Scraped questions, keyed by the question number read from the page.
    const allQuestions = new Map();

    // --- Utilities ---
    /** Prints an informational message to the console, prefixed with the script tag. */
    function log(message, ...args) {
        console.log(`[题目爬虫 v8.0] ${message}`, ...args);
    }

    /** Prints an error message to the console, prefixed with the script tag. */
    function err(message, ...args) {
        console.error(`[题目爬虫 v8.0] ${message}`, ...args);
    }

    /** Returns a promise that resolves after `ms` milliseconds. */
    function delay(ms) {
        return new Promise((resolve) => {
            setTimeout(resolve, ms);
        });
    }

    // --- UI ---
    /**
     * Syncs the control panel widgets with the crawl state.
     *
     * Does nothing unless the start button, stop button and format selector
     * are all present in the document.
     *
     * @param {boolean} crawling - true while a crawl is in progress.
     */
    function updateButtonState(crawling) {
        const [startBtn, stopBtn, formatSelect] = ['crawlStartButton', 'crawlStopButton', 'formatSelector']
            .map((id) => document.getElementById(id));
        if (!startBtn || !stopBtn || !formatSelect) {
            return;
        }

        // The start button is only usable while idle; its label is reset here
        // because the crawl loop overwrites it with progress text.
        startBtn.disabled = crawling;
        startBtn.textContent = '开始爬取题目';

        // The stop button is both enabled and visible only during a crawl.
        stopBtn.disabled = !crawling;
        stopBtn.style.display = crawling ? 'inline-block' : 'none';

        // The output format cannot be switched while a crawl is running.
        formatSelect.disabled = crawling;
    }

    // --- 数据转换 ---

    /**
     * Serializes question records into CSV text.
     *
     * Output is a header row followed by one row per question, joined with
     * "\n". The option map of each question is flattened into a single cell
     * with one "key: value" pair per line.
     *
     * @param {Array<Object>} data - question records with `number`, `type`,
     *     `title`, `options` (key -> text map, may be absent) and `foundAnswer`.
     * @returns {string} CSV text, or an empty string when `data` is empty or missing.
     */
    function convertToCSV(data) {
        if (!data || data.length === 0) return '';

        // Quote a field when it contains a delimiter, a quote or any line
        // break (LF or CR); embedded quotes are doubled. null/undefined
        // become an empty cell.
        const escapeCSV = (field) => {
            if (field === null || field === undefined) return '';
            const str = String(field);
            if (/[",\r\n]/.test(str)) {
                return `"${str.replace(/"/g, '""')}"`;
            }
            return str;
        };

        const toLine = (cells) => cells.map(escapeCSV).join(',');

        const headers = ['题号', '类型', '题目', '所有选项', '正确答案'];

        const bodyLines = data.map((q) => {
            // A record without an option map yields an empty options cell
            // instead of aborting the whole export.
            const optionsText = Object.entries(q.options ?? {})
                .map(([key, value]) => `${key}: ${value}`)
                .join('\n');

            return toLine([q.number, q.type, q.title, optionsText, q.foundAnswer]);
        });

        return [toLine(headers), ...bodyLines].join('\n');
    }


    // --- 数据处理 ---
    /**
     * Downloads the scraped questions as a file in the format chosen in the UI.
     *
     * The format is read from the `#formatSelector` element ('csv' or, by
     * default, 'json'). The download goes through GM_download; if calling it
     * throws, an anchor-click download is used instead. A single blob URL is
     * created per call and released once it is no longer needed.
     *
     * @param {Array<Object>} data - question records to save; nothing happens when empty.
     * @param {string} baseFilename - file name without extension.
     */
    function saveData(data, baseFilename) {
        if (!data || data.length === 0) {
            log('没有可保存的数据。');
            return; // Deliberately silent: no alert when there is nothing to save.
        }

        const format = document.getElementById('formatSelector')?.value || 'json';
        log(`用户选择的保存格式: ${format}`);

        let blob;
        let filename;
        if (format === 'csv') {
            // The leading BOM lets Excel recognise the file as UTF-8.
            blob = new Blob([`\uFEFF${convertToCSV(data)}`], { type: 'text/csv;charset=utf-8;' });
            filename = `${baseFilename}.csv`;
        } else {
            // Anything other than 'csv' is saved as JSON.
            blob = new Blob([JSON.stringify(data, null, 2)], { type: 'application/json' });
            filename = `${baseFilename}.json`;
        }

        log(`准备下载 ${data.length} 道题目... 文件名: ${filename}`);

        // One object URL shared by both download paths, so it can always be revoked.
        const objectUrl = URL.createObjectURL(blob);
        const releaseUrl = () => URL.revokeObjectURL(objectUrl);

        try {
            GM_download({
                url: objectUrl,
                name: filename,
                saveAs: true,
                // Release the blob URL as soon as the manager reports an outcome.
                onload: () => {
                    log('文件已开始下载。');
                    releaseUrl();
                },
                onerror: (error) => {
                    err(`GM_download 下载失败: ${error?.error}`);
                    releaseUrl();
                },
                ontimeout: releaseUrl
            });
        } catch (e) {
            err('GM_download 不可用,使用备用下载方法。');
            const a = document.createElement('a');
            a.href = objectUrl;
            a.download = filename;
            document.body.appendChild(a);
            a.click();
            document.body.removeChild(a);
            // Revoke on a later tick so the click-triggered download can
            // still read the blob.
            setTimeout(releaseUrl, 1000);
        }
        alert(`操作完成!即将下载包含 ${data.length} 道题目的 ${format.toUpperCase()} 文件。`);
    }

    // --- 核心爬取逻辑 ---

    /**
     * Polls the page until the question number differs from the previous one.
     *
     * @param {Document} doc - document (or element) queried for `#num`.
     * @param {string} oldQuestionNumber - number of the question shown before navigating.
     * @param {Object} [options] - optional timing overrides.
     * @param {number} [options.maxAttempts=10] - how many polls before giving up.
     * @param {number} [options.pollIntervalMs=1000] - wait between polls.
     * @param {number} [options.settleDelayMs=300] - extra wait once a new number is seen.
     * @returns {Promise<boolean>} true when a new question number appeared; false
     *     on timeout or when crawling was stopped while waiting.
     */
    async function waitForQuestionChange(
        doc,
        oldQuestionNumber,
        { maxAttempts = 10, pollIntervalMs = 1000, settleDelayMs = 300 } = {}
    ) {
        log(`等待题目从 ${oldQuestionNumber} 更新...`);
        for (let attempt = 0; attempt < maxAttempts; attempt++) {
            // The user may have pressed stop while we were waiting.
            if (!isCrawling) return false;

            const currentNum = doc.querySelector('#num')?.innerText.trim() ?? '';
            if (currentNum && currentNum !== oldQuestionNumber) {
                log(`新题目 ${currentNum} 已加载。`);
                // Short pause before the caller reads the rest of the question.
                await delay(settleDelayMs);
                return true;
            }
            await delay(pollIntervalMs);
        }
        err("等待新题目加载超时。");
        return false;
    }

    /**
     * Click handler for the stop button: clears the crawl flag so the crawl
     * loop ends after the question it is currently processing.
     */
    function stopCrawling() {
        log("用户请求停止,将在当前题目处理完毕后终止...");
        isCrawling = false;

        // Disable the button right away so repeated clicks are ignored.
        const stopBtn = document.getElementById('crawlStopButton');
        if (!stopBtn) {
            return;
        }
        stopBtn.disabled = true;
    }

    /**
     * Reads the question currently shown inside the question container.
     *
     * The answer is taken from a red "正确答案:X" span inside the title when
     * present (and stripped from the title); otherwise from the label of the
     * option marked `li.selected`; otherwise it is '未找到'.
     *
     * @param {Element} rightCon - the `.rightCon` question container.
     * @param {string} questionNumber - number already read from `#num`.
     * @returns {Object|null} the question record, or null when the title
     *     element is missing.
     */
    function extractQuestion(rightCon, questionNumber) {
        const titleEl = rightCon.querySelector('.titleDetail p');
        if (!titleEl) return null;

        const typeEl = rightCon.querySelector('#type');
        const optionsUl = rightCon.querySelector('.titleDetail ul');

        let cleanTitle = titleEl.innerText;
        let answer = '未找到';

        const answerSpan = titleEl.querySelector('span[style*="color:red"]');
        if (answerSpan) {
            const answerMatch = answerSpan.innerText.match(/正确答案:\s*([A-Z]+)/);
            if (answerMatch) {
                answer = answerMatch[1];
                cleanTitle = cleanTitle.replace(answerSpan.innerText, '').trim();
            }
        }
        if (answer === '未找到') {
            const selectedLi = optionsUl?.querySelector('li.selected');
            if (selectedLi) answer = selectedLi.querySelector('button')?.innerText.trim() || '未找到';
        }

        // A question without an option list simply gets an empty option map.
        const options = {};
        const optionItems = optionsUl ? optionsUl.querySelectorAll('li') : [];
        for (const li of optionItems) {
            const key = li.querySelector('button')?.innerText.trim();
            const value = li.querySelector('i')?.innerText.trim();
            if (key && value) options[key] = value;
        }

        return {
            number: questionNumber,
            type: typeEl?.innerText.trim() ?? '',
            title: cleanTitle,
            options,
            foundAnswer: answer
        };
    }

    /**
     * Click handler for the start button: scrapes the current question, clicks
     * "next" and repeats until stopped, until a question number repeats, or
     * until no further question can be reached. Whatever was collected is
     * handed to saveData when the loop ends, including after an error.
     *
     * @returns {Promise<void>}
     */
    async function startCrawling() {
        if (isCrawling) return;
        isCrawling = true;
        allQuestions.clear();
        updateButtonState(true);

        try {
            const docContext = document;

            while (isCrawling) {
                await delay(200);

                const rightCon = docContext.querySelector('.rightCon');
                if (!rightCon) {
                    log('找不到题目主容器 .rightCon,爬取结束。');
                    break;
                }

                const numEl = rightCon.querySelector('#num');
                const currentQuestionNumber = numEl ? numEl.innerText.trim() : null;

                if (!currentQuestionNumber) { err('页面结构不完整,找不到题目编号。'); break; }
                // Seeing a number again means navigation wrapped around or stalled.
                if (allQuestions.has(currentQuestionNumber)) { log(`检测到重复题目编号 ${currentQuestionNumber},爬取结束。`); break; }

                // Show progress on the start button while it is disabled.
                const startButton = document.getElementById('crawlStartButton');
                if (startButton) startButton.textContent = `正在爬取第 ${currentQuestionNumber} 题...`;

                const questionData = extractQuestion(rightCon, currentQuestionNumber);
                if (!questionData) {
                    // Stop cleanly instead of failing on a null dereference.
                    err('页面结构不完整,找不到题目内容。');
                    break;
                }

                allQuestions.set(currentQuestionNumber, questionData);
                console.groupCollapsed(`[题目 ${questionData.number}] ${questionData.title.substring(0, 40)}...`);
                console.log("题目详情:", questionData);
                console.groupEnd();

                const nextButton = docContext.querySelector('#next');
                if (nextButton && !nextButton.disabled) {
                    log("点击 '下一题'。");
                    nextButton.click();
                    if (!await waitForQuestionChange(docContext, currentQuestionNumber)) {
                        log("点击下一题后题目未更新或被用户中断,爬取结束。");
                        break;
                    }
                } else {
                    log('找不到可点击的“下一题”按钮,爬取结束。');
                    break;
                }
            }
        } catch (error) {
            err(`爬取过程中发生严重错误: ${error.message}`);
            alert(`爬取过程中发生严重错误,请按F12打开控制台查看详情。\n错误: ${error.message}`);
        } finally {
            isCrawling = false; // Make sure the loop flag is always reset.
            try {
                saveData(Array.from(allQuestions.values()), '鸿科经纬-题目');
            } finally {
                // Re-enable the controls even if saving throws.
                updateButtonState(false);
            }
        }
    }

    /**
     * Builds the floating control panel (start button, stop button, format
     * selector), injects its styles and attaches it to the page body. Returns
     * early when an element with id `crawlContainer` already exists.
     */
    function setupUI() {
        if (document.getElementById('crawlContainer')) return;

        // Factory shared by the two buttons.
        const makeButton = (id, label, onClick) => {
            const button = document.createElement('button');
            button.id = id;
            button.textContent = label;
            button.addEventListener('click', onClick);
            return button;
        };

        const panel = document.createElement('div');
        panel.id = 'crawlContainer';

        const startBtn = makeButton('crawlStartButton', '开始爬取题目', startCrawling);

        // The stop button starts hidden.
        const stopBtn = makeButton('crawlStopButton', '停止并保存', stopCrawling);
        stopBtn.style.display = 'none';

        const formatSelect = document.createElement('select');
        formatSelect.id = 'formatSelector';
        formatSelect.innerHTML = `
            <option value="json">JSON 格式</option>
            <option value="csv">表格 (CSV) 格式</option>
        `;

        GM_addStyle(`
            #crawlContainer {
                position: fixed; top: 100px; right: 20px; z-index: 10000;
                display: flex; flex-direction: column; gap: 8px;
            }
            #crawlStartButton, #crawlStopButton, #formatSelector {
                padding: 10px 15px; color: white; border: none;
                border-radius: 5px; cursor: pointer; font-size: 16px;
                box-shadow: 0 4px 8px rgba(0,0,0,0.2);
                transition: background-color 0.3s, color 0.3s;
                text-align: center;
            }
            #crawlStartButton { background-color: #4CAF50; }
            #crawlStartButton:hover { background-color: #45a049; }
            #crawlStartButton:disabled { background-color: #A5A5A5; color: #E0E0E0; cursor: not-allowed; }

            #crawlStopButton { background-color: #f44336; }
            #crawlStopButton:hover { background-color: #d32f2f; }
            #crawlStopButton:disabled { background-color: #A5A5A5; color: #E0E0E0; cursor: not-allowed; }

            #formatSelector { background-color: #008CBA; color: white; }
            #formatSelector:disabled { background-color: #A5A5A5; color: #E0E0E0; cursor: not-allowed; }
        `);

        // Children are stacked in this order by the column flex layout.
        for (const child of [startBtn, stopBtn, formatSelect]) {
            panel.appendChild(child);
        }
        document.body.appendChild(panel);
        log("UI已加载,等待用户操作。");
    }

    // Build the panel immediately when the document has already been parsed;
    // otherwise wait for DOMContentLoaded.
    if (document.readyState !== 'loading') {
        setupUI();
    } else {
        document.addEventListener('DOMContentLoaded', setupUI);
    }

})();