Greasy Fork

Greasy Fork is available in English.

知乎批量导出工具

批量抓取知乎回答/文章/收藏夹,支持导出包含“评论区”的单页 HTML 文件。

当前为 2025-12-23 提交的版本,查看 最新版本

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Greasemonkey 油猴子 或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴 或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴 或 Userscripts,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         知乎批量导出工具
// @namespace    http://qtqz.zhihu/
// @version      3.0
// @description  批量抓取知乎回答/文章/收藏夹,支持导出包含“评论区”的单页 HTML 文件。
// @author       AI & qtqz logic
// @match        https://www.zhihu.com/people/*
// @match        https://www.zhihu.com/collection/*
// @icon         https://static.zhihu.com/heifetz/favicon.ico
// @require      https://cdn.jsdelivr.net/npm/file-saver@2.0.5/dist/FileSaver.min.js
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @license      MIT
// ==/UserScript==

(function () {
    'use strict';

    // --- Core configuration ---
    // Tunables read by the scraping functions below.
    const CONFIG = {
        commentLimit: 20, // Only the first 20 comments of each item are fetched (keeps the request count low to avoid being blocked)
        requestDelay: 800 // Pause between requests, in milliseconds
    };

    // Mutable run state shared by the functions in this script.
    const STATE = {
        isRunning: false,   // true while startScraping is in progress
        items: [],          // processed items collected by the current/last run
        currentType: '',    // 'collection', 'people_answers' or 'people_articles' (set by detectPage)
        id: '',             // collection ID or user token taken from the page URL
        cancel: false       // set by the stop button; checked inside the scraping loops
    };

    // References to DOM nodes created by initUI.
    const UI = {
        panel: null,
        logArea: null,
        progressBar: null
    };

    // --- UI 构建 ---
    /**
     * Builds the floating control panel and the round toggle button, injects
     * their stylesheet, stores the relevant nodes in UI and wires up all
     * button handlers.
     *
     * The panel is hidden by the injected stylesheet (display: none), so its
     * inline `style.display` starts out empty. Visibility is therefore toggled
     * by comparing against 'block'; comparing against 'none' would make the
     * first click on the toggle button a no-op.
     */
    function initUI() {
        const style = document.createElement('style');
        style.textContent = `
            #zbc-panel {
                position: fixed; top: 100px; right: 20px; width: 340px;
                background: #fff; box-shadow: 0 4px 12px rgba(0,0,0,0.15);
                border-radius: 8px; z-index: 9999; font-family: sans-serif;
                border: 1px solid #ebebeb; display: none;
            }
            #zbc-header {
                padding: 12px 16px; border-bottom: 1px solid #f0f0f0;
                background: #f6f6f6; border-radius: 8px 8px 0 0;
                font-weight: bold; color: #1772f6; display: flex; justify-content: space-between;
            }
            #zbc-body { padding: 16px; }
            .zbc-btn {
                display: block; width: 100%; padding: 8px; margin-bottom: 8px;
                border: 1px solid #1772f6; color: #1772f6; background: #fff;
                border-radius: 4px; cursor: pointer; text-align: center;
                transition: 0.2s;
            }
            .zbc-btn:hover { background: #eef6ff; }
            .zbc-btn:disabled { border-color: #ccc; color: #ccc; cursor: not-allowed; background: #f9f9f9;}
            .zbc-btn.primary { background: #1772f6; color: #fff; }
            .zbc-btn.primary:hover { background: #1062d6; }
            #zbc-log {
                height: 180px; overflow-y: auto; background: #f9f9f9;
                border: 1px solid #eee; padding: 8px; font-size: 12px;
                margin-bottom: 10px; color: #666; line-height: 1.4;
            }
            .zbc-progress { height: 4px; background: #eee; width: 100%; margin-bottom: 10px; }
            .zbc-progress-bar { height: 100%; background: #1772f6; width: 0%; transition: width 0.3s; }
            .zbc-close { cursor: pointer; color: #999; }
            .zbc-tip { font-size: 12px; color: #999; margin-bottom: 10px; }
        `;
        document.head.appendChild(style);

        const panel = document.createElement('div');
        panel.id = 'zbc-panel';
        panel.innerHTML = `
            <div id="zbc-header">
                <span>知乎导出助手 (含评论版)</span>
                <span class="zbc-close" id="zbc-close">✕</span>
            </div>
            <div id="zbc-body">
                <div id="zbc-status">请进入用户主页或收藏夹页面</div>
                <div class="zbc-tip">提示:开启评论抓取会显著降低速度,每个回答仅抓取前 ${CONFIG.commentLimit} 条热评。</div>
                <div class="zbc-progress"><div class="zbc-progress-bar" id="zbc-bar"></div></div>
                <div id="zbc-log"></div>
                <button id="zbc-start" class="zbc-btn primary">开始抓取 (含评论)</button>
                <button id="zbc-stop" class="zbc-btn" disabled>停止抓取</button>
                <hr style="border:0; border-top:1px solid #eee; margin: 10px 0;">
                <button id="zbc-export-html" class="zbc-btn" disabled>💾 导出单页 HTML (含评论)</button>
                <button id="zbc-export-json" class="zbc-btn" disabled>💾 导出 JSON</button>
            </div>
        `;
        document.body.appendChild(panel);

        UI.panel = panel;
        UI.logArea = document.getElementById('zbc-log');
        UI.progressBar = document.getElementById('zbc-bar');

        // Bound from script instead of an inline onclick attribute, so the close
        // button does not depend on the page allowing inline event handlers.
        document.getElementById('zbc-close').onclick = () => { panel.style.display = 'none'; };
        document.getElementById('zbc-start').onclick = startScraping;
        document.getElementById('zbc-stop').onclick = () => { STATE.cancel = true; log('正在停止...'); };
        document.getElementById('zbc-export-html').onclick = () => exportSingleHTML();
        document.getElementById('zbc-export-json').onclick = () => exportJSON();

        const toggleBtn = document.createElement('div');
        toggleBtn.innerText = '📥';
        toggleBtn.style.cssText = 'position:fixed; bottom:20px; right:20px; width:40px; height:40px; background:#1772f6; color:#fff; border-radius:50%; text-align:center; line-height:40px; cursor:pointer; z-index:9998; box-shadow:0 2px 10px rgba(0,0,0,0.2); font-size:20px;';
        toggleBtn.onclick = () => {
            // Inline display is '' until first shown; anything but 'block' counts as hidden.
            panel.style.display = panel.style.display === 'block' ? 'none' : 'block';
            detectPage();
        };
        document.body.appendChild(toggleBtn);
    }

    // --- 逻辑控制 ---

    /**
     * Puts a timestamped line at the top of the panel's log area.
     *
     * @param {string} msg - Text to show after the local time stamp.
     */
    function log(msg) {
        const timestamp = new Date().toLocaleTimeString();
        const entry = document.createElement('div');
        entry.innerText = `[${timestamp}] ${msg}`;
        UI.logArea.prepend(entry);
    }

    /**
     * Works out which kind of Zhihu page is open, records the result in
     * STATE.currentType / STATE.id and updates the status line in the panel.
     *
     * Only the URL path is inspected, so query strings and hash fragments can
     * never end up inside the extracted ID. A collection URL without a numeric
     * ID is reported as an unknown page instead of throwing.
     *
     * @returns {boolean} true when the page is a collection or a user page,
     *     false otherwise.
     */
    function detectPage() {
        const statusDiv = document.getElementById('zbc-status');

        let path = '';
        try {
            path = new URL(window.location.href).pathname;
        } catch (e) {
            path = '';
        }

        const collectionMatch = path.match(/\/collection\/(\d+)/);
        if (collectionMatch) {
            STATE.currentType = 'collection';
            STATE.id = collectionMatch[1];
            statusDiv.innerText = `当前:收藏夹 (ID: ${STATE.id})`;
            return true;
        }

        // Group 1 is the user token, group 2 whatever follows it (e.g. "/posts").
        const peopleMatch = path.match(/\/people\/([^/]+)(\/.*)?$/);
        if (peopleMatch) {
            STATE.id = peopleMatch[1];
            const subPath = peopleMatch[2] || '';
            if (subPath.startsWith('/posts') || subPath.startsWith('/articles')) {
                STATE.currentType = 'people_articles';
                statusDiv.innerText = `当前:用户文章 (${STATE.id})`;
            } else {
                STATE.currentType = 'people_answers';
                statusDiv.innerText = `当前:用户回答 (${STATE.id})`;
            }
            return true;
        }

        statusDiv.innerText = '未知页面';
        return false;
    }

    /**
     * Runs one scraping session for the page identified by detectPage():
     * pages through the list API 20 entries at a time, converts every entry
     * with processItem, attaches its comments via fetchComments and appends
     * the result to STATE.items. Stops when the API signals the end, the list
     * comes back empty, or the stop button sets STATE.cancel.
     *
     * Errors from the list request end the session and are logged; an entry
     * that cannot be processed is logged and skipped so it does not discard
     * the rest of the run.
     *
     * @returns {Promise<void>} Resolves once the session ended and the
     *     buttons/progress bar were reset.
     */
    async function startScraping() {
        if (!detectPage()) { alert('无法识别当前页面'); return; }

        STATE.isRunning = true;
        STATE.cancel = false;
        STATE.items = [];

        document.getElementById('zbc-start').disabled = true;
        document.getElementById('zbc-stop').disabled = false;
        toggleExportBtns(false);

        let offset = 0;
        let isEnd = false;
        const limit = 20;

        try {
            while (!isEnd && !STATE.cancel) {
                let apiUrl = '';
                if (STATE.currentType === 'collection') {
                    apiUrl = `https://www.zhihu.com/api/v4/collections/${STATE.id}/items?offset=${offset}&limit=${limit}`;
                } else if (STATE.currentType === 'people_answers') {
                    apiUrl = `https://www.zhihu.com/api/v4/members/${STATE.id}/answers?include=data%5B*%5D.is_normal%2Ccontent%2Cvoteup_count%2Ccomment_count%2Ccreated_time%2Cupdated_time&offset=${offset}&limit=${limit}&sort_by=created`;
                } else if (STATE.currentType === 'people_articles') {
                    apiUrl = `https://www.zhihu.com/api/v4/members/${STATE.id}/articles?include=data%5B*%5D.content%2Cvoteup_count%2Ccomment_count&offset=${offset}&limit=${limit}&sort_by=created`;
                }

                log(`正在请求列表 offset: ${offset}...`);
                const data = await fetchAPI(apiUrl);
                const entries = data && Array.isArray(data.data) ? data.data : [];

                if (entries.length === 0) {
                    isEnd = true;
                    continue;
                }

                // --- Main loop: process each entry and fetch its comments ---
                for (const item of entries) {
                    if (STATE.cancel) break;
                    if (!item) continue;

                    // Collection entries wrap the real answer/article object in
                    // `content`. On the member endpoints `content` is the HTML
                    // body string (processItem reads it as such), so only unwrap
                    // when it is actually an object.
                    const isWrapped = item.content !== null && typeof item.content === 'object';
                    const realItem = isWrapped ? item.content : item;

                    let processedItem;
                    try {
                        processedItem = processItem(realItem);
                    } catch (err) {
                        log(`  (条目解析失败,跳过: ${err && err.message ? err.message : err})`);
                        continue;
                    }

                    // Fetch comments. The title may be missing, so coerce before slicing.
                    log(`  └ 抓取评论: ${String(processedItem.title || '').slice(0, 10)}...`);
                    try {
                        const comments = await fetchComments(processedItem.id, processedItem.type);
                        processedItem.comments = comments;
                    } catch (err) {
                        log(`  (评论抓取失败,跳过)`);
                        processedItem.comments = [];
                    }

                    STATE.items.push(processedItem);

                    // Rest briefly after every complete item (including comments)
                    // to reduce the risk of the account being rate-limited.
                    await sleep(CONFIG.requestDelay);
                }

                offset += limit;

                // A response without paging info cannot be continued safely.
                const paging = data.paging;
                isEnd = paging ? Boolean(paging.is_end) : true;

                // Use the reported total when the API provides one (assumed to be
                // `paging.totals` — TODO confirm); otherwise show an indeterminate 50%.
                const total = paging && typeof paging.totals === 'number' ? paging.totals : 0;
                if (total > 0) {
                    const percent = Math.min(100, Math.round((STATE.items.length / total) * 100));
                    UI.progressBar.style.width = `${percent}%`;
                } else {
                    UI.progressBar.style.width = '50%';
                }
            }
        } catch (e) {
            log('错误: ' + (e && e.message ? e.message : String(e)));
            console.error(e);
        }

        STATE.isRunning = false;
        document.getElementById('zbc-start').disabled = false;
        document.getElementById('zbc-stop').disabled = true;
        UI.progressBar.style.width = '100%';

        if (STATE.items.length > 0) {
            log(`抓取完成!共 ${STATE.items.length} 条。`);
            toggleExportBtns(true);
        } else {
            log('未抓取到数据。');
        }
    }

    /**
     * Fetches the first page of root comments for one answer or article.
     *
     * Best-effort by design: every failure (non-200 status, unparsable body,
     * network error, timeout, abort) resolves with an empty array so the main
     * scraping flow is never interrupted. A single malformed comment no longer
     * discards the whole batch; missing fields get neutral placeholders.
     *
     * @param {number|string} id - ID of the answer/article.
     * @param {string} type - Item type; 'answer' selects the answers endpoint,
     *     every other value uses the articles endpoint.
     * @returns {Promise<Array<{author: string, content: string, vote_count: number, created_time: string}>>}
     *     At most CONFIG.commentLimit comments; never rejects.
     */
    function fetchComments(id, type) {
        return new Promise((resolve) => {
            // Zhihu v4 comment endpoint
            const resourceType = type === 'answer' ? 'answers' : 'articles';
            const url = `https://www.zhihu.com/api/v4/${resourceType}/${id}/root_comments?order=normal&limit=${CONFIG.commentLimit}&offset=0&status=open`;

            const giveUp = () => resolve([]);

            // Converts one raw API comment into the flat shape used by the exporters.
            const toComment = (c) => {
                const member = c.author && c.author.member;
                return {
                    author: member && member.name ? member.name : '匿名用户',
                    content: c.content,
                    vote_count: c.vote_count,
                    created_time: typeof c.created_time === 'number'
                        ? new Date(c.created_time * 1000).toLocaleString()
                        : ''
                };
            };

            GM_xmlhttpRequest({
                method: "GET",
                url: url,
                onload: function(response) {
                    if (response.status !== 200) {
                        giveUp(); // request failed: return nothing, do not block the main flow
                        return;
                    }
                    try {
                        const res = JSON.parse(response.responseText);
                        const list = res && Array.isArray(res.data) ? res.data : [];
                        resolve(list.filter(Boolean).map(toComment));
                    } catch (e) {
                        giveUp(); // unparsable body
                    }
                },
                onerror: giveUp,
                // Without these the promise would never settle on timeout/abort.
                ontimeout: giveUp,
                onabort: giveUp
            });
        });
    }

    /**
     * Performs a GET request through GM_xmlhttpRequest and parses the body as JSON.
     *
     * @param {string} url - Absolute URL to request.
     * @returns {Promise<any>} Resolves with the parsed JSON body on HTTP 200.
     *     Rejects with an Error on any other status, on invalid JSON, and on
     *     network error, timeout or abort. The rejection is always an Error
     *     instance so callers can rely on `.message`.
     */
    function fetchAPI(url) {
        return new Promise((resolve, reject) => {
            // The failure callbacks receive a response-like object, not an Error.
            const failWith = (label) => (res) => {
                const detail = res && (res.statusText || res.error || res.status);
                reject(new Error(detail ? `${label}: ${detail}` : label));
            };

            GM_xmlhttpRequest({
                method: "GET",
                url: url,
                onload: function(response) {
                    if (response.status === 200) {
                        try {
                            resolve(JSON.parse(response.responseText));
                        } catch (e) {
                            reject(e);
                        }
                    } else {
                        reject(new Error("API Error: " + response.status));
                    }
                },
                onerror: failWith('Network Error'),
                // Without these the promise would never settle on timeout/abort.
                ontimeout: failWith('Request Timeout'),
                onabort: failWith('Request Aborted')
            });
        });
    }

    /**
     * Converts one raw API object (answer or article) into the flat record
     * stored in STATE.items and consumed by the exporters.
     *
     * Missing optional fields no longer throw: an absent author, title or URL
     * is replaced with a placeholder / empty string, and an absent timestamp
     * yields an empty string instead of "Invalid Date".
     *
     * @param {object} item - Raw item; `content` is expected to be an HTML string.
     * @returns {{id: *, type: string, title: string, author: string, content: string,
     *     voteup_count: *, created_time: string, url: string, comments: Array}}
     */
    function processItem(item) {
        let content = typeof item.content === 'string' ? item.content : '';
        // Fix lazy-loaded images: replace the placeholder <img> with one pointing at the real source
        content = content.replace(/<img [^>]*src="[^"]+"[^>]*data-actualsrc="([^"]+)"[^>]*>/g, '<img src="$1">');
        content = content.replace(/<img [^>]*src="[^"]+"[^>]*data-original="([^"]+)"[^>]*>/g, '<img src="$1">');

        // Answers carry `created_time`; articles appear to expose `created`
        // instead — TODO confirm against the API. Values are Unix seconds.
        let timestamp = null;
        if (typeof item.created_time === 'number') {
            timestamp = item.created_time;
        } else if (typeof item.created === 'number') {
            timestamp = item.created;
        }

        // Answers take their title from the question they belong to.
        const rawTitle = item.question && item.question.title ? item.question.title : item.title;

        return {
            id: item.id,
            type: item.type, // 'answer' or 'article'
            title: rawTitle ? rawTitle : '(无标题)',
            author: item.author && item.author.name ? item.author.name : '匿名用户',
            content: content,
            voteup_count: item.voteup_count,
            created_time: timestamp === null ? '' : new Date(timestamp * 1000).toLocaleString(),
            url: typeof item.url === 'string' ? item.url.replace("api.zhihu.com", "www.zhihu.com") : '',
            comments: [] // filled in later by the caller
        };
    }

    /**
     * Returns a promise that settles after roughly `ms` milliseconds.
     *
     * @param {number} ms - Delay handed to setTimeout.
     * @returns {Promise<void>}
     */
    function sleep(ms) {
        return new Promise((done) => {
            setTimeout(done, ms);
        });
    }

    /**
     * Enables or disables both export buttons together.
     *
     * @param {boolean} enable - Truthy to enable the buttons, falsy to disable them.
     */
    function toggleExportBtns(enable) {
        const disabled = !enable;
        for (const buttonId of ['zbc-export-html', 'zbc-export-json']) {
            document.getElementById(buttonId).disabled = disabled;
        }
    }

    // --- 导出功能 (单页 HTML 含评论) ---

    /**
     * Renders every record in STATE.items into one self-contained HTML page
     * (table of contents, article bodies, comment sections) and hands it to
     * saveAs() for download.
     *
     * Plain-text fields that come from the remote API (titles, author names,
     * URLs, counters, timestamps) are HTML-escaped before interpolation so a
     * title such as `a <b> & c` cannot break the markup or inject elements.
     * Article bodies and comment bodies are inserted as-is because they are
     * stored as HTML.
     */
    function exportSingleHTML() {
        log('正在生成 HTML...');
        const title = `知乎导出_${STATE.currentType}_${new Date().toISOString().slice(0,10)}`;

        // Escapes text for use in HTML element content and double-quoted attributes.
        const escapeHtml = (value) => String(value == null ? '' : value)
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');

        const tocHtml = STATE.items.map((item, index) => {
            return `<li><a href="#item-${index}">${index + 1}. ${escapeHtml(item.title)}</a> <span style="font-size:12px;color:#999">(${escapeHtml(item.author)})</span></li>`;
        }).join('');

        const contentHtml = STATE.items.map((item, index) => {
            // Build the comment section HTML
            let commentsHtml = '';
            if (item.comments && item.comments.length > 0) {
                const commentList = item.comments.map(c => `
                    <div class="comment-item">
                        <div class="comment-meta"><span class="comment-author">${escapeHtml(c.author)}</span> <span class="comment-time">${escapeHtml(c.created_time)}</span> <span class="comment-vote">👍 ${escapeHtml(c.vote_count)}</span></div>
                        <div class="comment-content">${c.content}</div>
                    </div>
                `).join('');
                commentsHtml = `
                    <div class="comments-section">
                        <div class="comments-title">精选评论 (${item.comments.length}条)</div>
                        ${commentList}
                    </div>
                `;
            } else {
                commentsHtml = `<div class="comments-section" style="color:#ccc;text-align:center;font-size:12px;">暂无评论或未抓取</div>`;
            }

            return `
                <div class="article-item" id="item-${index}">
                    <div class="article-header">
                        <h2><a href="${escapeHtml(item.url)}" target="_blank">${escapeHtml(item.title)}</a></h2>
                        <div class="meta">
                            作者: ${escapeHtml(item.author)} | 赞同: ${escapeHtml(item.voteup_count)} | 时间: ${escapeHtml(item.created_time)}
                        </div>
                    </div>
                    <div class="article-content">
                        ${item.content}
                    </div>
                    ${commentsHtml}
                    <div class="back-to-top"><a href="#toc">↑ 回到目录</a></div>
                    <hr class="separator">
                </div>
            `;
        }).join('');

        const finalHtml = `
<!DOCTYPE html>
<html>
<head>
    <meta charset="UTF-8">
    <title>${title}</title>
    <style>
        body { font-family: -apple-system, BlinkMacSystemFont, "Helvetica Neue", "PingFang SC", "Microsoft YaHei", sans-serif; max-width: 900px; margin: 0 auto; padding: 20px; line-height: 1.7; color: #121212; background-color: #f6f6f6; }
        .container { background: #fff; padding: 40px; border-radius: 8px; box-shadow: 0 1px 3px rgba(26,26,26,.1); }
        h1.main-title { text-align: center; margin-bottom: 30px; color: #1772f6; }
        #toc { background: #f9f9f9; padding: 20px; border-radius: 6px; margin-bottom: 40px; border: 1px solid #eee; max-height: 400px; overflow-y: auto; }
        #toc ul { list-style: none; padding: 0; }
        #toc li { margin-bottom: 8px; border-bottom: 1px dashed #eee; padding-bottom: 4px; }
        #toc a { text-decoration: none; color: #333; font-size: 14px; }
        #toc a:hover { color: #1772f6; }
        .article-item { margin-bottom: 60px; }
        .article-header h2 { margin-bottom: 10px; font-size: 22px; }
        .article-header a { text-decoration: none; color: #121212; }
        .article-header a:hover { color: #1772f6; }
        .meta { font-size: 13px; color: #8590a6; margin-bottom: 20px; }
        .article-content img { max-width: 100%; height: auto; display: block; margin: 10px auto; border-radius: 4px; cursor: zoom-in; }
        .article-content blockquote { border-left: 3px solid #dfe3eb; color: #646464; padding-left: 15px; margin: 1em 0; }
        .article-content p { margin-bottom: 1.2em; text-align: justify; }

        /* Comment section styles */
        .comments-section { margin-top: 30px; background: #f6f6f6; padding: 15px; border-radius: 4px; border: 1px solid #eee; }
        .comments-title { font-weight: bold; margin-bottom: 10px; font-size: 14px; color: #666; }
        .comment-item { padding: 10px 0; border-bottom: 1px dashed #e0e0e0; }
        .comment-item:last-child { border-bottom: none; }
        .comment-meta { font-size: 12px; color: #999; margin-bottom: 4px; }
        .comment-author { color: #444; font-weight: bold; }
        .comment-vote { margin-left: 10px; color: #1772f6; }
        .comment-content { font-size: 13px; color: #333; line-height: 1.5; }

        .separator { border: 0; height: 1px; background: #ebebeb; margin: 40px 0; }
        .back-to-top { text-align: right; margin-top: 10px; }
        .back-to-top a { color: #8590a6; font-size: 12px; text-decoration: none; }
    </style>
</head>
<body>
    <div class="container">
        <h1 class="main-title">${title}</h1>
        <div id="toc">
            <h3>目录 (共 ${STATE.items.length} 条)</h3>
            <ul>${tocHtml}</ul>
        </div>
        <div id="main-content">
            ${contentHtml}
        </div>
    </div>
</body>
</html>
        `;

        const blob = new Blob([finalHtml], { type: "text/html;charset=utf-8" });
        saveAs(blob, `${title}.html`);
        log('HTML 文件已导出!');
    }

    /**
     * Serialises STATE.items as pretty-printed JSON (2-space indent) and hands
     * the result to saveAs() under a name built from the page type and ID.
     */
    function exportJSON() {
        const fileName = `知乎数据_${STATE.currentType}_${STATE.id}.json`;
        const payload = JSON.stringify(STATE.items, null, 2);
        const jsonBlob = new Blob([payload], { type: "application/json;charset=utf-8" });
        saveAs(jsonBlob, fileName);
        log('JSON 文件已导出!');
    }

    // 启动
    initUI();

})();