Greasy Fork is available in English.
批量抓取知乎回答/文章/收藏夹,支持导出包含“评论区”的单页 HTML 文件。
当前为
// ==UserScript==
// @name         知乎批量导出工具
// @namespace    http://qtqz.zhihu/
// @version      3.0
// @description  批量抓取知乎回答/文章/收藏夹,支持导出包含“评论区”的单页 HTML 文件。
// @author       AI & qtqz logic
// @match        https://www.zhihu.com/people/*
// @match        https://www.zhihu.com/collection/*
// @icon         https://static.zhihu.com/heifetz/favicon.ico
// @require      https://cdn.jsdelivr.net/npm/file-saver@2.0.5/dist/FileSaver.min.js
// @grant        GM_xmlhttpRequest
// @grant        GM_registerMenuCommand
// @license      MIT
// ==/UserScript==

// NOTE(review): the @require URL in the scraped copy of this script had its
// "file-saver@<version>" segment replaced by an e-mail-obfuscation placeholder.
// Version 2.0.5 was chosen when restoring it — confirm against the published script.

/**
 * Zhihu batch exporter.
 *
 * Adds a floating panel to Zhihu profile and collection pages that pages through
 * the list API, fetches the first comments of every item, and exports the result
 * either as a single self-contained HTML file or as JSON.
 */
(function () {
    'use strict';

    // --- Core configuration ---
    const CONFIG = {
        commentLimit: 20, // only the first 20 comments per item are requested, to limit request volume
        requestDelay: 800 // pause (ms) after each fully processed item
    };

    // Mutable scraping state shared by all handlers.
    const STATE = {
        isRunning: false,
        items: [],
        currentType: '',
        id: '',
        cancel: false
    };

    // References to panel elements, filled in by initUI().
    const UI = {
        panel: null,
        logArea: null,
        progressBar: null
    };

    // --- Small helpers ---

    /**
     * Escapes a value for safe interpolation into HTML text or attribute values.
     * @param {*} value - Any value; null/undefined become an empty string.
     * @returns {string} The HTML-escaped string.
     */
    function escapeHtml(value) {
        const text = value === undefined || value === null ? '' : String(value);
        return text
            .replace(/&/g, '&amp;')
            .replace(/</g, '&lt;')
            .replace(/>/g, '&gt;')
            .replace(/"/g, '&quot;')
            .replace(/'/g, '&#39;');
    }

    /**
     * Formats a timestamp given in seconds as a locale string.
     * @param {*} seconds - Timestamp in seconds (multiplied by 1000 before use).
     * @returns {string} Locale date string, or '' when the input is not a finite number.
     */
    function formatTimestamp(seconds) {
        if (typeof seconds !== 'number' || !Number.isFinite(seconds)) {
            return '';
        }
        return new Date(seconds * 1000).toLocaleString();
    }

    /**
     * Resolves after the given delay.
     * @param {number} ms - Delay in milliseconds.
     * @returns {Promise<void>}
     */
    function sleep(ms) {
        return new Promise(resolve => setTimeout(resolve, ms));
    }

    // --- UI construction ---

    /**
     * Injects the stylesheet, the control panel and the floating toggle button,
     * and wires up all button handlers.
     */
    function initUI() {
        const style = document.createElement('style');
        style.textContent = `
            #zbc-panel { position: fixed; top: 100px; right: 20px; width: 340px; background: #fff; box-shadow: 0 4px 12px rgba(0,0,0,0.15); border-radius: 8px; z-index: 9999; font-family: sans-serif; border: 1px solid #ebebeb; display: none; }
            #zbc-header { padding: 12px 16px; border-bottom: 1px solid #f0f0f0; background: #f6f6f6; border-radius: 8px 8px 0 0; font-weight: bold; color: #1772f6; display: flex; justify-content: space-between; }
            #zbc-body { padding: 16px; }
            .zbc-btn { display: block; width: 100%; padding: 8px; margin-bottom: 8px; border: 1px solid #1772f6; color: #1772f6; background: #fff; border-radius: 4px; cursor: pointer; text-align: center; transition: 0.2s; }
            .zbc-btn:hover { background: #eef6ff; }
            .zbc-btn:disabled { border-color: #ccc; color: #ccc; cursor: not-allowed; background: #f9f9f9;}
            .zbc-btn.primary { background: #1772f6; color: #fff; }
            .zbc-btn.primary:hover { background: #1062d6; }
            #zbc-log { height: 180px; overflow-y: auto; background: #f9f9f9; border: 1px solid #eee; padding: 8px; font-size: 12px; margin-bottom: 10px; color: #666; line-height: 1.4; }
            .zbc-progress { height: 4px; background: #eee; width: 100%; margin-bottom: 10px; }
            .zbc-progress-bar { height: 100%; background: #1772f6; width: 0%; transition: width 0.3s; }
            .zbc-close { cursor: pointer; color: #999; }
            .zbc-tip { font-size: 12px; color: #999; margin-bottom: 10px; }
        `;
        document.head.appendChild(style);

        const panel = document.createElement('div');
        panel.id = 'zbc-panel';
        // The stylesheet hides the panel, but the toggle below reads the *inline*
        // display value; initialise it so the first click actually opens the panel.
        panel.style.display = 'none';
        panel.innerHTML = `
            <div id="zbc-header">
                <span>知乎导出助手 (含评论版)</span>
                <span class="zbc-close">✕</span>
            </div>
            <div id="zbc-body">
                <div id="zbc-status">请进入用户主页或收藏夹页面</div>
                <div class="zbc-tip">提示:开启评论抓取会显著降低速度,每个回答仅抓取前 ${CONFIG.commentLimit} 条热评。</div>
                <div class="zbc-progress"><div class="zbc-progress-bar" id="zbc-bar"></div></div>
                <div id="zbc-log"></div>
                <button id="zbc-start" class="zbc-btn primary">开始抓取 (含评论)</button>
                <button id="zbc-stop" class="zbc-btn" disabled>停止抓取</button>
                <hr style="border:0; border-top:1px solid #eee; margin: 10px 0;">
                <button id="zbc-export-html" class="zbc-btn" disabled>💾 导出单页 HTML (含评论)</button>
                <button id="zbc-export-json" class="zbc-btn" disabled>💾 导出 JSON</button>
            </div>
        `;
        document.body.appendChild(panel);

        UI.panel = panel;
        UI.logArea = document.getElementById('zbc-log');
        UI.progressBar = document.getElementById('zbc-bar');

        // A listener is used instead of an inline onclick attribute so the close
        // button does not depend on the page's CSP allowing inline handlers.
        panel.querySelector('.zbc-close').addEventListener('click', () => {
            panel.style.display = 'none';
        });

        document.getElementById('zbc-start').onclick = startScraping;
        document.getElementById('zbc-stop').onclick = () => {
            STATE.cancel = true;
            log('正在停止...');
        };
        document.getElementById('zbc-export-html').onclick = () => exportSingleHTML();
        document.getElementById('zbc-export-json').onclick = () => exportJSON();

        const toggleBtn = document.createElement('div');
        toggleBtn.innerText = '📥';
        toggleBtn.style.cssText = 'position:fixed; bottom:20px; right:20px; width:40px; height:40px; background:#1772f6; color:#fff; border-radius:50%; text-align:center; line-height:40px; cursor:pointer; z-index:9998; box-shadow:0 2px 10px rgba(0,0,0,0.2); font-size:20px;';
        toggleBtn.onclick = () => {
            panel.style.display = panel.style.display === 'none' ? 'block' : 'none';
            detectPage();
        };
        document.body.appendChild(toggleBtn);
    }

    // --- Flow control ---

    /**
     * Prepends a timestamped line to the panel's log area (newest first).
     * @param {string} msg - Message to display.
     */
    function log(msg) {
        const p = document.createElement('div');
        p.innerText = `[${new Date().toLocaleTimeString()}] ${msg}`;
        UI.logArea.prepend(p);
    }

    /**
     * Inspects the current URL, records the page kind and id in STATE and
     * updates the status line.
     * @returns {boolean} true when the page is a supported collection or profile page.
     */
    function detectPage() {
        const url = window.location.href;
        const statusDiv = document.getElementById('zbc-status');

        if (url.includes('/collection/')) {
            // Guard the match: a non-numeric collection path must not throw here.
            const collectionMatch = url.match(/collection\/(\d+)/);
            if (collectionMatch) {
                STATE.currentType = 'collection';
                STATE.id = collectionMatch[1];
                statusDiv.innerText = `当前:收藏夹 (ID: ${STATE.id})`;
                return true;
            }
        } else if (url.includes('/people/')) {
            const match = url.match(/people\/([^/]+)/);
            if (match) {
                STATE.id = match[1];
                if (url.includes('/posts') || url.includes('/articles')) {
                    STATE.currentType = 'people_articles';
                    statusDiv.innerText = `当前:用户文章 (${STATE.id})`;
                } else {
                    STATE.currentType = 'people_answers';
                    statusDiv.innerText = `当前:用户回答 (${STATE.id})`;
                }
                return true;
            }
        }

        statusDiv.innerText = '未知页面';
        return false;
    }

    /**
     * Builds the list API URL for the page kind stored in STATE.
     * @param {number} offset - Paging offset.
     * @param {number} limit - Page size.
     * @returns {string} The request URL, or '' for an unknown page kind.
     */
    function buildListUrl(offset, limit) {
        if (STATE.currentType === 'collection') {
            return `https://www.zhihu.com/api/v4/collections/${STATE.id}/items?offset=${offset}&limit=${limit}`;
        }
        if (STATE.currentType === 'people_answers') {
            return `https://www.zhihu.com/api/v4/members/${STATE.id}/answers?include=data%5B*%5D.is_normal%2Ccontent%2Cvoteup_count%2Ccomment_count%2Ccreated_time%2Cupdated_time&offset=${offset}&limit=${limit}&sort_by=created`;
        }
        if (STATE.currentType === 'people_articles') {
            return `https://www.zhihu.com/api/v4/members/${STATE.id}/articles?include=data%5B*%5D.content%2Cvoteup_count%2Ccomment_count&offset=${offset}&limit=${limit}&sort_by=created`;
        }
        return '';
    }

    /**
     * Updates the progress bar while scraping is in flight.
     * @param {object|undefined} paging - The `paging` object of the last list response.
     */
    function updateProgress(paging) {
        // NOTE(review): `paging.totals` is assumed to be the total item count when
        // the API supplies it — confirm. Without it the bar stays at the fixed 50%.
        const totals = paging && typeof paging.totals === 'number' ? paging.totals : 0;
        if (totals > 0) {
            const percent = Math.min(100, Math.round((STATE.items.length / totals) * 100));
            UI.progressBar.style.width = `${percent}%`;
        } else {
            UI.progressBar.style.width = '50%';
        }
    }

    /**
     * Pages through the list API for the detected page, fetches comments for each
     * item and accumulates the processed items in STATE.items. Errors are logged
     * and end the run; items collected so far remain exportable.
     * @returns {Promise<void>}
     */
    async function startScraping() {
        if (STATE.isRunning) {
            return;
        }
        if (!detectPage()) {
            alert('无法识别当前页面');
            return;
        }

        STATE.isRunning = true;
        STATE.cancel = false;
        STATE.items = [];
        document.getElementById('zbc-start').disabled = true;
        document.getElementById('zbc-stop').disabled = false;
        toggleExportBtns(false);

        let offset = 0;
        let isEnd = false;
        const limit = 20;

        try {
            while (!isEnd && !STATE.cancel) {
                const apiUrl = buildListUrl(offset, limit);

                log(`正在请求列表 offset: ${offset}...`);
                const data = await fetchAPI(apiUrl);
                const entries = data && Array.isArray(data.data) ? data.data : [];

                if (entries.length === 0) {
                    isEnd = true;
                    continue;
                }

                // --- Main loop: process each entry and fetch its comments ---
                for (const item of entries) {
                    if (STATE.cancel) break;
                    if (!item) continue;

                    // Collection entries wrap the real item in an object-valued
                    // `content`; answers/articles carry their HTML body as a string
                    // in `content`, which must NOT be mistaken for a wrapper.
                    const isWrapped = item.content !== null && typeof item.content === 'object';
                    const realItem = isWrapped ? item.content : item;

                    const processedItem = processItem(realItem);

                    log(` └ 抓取评论: ${processedItem.title.slice(0, 10)}...`);
                    try {
                        processedItem.comments = await fetchComments(processedItem.id, processedItem.type);
                    } catch (err) {
                        log(` (评论抓取失败,跳过)`);
                        processedItem.comments = [];
                    }

                    STATE.items.push(processedItem);

                    // Pause after every fully processed item to keep the request rate low.
                    await sleep(CONFIG.requestDelay);
                }

                offset += limit;
                // A response without paging info keeps going until an empty page arrives.
                isEnd = Boolean(data.paging && data.paging.is_end);
                updateProgress(data.paging);
            }
        } catch (e) {
            log('错误: ' + e.message);
            console.error(e);
        }

        STATE.isRunning = false;
        document.getElementById('zbc-start').disabled = false;
        document.getElementById('zbc-stop').disabled = true;
        UI.progressBar.style.width = '100%';

        if (STATE.items.length > 0) {
            log(`抓取完成!共 ${STATE.items.length} 条。`);
            toggleExportBtns(true);
        } else {
            log('未抓取到数据。');
        }
    }

    /**
     * Fetches the first CONFIG.commentLimit root comments of an item.
     * Best-effort: never rejects; any failure resolves to an empty list so the
     * main scraping flow is not interrupted.
     * @param {string|number} id - Item id.
     * @param {string} type - Item type; only 'answer' and 'article' are supported.
     * @returns {Promise<Array<{author: string, content: string, vote_count: number, created_time: string}>>}
     */
    function fetchComments(id, type) {
        return new Promise((resolve) => {
            let resourceType = '';
            if (type === 'answer') {
                resourceType = 'answers';
            } else if (type === 'article') {
                resourceType = 'articles';
            }
            // Other item types have no known comment endpoint here; skip them
            // instead of querying the articles endpoint with an unrelated id.
            if (!resourceType || id === undefined || id === null) {
                resolve([]);
                return;
            }

            const url = `https://www.zhihu.com/api/v4/${resourceType}/${id}/root_comments?order=normal&limit=${CONFIG.commentLimit}&offset=0&status=open`;

            GM_xmlhttpRequest({
                method: "GET",
                url: url,
                onload: function (response) {
                    if (response.status !== 200) {
                        console.warn('[zbc] comment request failed', response.status, url);
                        resolve([]);
                        return;
                    }
                    try {
                        const res = JSON.parse(response.responseText);
                        const list = res && Array.isArray(res.data) ? res.data : [];
                        resolve(list.map(c => ({
                            author: commentAuthorName(c),
                            content: c.content,
                            vote_count: c.vote_count,
                            created_time: formatTimestamp(c.created_time)
                        })));
                    } catch (e) {
                        console.warn('[zbc] comment response could not be parsed', e);
                        resolve([]);
                    }
                },
                onerror: () => {
                    console.warn('[zbc] comment request errored', url);
                    resolve([]);
                }
            });
        });
    }

    /**
     * Extracts a display name from a comment object without throwing when the
     * author structure is incomplete.
     * @param {object} comment - Raw comment object from the API.
     * @returns {string} The author name, or '匿名用户' when none is present.
     */
    function commentAuthorName(comment) {
        const author = comment && comment.author;
        if (author && author.member && author.member.name) {
            return author.member.name;
        }
        // NOTE(review): some responses may expose the name directly on `author` — confirm.
        if (author && author.name) {
            return author.name;
        }
        return '匿名用户';
    }

    /**
     * Performs a GET request and parses the JSON response.
     * @param {string} url - Request URL.
     * @returns {Promise<object>} Parsed response body.
     * Rejects with an Error on a non-200 status, invalid JSON, or a network error.
     */
    function fetchAPI(url) {
        return new Promise((resolve, reject) => {
            GM_xmlhttpRequest({
                method: "GET",
                url: url,
                onload: function (response) {
                    if (response.status === 200) {
                        try {
                            resolve(JSON.parse(response.responseText));
                        } catch (e) {
                            reject(e);
                        }
                    } else {
                        reject(new Error("API Error: " + response.status));
                    }
                },
                // The callback argument is not an Error instance; wrap it so
                // callers reading `.message` get something meaningful.
                onerror: (err) => {
                    const detail = err && (err.error || err.statusText);
                    reject(new Error("Network Error" + (detail ? ": " + detail : "")));
                }
            });
        });
    }

    /**
     * Normalises a raw answer/article object into the flat record used for export.
     * Missing fields fall back to placeholders instead of throwing.
     * @param {object} item - Raw item from the list API.
     * @returns {{id: *, type: string, title: string, author: string, content: string,
     *            voteup_count: *, created_time: string, url: string, comments: Array}}
     */
    function processItem(item) {
        let content = typeof item.content === 'string' ? item.content : '';
        // Fix lazy-loaded images: promote the real source attribute to `src`.
        content = content.replace(/<img [^>]*src="[^"]+"[^>]*data-actualsrc="([^"]+)"[^>]*>/g, '<img src="$1">');
        content = content.replace(/<img [^>]*src="[^"]+"[^>]*data-original="([^"]+)"[^>]*>/g, '<img src="$1">');

        const rawTitle = item.question ? item.question.title : item.title;
        // NOTE(review): articles appear to expose `created` rather than `created_time` — confirm.
        const createdSeconds = item.created_time !== undefined ? item.created_time : item.created;

        return {
            id: item.id,
            type: item.type, // 'answer' or 'article'
            title: typeof rawTitle === 'string' && rawTitle !== '' ? rawTitle : '(无标题)',
            author: item.author && item.author.name ? item.author.name : '匿名用户',
            content: content,
            voteup_count: item.voteup_count,
            created_time: formatTimestamp(createdSeconds),
            url: typeof item.url === 'string' ? item.url.replace("api.zhihu.com", "www.zhihu.com") : '',
            comments: [] // filled in by the caller
        };
    }

    /**
     * Enables or disables both export buttons.
     * @param {boolean} enable - true to enable the buttons.
     */
    function toggleExportBtns(enable) {
        document.getElementById('zbc-export-html').disabled = !enable;
        document.getElementById('zbc-export-json').disabled = !enable;
    }

    // --- Export (single-page HTML including comments) ---

    /**
     * Renders the comment section of one item.
     * Comment bodies are inserted as-is because they are HTML fragments from the
     * API; scalar fields (author, time, votes) are escaped.
     * @param {object} item - Processed item.
     * @returns {string} HTML fragment.
     */
    function renderComments(item) {
        if (!item.comments || item.comments.length === 0) {
            return `<div class="comments-section" style="color:#ccc;text-align:center;font-size:12px;">暂无评论或未抓取</div>`;
        }
        const commentList = item.comments.map(c => `
            <div class="comment-item">
                <div class="comment-meta"><span class="comment-author">${escapeHtml(c.author)}</span> <span class="comment-time">${escapeHtml(c.created_time)}</span> <span class="comment-vote">👍 ${escapeHtml(c.vote_count)}</span></div>
                <div class="comment-content">${c.content}</div>
            </div>
        `).join('');
        return `
            <div class="comments-section">
                <div class="comments-title">精选评论 (${item.comments.length}条)</div>
                ${commentList}
            </div>
        `;
    }

    /**
     * Builds one HTML document containing a table of contents, every scraped
     * item and its comments, and hands it to FileSaver's saveAs().
     */
    function exportSingleHTML() {
        log('正在生成 HTML...');
        const title = `知乎导出_${STATE.currentType}_${new Date().toISOString().slice(0, 10)}`;

        const tocHtml = STATE.items.map((item, index) => {
            return `<li><a href="#item-${index}">${index + 1}. ${escapeHtml(item.title)}</a> <span style="font-size:12px;color:#999">(${escapeHtml(item.author)})</span></li>`;
        }).join('');

        // Titles, authors and URLs are plain text and are escaped; item.content is
        // the article body and is deliberately inserted as HTML.
        const contentHtml = STATE.items.map((item, index) => {
            const commentsHtml = renderComments(item);
            return `
            <div class="article-item" id="item-${index}">
                <div class="article-header">
                    <h2><a href="${escapeHtml(item.url)}" target="_blank">${escapeHtml(item.title)}</a></h2>
                    <div class="meta">
                        作者: ${escapeHtml(item.author)} | 赞同: ${escapeHtml(item.voteup_count)} | 时间: ${escapeHtml(item.created_time)}
                    </div>
                </div>
                <div class="article-content">
                    ${item.content}
                </div>
                ${commentsHtml}
                <div class="back-to-top"><a href="#toc">↑ 回到目录</a></div>
                <hr class="separator">
            </div>
            `;
        }).join('');

        const finalHtml = `<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
<title>${escapeHtml(title)}</title>
<style>
    body { font-family: -apple-system, BlinkMacSystemFont, "Helvetica Neue", "PingFang SC", "Microsoft YaHei", sans-serif; max-width: 900px; margin: 0 auto; padding: 20px; line-height: 1.7; color: #121212; background-color: #f6f6f6; }
    .container { background: #fff; padding: 40px; border-radius: 8px; box-shadow: 0 1px 3px rgba(26,26,26,.1); }
    h1.main-title { text-align: center; margin-bottom: 30px; color: #1772f6; }
    #toc { background: #f9f9f9; padding: 20px; border-radius: 6px; margin-bottom: 40px; border: 1px solid #eee; max-height: 400px; overflow-y: auto; }
    #toc ul { list-style: none; padding: 0; }
    #toc li { margin-bottom: 8px; border-bottom: 1px dashed #eee; padding-bottom: 4px; }
    #toc a { text-decoration: none; color: #333; font-size: 14px; }
    #toc a:hover { color: #1772f6; }
    .article-item { margin-bottom: 60px; }
    .article-header h2 { margin-bottom: 10px; font-size: 22px; }
    .article-header a { text-decoration: none; color: #121212; }
    .article-header a:hover { color: #1772f6; }
    .meta { font-size: 13px; color: #8590a6; margin-bottom: 20px; }
    .article-content img { max-width: 100%; height: auto; display: block; margin: 10px auto; border-radius: 4px; cursor: zoom-in; }
    .article-content blockquote { border-left: 3px solid #dfe3eb; color: #646464; padding-left: 15px; margin: 1em 0; }
    .article-content p { margin-bottom: 1.2em; text-align: justify; }
    /* Comment section styles */
    .comments-section { margin-top: 30px; background: #f6f6f6; padding: 15px; border-radius: 4px; border: 1px solid #eee; }
    .comments-title { font-weight: bold; margin-bottom: 10px; font-size: 14px; color: #666; }
    .comment-item { padding: 10px 0; border-bottom: 1px dashed #e0e0e0; }
    .comment-item:last-child { border-bottom: none; }
    .comment-meta { font-size: 12px; color: #999; margin-bottom: 4px; }
    .comment-author { color: #444; font-weight: bold; }
    .comment-vote { margin-left: 10px; color: #1772f6; }
    .comment-content { font-size: 13px; color: #333; line-height: 1.5; }
    .separator { border: 0; height: 1px; background: #ebebeb; margin: 40px 0; }
    .back-to-top { text-align: right; margin-top: 10px; }
    .back-to-top a { color: #8590a6; font-size: 12px; text-decoration: none; }
</style>
</head>
<body>
<div class="container">
    <h1 class="main-title">${escapeHtml(title)}</h1>
    <div id="toc">
        <h3>目录 (共 ${STATE.items.length} 条)</h3>
        <ul>${tocHtml}</ul>
    </div>
    <div id="main-content">
        ${contentHtml}
    </div>
</div>
</body>
</html>
`;

        const blob = new Blob([finalHtml], { type: "text/html;charset=utf-8" });
        saveAs(blob, `${title}.html`);
        log('HTML 文件已导出!');
    }

    /**
     * Serialises STATE.items as pretty-printed JSON and hands it to saveAs().
     */
    function exportJSON() {
        const title = `知乎数据_${STATE.currentType}_${STATE.id}`;
        const blob = new Blob([JSON.stringify(STATE.items, null, 2)], { type: "application/json;charset=utf-8" });
        saveAs(blob, `${title}.json`);
        log('JSON 文件已导出!');
    }

    // --- Start-up ---
    if (typeof document === 'undefined') {
        return; // not running in a page context; nothing to attach to
    }
    if (document.body) {
        initUI();
    } else {
        // The panel is appended to <body>; wait for it if the script runs early.
        document.addEventListener('DOMContentLoaded', initUI, { once: true });
    }
})();