Greasy Fork

来自缓存

Greasy Fork is available in English.

知网PDF下载助手

直接以PDF格式下载知网上的文献, 包括期刊论文和博硕士论文

当前为 2020-12-20 提交的版本,查看 最新版本

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Greasemonkey 油猴子或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴或 Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴或 Userscripts,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @id             CNKI_PDF_Supernova
// @name           知网PDF下载助手
// @version        3.2.1
// @namespace      https://github.com/supernovaZhangJiaXing/Tampermonkey/
// @author         Supernova
// @description    直接以PDF格式下载知网上的文献, 包括期刊论文和博硕士论文
// @include        http*://*.cnki.net/*
// @include        http*://*.cnki.net.*/*
// @include        */DefaultResult/Index*
// @include        */KNS8/AdvSearch*
// @include        */detail.aspx*
// @include        */CatalogViewPage.aspx*
// @include        */Article/*
// @include        */kns/brief/*
// @include        */kns55/brief/*
// @include        */grid2008/brief/*
// @include        */detail/detail.aspx*
// @exclude        http://image.cnki.net/*
// @require        https://unpkg.com/[email protected]/dist/pdf-lib.js
// @require        https://cdn.jsdelivr.net/gh/tabedit/code-segment@f730e9d9573c4ca7e758766e6a8fb282faae38af/outline.esm.js
// @run-at         document-idle
// @grant          unsafeWindow
// @grant          GM_setClipboard
// @grant          GM_xmlhttpRequest
// ==/UserScript==

'use strict';
// jQuery is taken from the host page through unsafeWindow instead of being bundled with the script.
var $ = unsafeWindow.jQuery;
// Script-level variables. NOTE(review): the functions visible in this file declare their own
// local `contents` and never reference `pdf`, so both look unused here — confirm before removing.
var contents = '';
var pdf;

/**
 * Entry point, run once the DOM is ready.
 *
 * Pages whose URL contains "detail.aspx" are treated as document detail pages and
 * get the thesis download enhancements; every other page is treated as a listing
 * page and (on KNS8 only) gets the PDF/CAJ default-format toggle.
 */
$(document).ready(function() {
    if (window.location.href.indexOf("detail.aspx") === -1) {
        init_search_page();
    } else {
        init_thesis_detail_page();
    }
});

/**
 * Adds the "default download format" toggle to KNS8 search result pages and keeps
 * the result links in sync with it.
 */
function init_search_page() {
    // Restrict to KNS8 so the toggle does not show up elsewhere. Matched
    // case-insensitively because the @include rules spell the path as "KNS8".
    if (!/kns8/i.test(window.location.href)) {
        return;
    }

    var LABEL_PDF = "当前默认下载格式: PDF (点击转换)";
    var LABEL_CAJ = "当前默认下载格式: CAJ (点击转换)";
    var use_pdf = true;

    var convert_box = document.createElement("div");
    convert_box.innerText = LABEL_PDF;
    convert_box.className = "ecp_tn-tab";
    convert_box.style.cssText = "color: blue; font-weight: bolder";

    // Rewrites every result link for the currently selected format and updates the label.
    function apply_format() {
        convert_box.innerText = use_pdf ? LABEL_PDF : LABEL_CAJ;
        $('.downloadlink').attr('href', use_pdf ? toPDF : toCAJ);
    }

    convert_box.onclick = function() {
        use_pdf = !use_pdf;
        apply_format();
    };

    // The result grid is (re)loaded via AJAX; after each reload fall back to PDF so
    // the label and the freshly rendered links cannot disagree.
    $(document).ajaxSuccess(function(event, xhr, settings) {
        if (settings && typeof settings.url === 'string'
                && settings.url.indexOf('/Brief/GetGridTableHtml') !== -1) {
            use_pdf = true;
            apply_format();
        }
    });

    // Place the toggle right after the "#icnki" entry. The previous code read
    // nextElementSibling from the jQuery wrapper (always undefined) and threw when
    // "#icnki" was absent; now a missing anchor simply means no toggle is shown.
    var mycnki = $('#icnki');
    if (mycnki.length) {
        mycnki.first().after(convert_box);
    }
}

/**
 * Enhances thesis (master/doctoral) detail pages: renames the whole-book button to
 * CAJ, adds a direct PDF download button, requests the table of contents and adds
 * help/tool links. Journal pages are left untouched.
 *
 * Original author's note: thesis pages show five buttons (mobile, whole-book,
 * paged, by-chapter, online) while journal pages show three.
 */
function init_thesis_detail_page() {
    // The new layout identifies theses by a dbcode such as CMFD/CDFD in the query string.
    if (!/dbcode=C[DM][FM]D(?:&|$)/i.test(location.search)) {
        return;
    }

    // Relabel "whole-book download" as "CAJ download".
    var caj_btn = $(".btn-dlcaj").first();
    if (caj_btn.length) {
        caj_btn.html(caj_btn.html().replace("整本", "CAJ"));
    }

    // Replace the existing .btn-dlpdf button with a PDF download button that reuses
    // its link with dflag switched from "downpage" to "pdfdown". When the page has no
    // such link the button is left alone instead of aborting the rest of the setup.
    var paged_btn = $(".btn-dlpdf");
    var paged_href = paged_btn.find("a").attr("href");
    if (paged_href) {
        paged_btn.remove();
        // Set href through .attr() so the URL is never parsed as markup.
        var pdf_link = $('<a id="pdfDown" target="_blank" name="pdfDown"><i></i>PDF下载</a>')
            .attr("href", paged_href.replace("&dflag=downpage", "&dflag=pdfdown"));
        caj_btn.after($('<li class="btn-dlpdf"></li>').append(pdf_link));
    }

    // The second .btn-dlcaj button (by-chapter download) carries the query string used
    // to fetch the table of contents. Skip the request when that link or its query
    // string is missing instead of throwing or requesting a bare "?".
    var chapter_href = $(".btn-dlcaj:eq(1)").find("a").attr("href") || "";
    var chapter_query = chapter_href.match(/\?.*/);
    if (chapter_query) {
        GM_xmlhttpRequest({
            method: 'GET',
            url: 'https://chn.oversea.cnki.net/kcms/download.aspx' + chapter_query[0],
            onload: manage_contents
        });
    }

    // Link to the usage instructions on the right-hand side.
    $(".operate-btn").append($('<li class="btn-phone"><a target="_blank" '
                               + 'href="https://mp.weixin.qq.com/s?__biz=MzU5MTY4NDUzMg==&mid=2247484384'
                               + '&idx=1&sn=6a135e824793d26b5bd8884b78c1f751&chksm=fe2a753bc95dfc2d3a5f6'
                               + '383553fc369894c5021619c85bb7554583bdcb8c10624bf2a7097e1&token=462651491&lang=zh_CN#rd">脚本说明</a></li>'));

    // Tool links at the bottom: PdgContentEditor download and the Python helper for macOS.
    var tool_box = $('<div class="fl info" style="font-size: 13px; border-left: 1px solid #ddd;"><p class="total-inform" style="margin-left: 3px">'
                     + '<span>目录合并软件: <a href="https://pan.baidu.com/s/12i-uv3xALmHOjKpnO691aw" target="_blank">PdgContentEditor下载</a></span>'
                     + '<span>Mac系统合并目录办法: <a target="_blank">保存此代码并用Python 3运行</a> (需先安装PyPDF2库)</span></p></div>');
    tool_box.click(function() {
        // Build the Python helper lazily and only once per link. The object URL is kept
        // alive on purpose: the download starts after this handler returns, so revoking
        // here would break it, and re-creating it on every click would leak URLs.
        var py_link = $(this).find('a').last();
        if (!py_link.attr('href')) {
            var data = new Blob([gen_py_for_FUCKING_mac()], {type: "text/plain; charset=UTF-8"});
            py_link.attr("download", 'pdf_utils.py');
            py_link.attr("href", window.URL.createObjectURL(data));
        }
    });
    $(".opts-down").append(tool_box);
}

/**
 * Returns the source of a standalone Python 3 script (it imports PyPDF2) that merges
 * a tab-separated "title<TAB>page" outline file into a PDF as bookmarks.
 *
 * The script is assembled from one array entry per Python line. Escape sequences
 * that must reach Python verbatim are written as "\\n" / "\\t": a plain "\n" inside
 * the input() prompts would become a real line break in the generated file and
 * leave the Python string literal unterminated (SyntaxError).
 *
 * @returns {string} Python source code, terminated by a blank line.
 */
function gen_py_for_FUCKING_mac() {
    var py_lines = [
        "# coding:utf-8",
        "# 往pdf文件中添加书签",
        "# 来源: https://www.jianshu.com/p/1aac3ae4d620",
        "# 执行前需要安装库 PyPDF2: pip install PyPDF2",
        "import os",
        "from PyPDF2 import PdfFileReader as reader, PdfFileWriter as writer",
        "",
        "",
        "class PDFHandleMode(object):",
        "    '''",
        "    处理PDF文件的模式",
        "    '''",
        "    # 保留源PDF文件的所有内容和信息,在此基础上修改",
        "    COPY = 'copy'",
        "    # 仅保留源PDF文件的页面内容,在此基础上修改",
        "    NEWLY = 'newly'",
        "",
        "",
        "def add_outline(file_name, content_file_name):",
        "    '''",
        "    给PDF文件挂载书签",
        "    :param file_name: PDF文件路径",
        "    :param content_file_name: 目录文件路径",
        "    '''",
        "    pdf_handler = MyPDFHandler(file_name, mode=PDFHandleMode.NEWLY)",
        "    pdf_handler.add_bookmarks_by_read_txt(content_file_name)",
        // splitext keeps directories/file names that contain extra dots intact.
        "    pdf_handler.save2file(os.path.splitext(file_name)[0] + u'-目录书签版.pdf')",
        "",
        "",
        "def read_bookmarks_from_txt(txt_file_path, page_offset=0):",
        "    '''",
        "    从文本文件中读取书签列表",
        "    文本文件有若干行,每行一个书签,内容格式为:",
        "    书签标题\t页码",
        "    注:中间用空格隔开,页码为1表示第1页",
        "    :param txt_file_path: 书签信息文本文件路径",
        "    :param page_offset: 页码便宜量,为0或正数,即由于封面、目录等页面的存在,在PDF中实际的绝对页码比在目录中写的页码多出的差值",
        "    :return: 书签列表",
        "    '''",
        "    bookmarks = []",
        "    with open(txt_file_path, 'r', encoding='utf-8') as fin:",
        "        for line in fin:",
        "            line = line.rstrip()",
        "            if not line:",
        "                continue",
        "            # 以'@'作为标题、页码分隔符",
        "            print('read line is: {0}'.format(line))",
        "            try:",
        "                title = line.split('\\t')[0].rstrip()",
        "                page = line.split('\\t')[1].strip()",
        "            except IndexError as msg:",
        "                print(msg)",
        "                continue",
        "            # title和page都不为空才添加书签,否则不添加",
        "            if title and page:",
        "                try:",
        "                    page = int(page) + page_offset",
        "                    bookmarks.append((title, page))",
        "                except ValueError as msg:",
        "                    print(msg)",
        "",
        "    return bookmarks",
        "",
        "",
        "class MyPDFHandler(object):",
        "    '''",
        "    封装的PDF文件处理类",
        "    '''",
        "",
        "    def __init__(self, pdf_file_path, mode=PDFHandleMode.COPY):",
        "        '''",
        "        用一个PDF文件初始化",
        "        :param pdf_file_path: PDF文件路径",
        "        :param mode: 处理PDF文件的模式,默认为PDFHandleMode.COPY模式",
        "        '''",
        "        # 只读的PDF对象",
        "        self.__pdf = reader(pdf_file_path, strict=False)",
        "",
        "        # 获取PDF文件名(不带路径)",
        "        self.file_name = os.path.basename(pdf_file_path)",
        "        #",
        "        self.metadata = self.__pdf.getXmpMetadata()",
        "        #",
        "        self.doc_info = self.__pdf.getDocumentInfo()",
        "        #",
        "        self.pages_num = self.__pdf.getNumPages()",
        "",
        "        # 可写的PDF对象,根据不同的模式进行初始化",
        "        self.__writeable_pdf = writer()",
        "        if mode == PDFHandleMode.COPY:",
        "            self.__writeable_pdf.cloneDocumentFromReader(self.__pdf)",
        "        elif mode == PDFHandleMode.NEWLY:",
        "            for idx in range(self.pages_num):",
        "                page = self.__pdf.getPage(idx)",
        "                self.__writeable_pdf.insertPage(page, idx)",
        "",
        "    def save2file(self, new_file_name):",
        "        '''",
        "        将修改后的PDF保存成文件",
        "        :param new_file_name: 新文件名,不要和原文件名相同",
        "        :return: None",
        "        '''",
        "        # 保存修改后的PDF文件内容到文件中",
        "        with open(new_file_name, 'wb') as fout:",
        "            self.__writeable_pdf.write(fout)",
        "        print('save2file success! new file is: {0}'.format(new_file_name))",
        "",
        "    def add_one_bookmark(self, title, page, parent=None, color=None, fit='/Fit'):",
        "        '''",
        "        往PDF文件中添加单条书签,并且保存为一个新的PDF文件",
        "        :param str title: 书签标题",
        "        :param int page: 书签跳转到的页码,表示的是PDF中的绝对页码,值为1表示第一页",
        "        :paran parent: A reference to a parent bookmark to create nested bookmarks.",
        "        :param tuple color: Color of the bookmark as a red, green, blue tuple from 0.0 to 1.0",
        "        :param list bookmarks: 是一个'(书签标题,页码)'二元组列表,举例:[(u'tag1',1),(u'tag2',5)],页码为1代表第一页",
        "        :param str fit: 跳转到书签页后的缩放方式",
        "        :return: None",
        "        '''",
        "        # 为了防止乱码,这里对title进行utf-8编码",
        "        # Supernova: 好像不需要, 先不编码",
        "        self.__writeable_pdf.addBookmark(title, page - 1, parent=parent, color=color, fit=fit)",
        "        print('add_one_bookmark success! bookmark title is: {0}'.format(title))",
        "",
        "    def add_bookmarks(self, bookmarks):",
        "        '''",
        "        批量添加书签",
        "        :param bookmarks: 书签元组列表,其中的页码表示的是PDF中的绝对页码,值为1表示第一页",
        "        :return: None",
        "        '''",
        "        for title, page in bookmarks:",
        "            self.add_one_bookmark(title, page)",
        "        print('add_bookmarks success! add {0} pieces of bookmarks to PDF file'.format(len(bookmarks)))",
        "",
        "    def add_bookmarks_by_read_txt(self, txt_file_path, page_offset=0):",
        "        '''",
        "        通过读取书签列表信息文本文件,将书签批量添加到PDF文件中",
        "        :param txt_file_path: 书签列表信息文本文件",
        "        :param page_offset: 页码便宜量,为0或正数,即由于封面、目录等页面的存在,在PDF中实际的绝对页码比在目录中写的页码多出的差值",
        "        :return: None",
        "        '''",
        "        bookmarks = read_bookmarks_from_txt(txt_file_path, page_offset)",
        "        self.add_bookmarks(bookmarks)",
        "        print('add_bookmarks_by_read_txt success!')",
        "",
        "",
        "def main(bookname, outline):",
        "    # 读取PDF文件,创建PdfFileReader对象",
        "    book = reader(bookname, strict=False)",
        "",
        "    # 创建PdfFileWriter对象,并用拷贝reader对象进行初始化",
        "    pdf = writer()",
        "    pdf.cloneDocumentFromReader(book)",
        "",
        "    # 添加书签",
        "    # 注意:页数是从0开始的,中文要用unicode字符串,否则会出现乱码",
        "    # 如果这里的页码超过文档的最大页数,会报IndexError异常",
        "    pdf.addBookmark(outline, 0)",
        "",
        "    # 保存修改后的PDF文件内容到文件中",
        "    # 注意:这里必须用二进制的'wb'模式来写文件,否则写到文件中的内容都为乱码",
        "    with open('book-with-bookmark.pdf', 'wb') as fout:",
        "        pdf.write(fout)",
        "",
        "",
        "if __name__ == '__main__':",
        "    pdf = input('请输入待添加书签的PDF文件的全路径或将其拖至此处: \\n')",
        "    outline = input('请输入待添加的目录索引文件的全路径或将其拖至此处: \\n')",
        "    add_outline(pdf.rstrip(), outline.rstrip())",
        "    input('添加完成, 按任意键退出')",
        ""
    ];
    return py_lines.join("\n") + "\n";
}

/**
 * jQuery `.attr('href', fn)` callback: computes the CAJ variant of the link in `this`.
 * The first "&dflag=..." parameter is replaced by "&dflag=nhdown"; when the link has
 * none, the parameter is appended. The result is also stored under the jQuery data
 * key "CAJ" before being returned.
 */
function toCAJ() {
    var caj_href = this.href.replace(/&dflag=\w*|$/, '&dflag=nhdown');
    var link = $(this);
    link.data('CAJ', caj_href);
    return link.data("CAJ");
}

/**
 * jQuery `.attr('href', fn)` callback: computes the PDF variant of the link in `this`.
 * The first "&dflag=..." parameter is replaced by "&dflag=pdfdown"; when the link has
 * none, the parameter is appended. The result is also stored under the jQuery data
 * key "PDF" before being returned.
 */
function toPDF() {
    var pdf_href = this.href.replace(/&dflag=\w*|$/, '&dflag=pdfdown');
    var link = $(this);
    link.data('PDF', pdf_href);
    return link.data("PDF");
}

/**
 * Converts the table-of-contents rows into outline text, one entry per line in the
 * form "title<TAB>page\r\n".
 *
 * For each row the title is read from childNodes[1].childNodes[1].innerHTML and the
 * page from childNodes[3].childNodes[0].textContent. "&nbsp;" entities become spaces
 * and every run of four spaces becomes a tab. The last row is always ignored
 * (the original author notes it is not a content entry).
 *
 * Rows that do not have this structure are skipped instead of throwing, so one
 * unexpected row no longer discards the whole outline; a missing list yields "".
 *
 * @param {ArrayLike<Node>} cnt_list rows of the table of contents
 * @returns {string} the outline text, or "" when there is nothing to extract
 */
function get_content(cnt_list) {
    var outline = "";
    var row_count = cnt_list ? cnt_list.length - 1 : 0; // minus one: the last row is not an entry
    for (var i = 0; i < row_count; i++) {
        var cells = cnt_list[i] && cnt_list[i].childNodes;
        var title_node = cells && cells[1] && cells[1].childNodes && cells[1].childNodes[1];
        var page_node = cells && cells[3] && cells[3].childNodes && cells[3].childNodes[0];
        if (!title_node || typeof title_node.innerHTML !== "string"
                || !page_node || typeof page_node.textContent !== "string") {
            continue; // not a regular title/page row
        }
        // The site lists a page range ("12-34"); only the first page is needed.
        var first_page = page_node.textContent.trim().split("-")[0];
        var title = title_node.innerHTML.trim().replace(/&nbsp;/g, " ").replace(/ {4}/g, "\t");
        outline += title + "\t" + first_page + "\r\n";
    }
    return outline;
}

/**
 * GM_xmlhttpRequest onload callback for the table-of-contents request.
 *
 * Extracts the outline text from the response and adds two buttons after the first
 * ".btn-dlpdf" element: one that copies the outline to the clipboard and one that
 * downloads it as a text file.
 *
 * @param {{responseText: string}} xhr response object passed by GM_xmlhttpRequest
 */
function manage_contents(xhr) {
    var cnt_list = $('tr', xhr.responseText); // rows of the table of contents
    var contents = get_content(cnt_list);     // outline text

    // "Copy outline" button, using the clipboard helper provided by the script manager.
    var copy_btn = $('<li class="btn-dlpdf"><a href="javascript:void(0);">目录复制</a></li>');
    copy_btn.click(function() {
        GM_setClipboard(contents);
        window.alert('目录已复制到剪贴板');
    });
    $('.btn-dlpdf').first().after(copy_btn);

    // "Download outline" button. It is inserted after the same anchor as the copy
    // button, so it ends up in front of it.
    var save_btn = $('<li class="btn-dlcaj"><a>目录下载</a></li>');
    save_btn.click(function() {
        // Create the object URL once and keep it: the download starts after this
        // handler returns, so revoking here would break it, and creating a new URL on
        // every click (as before, where revokeObjectURL was given the Blob and did
        // nothing) leaks one URL per click.
        var link = $(this).find('a');
        if (!link.attr('href')) {
            var data = new Blob([contents], {type: "text/plain; charset=UTF-8"});
            link.attr("download", '目录_' + $('.wx-tit h1:first-child').text().trim() + '.txt');
            link.attr("href", window.URL.createObjectURL(data));
        }
        window.alert("目录索引已保存, 请使用PdgCntEditor软件将目录整合到PDF中");
    });
    $('.btn-dlpdf').first().after(save_btn);
}