Greasy Fork is available in English.
Scrapes and displays data from the web page based on rules.
当前为
此脚本不应直接安装。它是供其他脚本使用的外部库，要使用该库请加入元指令 // @require https://update.greasyfork.icu/scripts/471264/1222745/MagicScraper.js
// ==UserScript==
// @name MagicScraper
// @namespace http://tampermonkey.net/
// @version 0.1
// @description Scrapes and displays data from the web page based on rules.
// @author aolko
// @match *://*/*
// @grant GM_addStyle
// ==/UserScript==
function magicScraper(rules, options = {}) {
/**
 * Recursively renders a rules/data object into DOM nodes appended to `element`.
 *
 * - String values become a new element whose `innerHTML` is the string.
 * - Non-null object values become a new (empty) element that is filled by a
 *   recursive call.
 * - Every other value type (numbers, booleans, null, undefined) is skipped.
 *
 * New elements reuse the container's tag name. When the container has no
 * `tagName` (e.g. a DocumentFragment), `div` is used instead.
 *
 * @param {Object} data - Values to render; only own enumerable keys are visited.
 * @param {Element|DocumentFragment} element - Container that receives the new nodes.
 * @param {boolean} keepChildren - When truthy, the container's existing children
 *   are moved into each newly created string element before it is appended.
 */
function createDOMFromScrapedData(data, element, keepChildren) {
  if (data === null || data === undefined) {
    return;
  }
  // A DocumentFragment has no tagName; without this fallback
  // document.createElement(undefined) would create literal <undefined> elements.
  const tagName = element.tagName || 'div';
  for (const key of Object.keys(data)) {
    const value = data[key];
    if (typeof value === 'string') {
      const newElement = document.createElement(tagName);
      // SECURITY NOTE(review): the value is inserted as raw HTML. Rules may be
      // fetched from an external URL, so they must come from a trusted source.
      newElement.innerHTML = value;
      if (keepChildren && element.children.length > 0) {
        // Snapshot first: appendChild moves nodes and mutates the live collection.
        Array.from(element.children).forEach(child => {
          newElement.appendChild(child);
        });
      }
      element.appendChild(newElement);
    } else if (value !== null && typeof value === 'object') {
      // typeof null is 'object', so null is excluded explicitly.
      const newElement = document.createElement(tagName);
      element.appendChild(newElement);
      createDOMFromScrapedData(value, newElement, keepChildren);
    }
  }
}
/**
 * Tests whether `current` matches a wildcard pattern.
 *
 * `*` in the pattern stands for any run of characters (including none); every
 * other character is matched literally. The comparison is case-insensitive and
 * anchored to the whole string.
 *
 * @param {string} pattern - Pattern such as `*`, `example.com` or `*.example.*`.
 * @param {string} current - Value to test (e.g. a hostname or a path).
 * @returns {boolean} True when `current` matches `pattern`.
 */
function matchPageOrDomain(pattern, current) {
  if (pattern === '*' || pattern === current) return true;
  // Escape regex metacharacters so that e.g. "." only matches a literal dot and
  // characters such as "(" cannot make the RegExp constructor throw.
  const escaped = pattern.replace(/[.+?^${}()|[\]\\]/g, '\\$&');
  // Expand every wildcard, not just the first one.
  const regexPattern = new RegExp(`^${escaped.replace(/\*/g, '.*')}$`, 'i');
  return regexPattern.test(current);
}
/**
 * Selects the rule set that applies to the current domain and page.
 *
 * Rule entries are checked in key order. For each entry:
 *  1. If the current domain is a subdomain of the entry's key and the entry has
 *     an own property named after that subdomain prefix, that value is returned.
 *  2. Otherwise, if the key matches the current domain (via matchPageOrDomain):
 *     - when `pages` has an own entry for the current page, a shallow merge of
 *       the domain entry and the page entry is returned (page keys win);
 *     - otherwise the domain entry itself is returned.
 * Entries that are not objects are skipped.
 *
 * NOTE(review): the returned domain/merged object still contains the `pages`
 * map itself — confirm whether callers expect that.
 *
 * @param {Object} rulesObj - Map of domain pattern -> rule entry.
 * @param {string} currentDomain - Hostname being processed.
 * @param {string} currentPage - Path being processed.
 * @returns {Object} The matching rules, or an empty object when none match.
 */
function scrapeDataByRules(rulesObj, currentDomain, currentPage) {
  const hasOwn = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
  const isObject = value => value !== null && typeof value === 'object';

  for (const domainPattern of Object.keys(rulesObj || {})) {
    const domainData = rulesObj[domainPattern];
    if (!isObject(domainData)) {
      continue;
    }

    // Subdomain-specific rules. This is checked before the pattern match: a
    // host such as "blog.example.com" never matches the key "example.com"
    // itself, so gating this lookup on the match made it unreachable.
    const suffix = `.${domainPattern}`;
    if (currentDomain !== domainPattern && currentDomain.endsWith(suffix)) {
      const subdomain = currentDomain.slice(0, currentDomain.length - suffix.length);
      if (hasOwn(domainData, subdomain)) {
        return domainData[subdomain];
      }
    }

    if (!matchPageOrDomain(domainPattern, currentDomain)) {
      continue;
    }

    // Page-specific rules take precedence; `pages` is optional.
    const pages = domainData.pages;
    if (isObject(pages) && hasOwn(pages, currentPage)) {
      return Object.assign({}, domainData, pages[currentPage]);
    }
    return domainData;
  }
  return {};
}
/**
 * Fetches a JSON rules document and hands the rules for the current
 * domain/page to `callback`.
 *
 * The document's `rules` property is passed to scrapeDataByRules; a missing
 * `rules` property is treated as an empty rule map. On any loading failure
 * (network error, non-2xx status, invalid JSON, rule selection error) the
 * error is logged and `callback` receives an empty object.
 *
 * `callback` is invoked exactly once. An exception thrown by `callback` is
 * logged separately and is not treated as a loading failure.
 *
 * @param {string} externalRulesURL - URL of the JSON rules document.
 * @param {string} currentDomain - Hostname being processed.
 * @param {string} currentPage - Path being processed.
 * @param {function(Object): void} callback - Receives the selected rules.
 */
function loadExternalRules(externalRulesURL, currentDomain, currentPage, callback) {
  fetch(externalRulesURL)
    .then(response => {
      // fetch only rejects on network failure; HTTP errors must be checked here.
      if (!response.ok) {
        throw new Error(`Unexpected HTTP status ${response.status} for ${externalRulesURL}`);
      }
      return response.json();
    })
    .then(data => scrapeDataByRules((data && data.rules) || {}, currentDomain, currentPage))
    .catch(err => {
      console.error('Error loading external rules:', err);
      return {};
    })
    // The callback runs after the loading catch so that an exception inside it
    // is neither reported as a load error nor triggers a second invocation.
    .then(scrapedData => callback(scrapedData))
    .catch(err => {
      console.error('Error applying external rules:', err);
    });
}
/**
 * Entry point: resolves the rules for the current location and passes them to
 * handleRules.
 *
 * When `rules` is a string it is treated as the URL of an external rules
 * document and loaded through loadExternalRules; otherwise it is used directly
 * as the rules object.
 */
function runScraping() {
  const { hostname, pathname } = window.location;

  // Inline rules object: resolve and render immediately.
  if (typeof rules !== 'string') {
    handleRules(scrapeDataByRules(rules, hostname, pathname));
    return;
  }

  // URL string: rendering happens once the external rules have been loaded.
  loadExternalRules(rules, hostname, pathname, loadedRules => {
    handleRules(loadedRules);
  });
}
/**
 * Renders the given rules into the page, replacing the current body content.
 *
 * When `pageRules` has no own enumerable keys a warning is logged and the page
 * is left untouched. Otherwise the rules are rendered into a DocumentFragment
 * via createDOMFromScrapedData (honouring `options.keepChildren`), the body is
 * emptied, and the fragment is appended to it.
 *
 * @param {Object} pageRules - Rules selected for the current domain/page.
 */
function handleRules(pageRules) {
  const ruleCount = Object.keys(pageRules).length;
  if (ruleCount > 0) {
    // Build everything off-document first, then swap the body content.
    const container = document.createDocumentFragment();
    createDOMFromScrapedData(pageRules, container, options.keepChildren);

    const { body } = document;
    body.innerHTML = '';
    body.appendChild(container);
  } else {
    console.warn('No rules found for the current domain and page.');
  }
}
runScraping();
}