Greasy Fork

[RED] Similar CD detector

Simple script for testing CD releases for duplicates

目前为 2023-01-29 提交的版本。查看 最新版本

// ==UserScript==
// @name         [RED] Similar CD detector
// @namespace    https://greasyfork.org/users/321857-anakunda
// @version      1.02
// @description  Simple script for testing CD releases for duplicity
// @match        https://redacted.ch/torrents.php?id=*
// @match        https://redacted.ch/torrents.php?page=*&id=*
// @run-at       document-end
// @author       Anakunda
// @copyright    2022, Anakunda (https://greasyfork.org/users/321857-anakunda)
// @license      GPL-3.0-or-later
// @require      https://openuserjs.org/src/libs/Anakunda/xhrLib.min.js
// ==/UserScript==
 
'use strict';
 
// Maximum number of lines the remarks may take in a comparison alert; longer lists get truncated
const maxRemarks = 60;
// Log list requests (promises) keyed by torrent id, shared by all comparisons on the page
const requestsCache = new Map();
// Compare icon picked as the first release of a pair, or null while nothing is selected
let selected = null;
/**
 * Reads the torrent id of every given element from the `torrentid` query parameter of the
 * `a.button_pl` link found inside it.
 *
 * @param {...HTMLElement} trs Elements to inspect (presumably torrent table rows - confirm with callers)
 * @returns {number[]} Positive torrent ids, in the order of the arguments
 * @throws {string} 'Invalid argument' when an argument is not an HTMLElement,
 *   'Failed to get torrent id' when the link or a positive id is missing
 */
const getTorrentIds = (...trs) => trs.map(row => {
	if (!(row instanceof HTMLElement)) throw 'Invalid argument';
	const link = row.querySelector('a.button_pl');
	if (link == null) throw 'Failed to get torrent id';
	const torrentId = parseInt(new URLSearchParams(link.search).get('torrentid'));
	// NaN (parameter missing or not numeric) fails this test as well
	if (torrentId > 0) return torrentId;
	throw 'Failed to get torrent id';
});
 
/**
 * Compares the rip logs of two CD torrents and decides whether the rips are similar enough to be
 * considered duplicates.
 *
 * For every disc the ToCs of both logs are compared against shift and drift limits and, unless a
 * range rip is involved, the per-track peak levels against a peak limit. Track offsets/lengths,
 * pre-gaps, checksums and AccurateRip signatures are compared for information only: they add
 * remarks but never fail the comparison.
 *
 * @param {...number} torrentIds Positive torrent ids; at least two are required. The logs of all
 *   given ids are loaded and validated, but only the first two are compared with each other.
 * @returns {Promise<true|string[]>} Resolves to `true` when every disc originates in the same
 *   ripping session, otherwise to the list of remarks collected for the similar pair. Rejects
 *   with a string reason when the rips are distinct or cannot be compared, and with whatever
 *   `localXHR` rejected with when a log list could not be loaded.
 */
function testSimilarity(...torrentIds) {
	if (torrentIds.length < 2 || !torrentIds.every(torrentId => torrentId > 0)) return Promise.reject('Invalid argument');
	// Time as printed in the logs: [h:]m:s.f, frames separated by "." or ":" (4 capturing groups)
	const msfTime = '(?:(\\d+):)?(\\d+):(\\d+)[\\.\\:](\\d+)';
	const rxMsfTime = new RegExp('^\\s*' + msfTime + '\\s*$');
	// Converts a time (string, or a match array holding the 4 groups at [1]..[4]) to sectors at
	// 75 sectors per second; yields NaN for strings of invalid format
	const msfToSector = time => Array.isArray(time) || (time = rxMsfTime.exec(time)) != null ?
		(((time[1] ? parseInt(time[1], 10) : 0) * 60 + parseInt(time[2], 10)) * 60 + parseInt(time[3], 10)) * 75 + parseInt(time[4], 10) : NaN;
	// Range rip marker line, optionally followed by the ripped sector range (groups 1 and 2)
	// 1211 + 1287
	const rxRR = /^(?:Selected range|Выбранный диапазон|已选择范围|Gewählter Bereich|Intervallo selezionato|Geselecteerd bereik|Utvalt område|Seleccionar gama|Избран диапазон|Wybrany zakres|Izabrani opseg|Vybraný rozsah)(?:[^\S\r\n]+\((?:Sectors|Секторы|扇区|Sektoren|Settori|Sektorer|Sectores|Сектори|Sektora|Sektory)[^\S\r\n]+(\d+)[^\S\r\n]*-[^\S\r\n]*(\d+)\))?$/m;
	// One ToC table row: track | start time | length | start sector | end sector.
	// Groups: 1 = track number, 2 = start time, 7 = length, 12 = start sector, 13 = end sector
	const tocParser = '^\\s*' + ['\\d+', msfTime, msfTime, '\\d+', '\\d+']
		.map(pattern => '(' + pattern + ')').join('\\s+\\|\\s+') + '\\s*$';
	const rxTocEntry = new RegExp(tocParser);
	// Extracts all ToC rows of a log as strings (null if there are none)
	const matchTocEntries = logfile => logfile.match(new RegExp(tocParser, 'gm'));
	// Turns ToC rows into { trackNumber, startSector, endSector } records (null for non-arrays)
	const tocEntriesMapper = tocEntries => Array.isArray(tocEntries) ? tocEntries.map(function(tocEntry, trackNdx) {
		const fields = rxTocEntry.exec(tocEntry);
		if (fields == null) throw `assertion failed: track ${trackNdx + 1} ToC entry invalid format`;
		// Sanity checks only: the printed times should agree with the printed sectors
		console.assert(msfToSector(fields[2]) == parseInt(fields[12], 10));
		console.assert(msfToSector(fields[7]) == parseInt(fields[13], 10) + 1 - parseInt(fields[12], 10));
		return {
			trackNumber: parseInt(fields[1], 10),
			startSector: parseInt(fields[12], 10),
			endSector: parseInt(fields[13], 10),
		};
	}) : null;
	return Promise.all(torrentIds.map(function loadLogFile(torrentId) {
		if (requestsCache.has(torrentId)) return requestsCache.get(torrentId);
		const request = localXHR('/torrents.php?' + new URLSearchParams({ action: 'loglist', torrentid: torrentId })).then(logPage =>
			Array.from(logPage.body.querySelectorAll(':scope > blockquote > pre:first-child'), pre => pre.textContent).filter(function(logfile) {
				const rr = rxRR.exec(logfile);
				if (rr == null) return true;
				// Ditch HTOA logs: range rips covering sector 0 up to right before the first track
				const tocRows = matchTocEntries(logfile);
				if (tocRows == null) return true;
				const tocEntries = tocEntriesMapper(tocRows);
				return parseInt(rr[1], 10) != 0 || parseInt(rr[2], 10) + 1 != tocEntries[0].startSector;
			}), function(reason) {
				// Don't keep failed requests cached, otherwise a single transient error would
				// break every later comparison of this torrent until the page is reloaded
				requestsCache.delete(torrentId);
				throw reason;
			});
		requestsCache.set(torrentId, request);
		return request;
	})).then(function(logfiles) {
		if (logfiles.some(logfile => logfile.length <= 0)) throw 'one or both releases have invalid logfile';
		if (logfiles.some(lf1 => logfiles.some(lf2 => lf1.length != lf2.length))) throw 'disc count mismatch';
		const remarks = [ ];
		for (let volumeNdx = 0; volumeNdx < logfiles[0].length; ++volumeNdx) {
			// Collects an informational remark for the given track of the current disc
			function addTrackRemark(trackNdx, remark) {
				if (!(trackNdx in volRemarks)) volRemarks[trackNdx] = [ ];
				volRemarks[trackNdx].push(remark);
			}
			// Extracts one value per track from both rips and hands each pair to a comparer.
			// patterns[i] is tried on every track record and the first matching one wins, its
			// match being converted by callbacks[i]; callbacks[patterns.length] is the comparer
			// called as (value1, value2, trackNdx), with undefined for tracks without a match.
			function processTrackValues(patterns, ...callbacks) {
				if (!Array.isArray(patterns) || patterns.length <= 0 || typeof callbacks[patterns.length] != 'function') return;
				const rxs = patterns.map(pattern => new RegExp('^[^\\S\\r\\n]+' + pattern + '\\s*$', 'm'));
				const values = trackRecords.map(records => records != null ? records.map(function(trackRecord) {
					const matches = rxs.map(rx => rx.exec(trackRecord));
					for (let index = 0; index < matches.length; ++index) if (matches[index] != null)
						return typeof callbacks[index] == 'function' ? callbacks[index](matches[index]) : matches[index];
				}) : [ ]);
				for (let trackNdx = 0; trackNdx < Math.max(values[0].length, values[1].length); ++trackNdx)
					callbacks[patterns.length](values[0][trackNdx], values[1][trackNdx], trackNdx);
			}

			const isRangeRip = logfiles.map(logfiles => rxRR.test(logfiles[volumeNdx])), volRemarks = { };
			if (isRangeRip.some(Boolean))
				remarks.push(`disc ${volumeNdx + 1} having at least one release as range rip, skipping peaks comparison`);
			let tocEntries = logfiles.map(logfile => matchTocEntries(logfile[volumeNdx]));
			if (tocEntries.some(toc => toc == null)) throw `disc ${volumeNdx + 1} ToC not found for at least one release`;
			if (tocEntries[0].length != tocEntries[1].length) throw `disc ${volumeNdx + 1} ToC lengths mismatch`;
			tocEntries = tocEntries.map(tocEntriesMapper);
			// Per-track sections of the log: the track header line plus all following indented or empty lines
			const trackRecords = logfiles.map(logfiles => // 12232
				logfiles[volumeNdx].match(/^(?:Track|Трек|音轨|Traccia|Spår|Pista|Трак|Utwór|Stopa)\s+\d+[^\S\r\n]*$\r?\n(?:^(?:[^\S\r\n]+.*)?$\r?\n)*/gm));
			if (trackRecords.some((trackRecords, ndx) => !isRangeRip[ndx] && trackRecords == null))
				throw `disc ${volumeNdx + 1} no track records could be extracted for at least one rip`;
			else if (!isRangeRip.some(Boolean) && trackRecords[0].length != trackRecords[1].length)
				throw `disc ${volumeNdx + 1} track records count mismatch (${trackRecords[0].length} <> ${trackRecords[1].length})`;
			const htoaCount = tocEntries.filter(tocEntries => tocEntries[0].startSector > 150).length;
			if (htoaCount > 0) remarks.push(`disc ${volumeNdx + 1} ${htoaCount < tocEntries.length ? 'one rip' : 'both rips'} possibly containing leading hidden track (ToC starting at non-zero offset)`);
			// Compare TOCs
			// Limits: ToC shift and drift in sectors, peak difference as a fraction of full scale
			const maxTocShift = 40, maxTocDrift = 40, maxPeakDelta = 0.01;
			const tocShifts = tocEntries[0].map((_, trackNdx) => tocEntries[1][trackNdx].endSector - tocEntries[0][trackNdx].endSector);
			const tocShiftOf = shifts => shifts.length > 0 ? Math.max(...shifts.map(Math.abs)) : 0;
			const tocDriftOf = shifts => shifts.length > 0 ? Math.max(...shifts) - Math.min(...shifts) : 0;
			// Signed shift of largest magnitude among all tracks but the last one (0 if none)
			let shiftsPool = tocShifts.length > 1 ? tocShiftOf(tocShifts.slice(0, -1)) : undefined;
			shiftsPool = tocShifts.find(trackShift => Math.abs(trackShift) == shiftsPool) || 0;
			// A last track being 150 sectors off that shift is taken for a post-gap and left out of the assessment
			const hasPostGap = [shiftsPool + 150, shiftsPool - 150].includes(tocShifts[tocShifts.length - 1]); // ??
			shiftsPool = !hasPostGap ? tocShifts : tocShifts.slice(0, -1);
			const tocShift = tocShiftOf(shiftsPool), tocDrift = tocDriftOf(shiftsPool);
			console.assert(tocDrift >= 0);
			const label = `ToC comparison for tid${torrentIds[0]} and tid${torrentIds[1]} disc ${volumeNdx + 1}`;
			console.group(label);
			console.table(tocEntries[0].map((_, trackNdx) => ({
				['track#']: trackNdx + 1,
				['start' + torrentIds[0]]: tocEntries[0][trackNdx].startSector,
				['end' + torrentIds[0]]: tocEntries[0][trackNdx].endSector,
				['length' + torrentIds[0]]: tocEntries[0][trackNdx].endSector + 1 - tocEntries[0][trackNdx].startSector,
				['start' + torrentIds[1]]: tocEntries[1][trackNdx].startSector,
				['end' + torrentIds[1]]: tocEntries[1][trackNdx].endSector,
				['length' + torrentIds[1]]: tocEntries[1][trackNdx].endSector + 1 - tocEntries[1][trackNdx].startSector,
				['tocShift']: tocShifts[trackNdx],
				['assessment']: Math.abs(tocShifts[trackNdx]) > maxTocShift ? 'failed' : 'passed',
			})));
			console.info(`ToC shift = ${tocShift} (${tocShift > maxTocShift ? 'failed' : 'passed'})`);
			// Show the same numbers the verdict is based on (post-gap track excluded)
			const driftDetail = shiftsPool.length > 0 ? `${Math.max(...shiftsPool)} - ${Math.min(...shiftsPool)} = ` : '';
			console.info(`ToC drift = ${driftDetail}${tocDrift} (${tocDrift > maxTocDrift ? 'failed' : 'passed'})`);
			console.groupEnd();
			if (tocShift > maxTocShift) throw `disc ${volumeNdx + 1} ToC shift above ${maxTocShift} sectors`;
			else if (tocDrift > maxTocDrift) throw `disc ${volumeNdx + 1} ToC drift above ${maxTocDrift} sectors`;
			else if (tocDrift > 0) remarks.push(`Disc ${volumeNdx + 1} shifted ToCs by ${tocShift} sectors with ${tocDrift} sectors drift`);
			else if (tocShift != 0) remarks.push(`Disc ${volumeNdx + 1} shifted ToCs by ${tocShift} sectors`);
			if (hasPostGap) remarks.push(`Disc ${volumeNdx + 1} with post-gap`);
			for (let trackNdx = 0; trackNdx < tocEntries[0].length; ++trackNdx) { // just informational
				const mismatches = [ ];
				if (tocEntries[0][trackNdx].startSector != tocEntries[1][trackNdx].startSector) mismatches.push('offsets');
				if (tocEntries[0][trackNdx].endSector - tocEntries[0][trackNdx].startSector
						!= tocEntries[1][trackNdx].endSector - tocEntries[1][trackNdx].startSector) mismatches.push('lengths');
				if (mismatches.length > 0) addTrackRemark(trackNdx, mismatches.join(' and ') + ' mismatch');
			}
			// Compare pre-gaps - just informational
			if (!isRangeRip.some(Boolean)) processTrackValues([
				'(?:Pre-gap length|Длина предзазора|前间隙长度|Pausenlänge|Durata Pre-Gap|För-gap längd|Longitud Pre-gap|Дължина на предпразнина|Długość przerwy|Pre-gap dužina|[Dd]élka mezery|Dĺžka medzery pred stopou)\\s+' + msfTime, // 1270
				'(?:Pre-gap length)\\s*:\\s*' + msfTime,
			], msfToSector, msfToSector, function(preGap1, preGap2, trackNdx) {
				if ((preGap1 || 0) != (preGap2 || 0)) addTrackRemark(trackNdx, 'pre-gaps mismatch');
			});
			// Compare peaks
			if (!isRangeRip.every(Boolean)) processTrackValues([
				'(?:Peak level|Пиковый уровень|峰值电平|Spitzenpegel|Pauze lengte|Livello di picco|Peak-nivå|Nivel Pico|Пиково ниво|Poziom wysterowania|Vršni nivo|[Šš]pičková úroveň)\\s+(\\d+(?:\\.\\d+)?)\\s*\\%', // 1217
				'(?:Peak)\\s*:\\s*(\\d+(?:\\.\\d+)?)',
			], m => parseFloat(m[1]) / 100, m => parseFloat(m[1]), function(peak1, peak2, trackNdx) {
				if (peak1 == undefined && !isRangeRip[0] || peak2 == undefined && !isRangeRip[1])
					throw `disc ${volumeNdx + 1} track ${trackNdx + 1} peak missing or invalid format`;
				// With a range rip involved the peaks are only checked for presence
				if (isRangeRip.some(Boolean)) return;
				if (Math.abs(peak2 - peak1) > maxPeakDelta)
					throw `disc ${volumeNdx + 1} track ${trackNdx + 1} peak difference above ${maxPeakDelta}`;
				if (Math.round(peak2 * 1000) != Math.round(peak1 * 1000)) addTrackRemark(trackNdx, 'peak levels mismatch');
			});
			// Compare checksums - just informational
			if (!isRangeRip.every(Boolean)) processTrackValues([
				'(?:(?:Copy|复制|Kopie|Copia|Kopiera|Copiar|Копиран) CRC|CRC (?:копии|kopii|kopije|kopie|kópie))\\s+([\\da-fA-F]{8})', // 1272
				'(?:CRC32 hash)\\s*:\\s*([\\da-fA-F]{8})',
			], m => parseInt(m[1], 16), m => parseInt(m[1], 16), function(checksum1, checksum2, trackNdx) {
				if (checksum1 == undefined && !isRangeRip[0] || checksum2 == undefined && !isRangeRip[1])
					addTrackRemark(trackNdx, 'checksum missing or invalid format');
				else if (!isRangeRip.some(Boolean) && checksum1 != checksum2) addTrackRemark(trackNdx, 'checksums mismatch');
			});
			// Compare AR signatures - just informational
			if (!isRangeRip.every(Boolean)) for (let v = 2; v > 0; --v) processTrackValues([
				'.+?\\[([\\da-fA-F]{8})\\].+\\(AR v' + v + '\\)',
				'(?:AccurateRip v' + v + ' signature)\\s*:\\s*([\\da-fA-F]{8})',
			], m => parseInt(m[1], 16), m => parseInt(m[1], 16), function(hash1, hash2, trackNdx) {
				// Missing AR signatures are deliberately not reported
				if (hash1 == undefined && !isRangeRip[0] || hash2 == undefined && !isRangeRip[1]) return;
				if (!isRangeRip.some(Boolean) && hash1 != hash2) addTrackRemark(trackNdx, 'AR v' + v + ' hashes mismatch');
			});
			for (let trackNdx in volRemarks)
				remarks.push(`Disc ${volumeNdx + 1} track ${parseInt(trackNdx, 10) + 1}: ${volRemarks[trackNdx].join(', ')}`);
			// Identical log header lines (ripper + extraction time) mean the very same ripping session
			const timeStamps = logfiles.map(logfiles => /^(EAC|XLD) (?:extraction logfile from) (.+)$/m.exec(logfiles[volumeNdx]));
			if (timeStamps.every(Boolean) && timeStamps.map(timeStamp => timeStamp[0]).every((timeStamp, ndx, arr) => timeStamp == arr[0]))
				remarks.push(`Disc ${volumeNdx + 1} originates in same ripping session`);
		}
		// All discs coming from the same ripping session => identical rips
		if (remarks.filter(remark => remark.endsWith('originates in same ripping session')).length == logfiles[0].length) return true;
		return remarks;
	});
}
 
/**
 * Counts the pairs of CD/FLAC/Lossless torrents with log in a release group that
 * testSimilarity() does not reject.
 *
 * @param {number} groupId Positive release group id
 * @returns {Promise<number>} Number of torrent pairs whose comparison was fulfilled; rejects with
 *   the error reported by the site when the group query was not successful
 * @throws {string} 'Invalid argument' (synchronously) when groupId is not a positive number
 */
function countSimilar(groupId) {
	if (!(groupId > 0)) throw 'Invalid argument';
	const query = new URLSearchParams({ action: 'torrentgroup', id: groupId });
	return localXHR('/ajax.php?' + query, { responseType: 'json' }).then(function(response) {
		if (response.status != 'success') return Promise.reject(response.error);
		const isLoggedCdRip = torrent => torrent.media == 'CD'
			&& torrent.format == 'FLAC' && torrent.encoding == 'Lossless' && torrent.hasLog;
		const torrentIds = response.response.torrents.filter(isLoggedCdRip).map(torrent => torrent.id);
		// Test every unordered pair once; a rejected comparison counts as "not similar"
		const verdicts = [ ];
		for (let first = 0; first < torrentIds.length; ++first)
			for (let second = first + 1; second < torrentIds.length; ++second)
				verdicts.push(testSimilarity(torrentIds[first], torrentIds[second]).then(() => true, () => false));
		return Promise.all(verdicts).then(outcomes => outcomes.filter(Boolean).length);
	});
}
 
// Release group links in torrent listings: Alt + click passes the group id taken from the link
// to countSimilar() and shows the outcome in an alert instead of following the link
[
	'table.torrent_table > tbody > tr.group div.group_info > strong > a:last-of-type',
	'table.torrent_table > tbody > tr.group div.group_info > a:last-of-type',
].forEach(function(selector) {
	document.body.querySelectorAll(selector).forEach(function(link) {
		link.onclick = function altClickHandler(evt) {
			// Plain clicks keep their default behaviour
			if (!evt.altKey) return true;
			const groupId = parseInt(new URLSearchParams(evt.currentTarget.search).get('id'));
			if (groupId > 0) countSimilar(groupId).then(function(count) {
				alert(count > 0 ? `Total ${count} CDs potentially duplicates` : 'No similar CDs found');
			}, alert);
			return false;
		};
		link.title = 'Use Alt + click to count considerable CD dupes in release group';
	});
});
 
const torrents = Array.prototype.filter.call(document.body.querySelectorAll('table#torrent_details > tbody > tr.torrent_row'),
	tr => (tr = tr.querySelector('td > a')) != null && /\b(?:FLAC)\b.+\b(?:Lossless)\b.+\b(?:Log) \(\-?\d+\s*\%\)/.test(tr.textContent));
if (torrents.length < 2) return;
 
// Adds a scales icon next to the action buttons of every candidate torrent row. The first click
// selects a release (icon turns orange), a click on another icon compares both releases and
// reports the verdict, a second click on the selected icon cancels the selection.
for (let tr of torrents) {
	// Rows without a valid "torrent<id>" element id get no icon
	let torrentId = /^torrent(\d+)$/.exec(tr.id);
	if (torrentId == null || !((torrentId = parseInt(torrentId[1], 10)) > 0)) continue;
	const div = document.createElement('DIV');
	div.innerHTML = '<svg height="14" viewBox="0 0 24 24"><path fill="gray" d="M14.0612 4.7156l4.0067-.7788a.9998.9998 0 10-.3819-1.9629l-4.0264.7826a2.1374 2.1374 0 00-3.7068.7205l-4.0466.7865a.9998.9998 0 10.3819 1.9629l4.0221-.7818a2.1412 2.1412 0 003.751-.729zM7.1782 9.5765a.9997.9997 0 00-1.8115 0l-3.2725 7A.9977.9977 0 002 16.9998v.7275a4.2727 4.2727 0 008.5454 0v-.7275a.9977.9977 0 00-.0942-.4233zm-.9057 2.7846l1.7014 3.6387H4.5713zm.0005 7.6387a2.268 2.268 0 01-2.2454-2h4.4902a2.268 2.268 0 01-2.2448 2zM18.6558 7.5765a.9997.9997 0 00-1.8116 0l-3.273 7a.9977.9977 0 00-.0941.4233v.7275a4.2727 4.2727 0 008.5454 0l.0005-.726a.997.997 0 00-.0943-.4248zm-.9058 2.7841l1.7017 3.6392h-3.4032zm0 7.6392a2.268 2.268 0 01-2.2454-2h4.4903a2.268 2.268 0 01-2.2449 2z" /></svg>';
	div.style = 'float: right; margin-left: 5pt; margin-right: 5pt; padding: 0; visibility: visible; cursor: pointer;';
	div.className = 'compare-release';
	div.onclick = function(evt) {
		console.assert(evt.currentTarget instanceof HTMLElement);
		// Recolours the icon's <path> (first child of the <svg>, which is the first child of the <div>)
		const setActive = (elem, active  = true) => { elem.children[0].children[0].setAttribute('fill', active ? 'orange' : 'gray') };
		const target = evt.currentTarget;
		if (!(selected instanceof HTMLElement)) {
			// First release of a pair
			setActive(selected = target, true);
			return;
		}
		if (selected == target) {
			// Clicking the selected icon again cancels the selection
			selected = null;
			setActive(target, false);
			return;
		}
		// Capture the pair and release the shared selection right away: clicks made while the
		// comparison is pending then start a new selection instead of launching another
		// comparison whose cleanup would find `selected` already reset to null
		const first = selected;
		selected = null;
		setActive(target, true);
		// Started inside a promise chain so that a synchronous failure of getTorrentIds() is
		// reported and cleaned up the same way as a failed comparison
		// (the icon's grandparent is passed as the torrent row - presumably the <tr>)
		Promise.resolve().then(() =>
			testSimilarity(...getTorrentIds(first.parentNode.parentNode, target.parentNode.parentNode))
		).then(function(remarks) {
			if (remarks === true) return alert('Identical rips (dupes)');
			let message = 'Releases can be duplicates (ToC shift/drift + peaks are too similar)';
			if (remarks.length > 0) message += '\n\n' + (maxRemarks > 0 && remarks.length > maxRemarks ?
				remarks.slice(0, maxRemarks - 1).join('\n') + '\n...' : remarks.join('\n'));
			alert(message);
		}).catch(reason => { alert('Releases not duplicates for the reason ' + reason) }).then(function() {
			// Keep an icon highlighted if it became the current selection in the meantime
			for (const elem of [first, target]) if (elem != selected) setActive(elem, false);
		});
	};
	div.title = 'Compare with different CD for similarity';
	const anchor = tr.querySelector('span.torrent_action_buttons');
	if (anchor != null) anchor.insertAdjacentElement('afterend', div);
}
 
// "Find CD dupes" link in the header of the torrent table: compares every pair of candidate
// rows, highlights the compare icons of pairs found similar and lists their editions
const container = document.body.querySelector('table#torrent_details > tbody > tr.colhead_dark > td:first-of-type');
if (container == null) throw 'Torrent table header not found';
const span = document.createElement('SPAN');
span.className = 'brackets';
span.textContent = 'Find CD dupes';
span.style = 'margin-left: 5pt; margin-right: 5pt; float: right; cursor: pointer; font-size: 8pt;';
span.onclick = function(evt) {
	// One worker per unordered pair of rows; it yields the pair when the comparison was
	// fulfilled and the string 'distinct' when it was rejected
	const compareWorkers = [ ];
	for (let ndx1 = 0; ndx1 < torrents.length; ++ndx1) {
		for (let ndx2 = ndx1 + 1; ndx2 < torrents.length; ++ndx2) {
			const pair = [torrents[ndx1], torrents[ndx2]];
			compareWorkers.push(testSimilarity(...getTorrentIds(pair[0], pair[1])).then(() => pair, () => 'distinct'));
		}
	}
	if (compareWorkers.length <= 0) return;
	Promise.all(compareWorkers).then(function(results) {
		const similarPairs = results.filter(Array.isArray);
		if (similarPairs.length <= 0) {
			alert('No similar CDs detected');
			return;
		}
		try {
			// Title of the closest preceding sibling row (or the row itself) carrying class "edition"
			const editionTitleOf = function(row) {
				let elem = row;
				while (elem != null && !elem.classList.contains('edition')) elem = elem.previousElementSibling;
				if (elem == null) return undefined;
				const title = elem.querySelector('td.edition_info > strong');
				return title != null ? title.textContent.trim() : undefined;
			};
			// Fixed colours for the first pairs, random light colours for any further ones
			const palette = ['#dff', '#ffd', '#fdd', '#dfd', '#ddf', '#fdf'];
			const randColor = () => 0xD0 + Math.floor(Math.random() * (0xF8 - 0xD0));
			similarPairs.forEach(function(sameTorrents, groupNdx) {
				const color = palette[groupNdx] || `rgb(${randColor()}, ${randColor()}, ${randColor()})`;
				for (const row of sameTorrents) {
					const icon = row.querySelector('div.compare-release');
					if (icon == null) continue;
					icon.style.padding = '2px';
					icon.style.border = '1px solid #808080';
					icon.style.borderRadius = '3px';
					icon.style.backgroundColor = color;
				}
			});
			const listing = similarPairs.map(sameTorrents =>
				editionTitleOf(sameTorrents[0]) + '\n' + editionTitleOf(sameTorrents[1]));
			alert('Similar CDs detected in these editions:\n\n' + listing.join('\n\n'));
		} catch (e) {
			alert(e);
		}
	});
};
container.append(span);