Greasy Fork

Greasy Fork is available in English.

Google Maps Reviews Scraper & Exporter (Enhanced)

Scrapes reviews from Google Maps with improved date handling and photo extraction

当前为 2025-07-24 提交的版本,查看 最新版本

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Greasemonkey 油猴子、Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Violentmonkey 暴力猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴、Userscripts,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展,例如 Tampermonkey 篡改猴,才能安装此脚本。

您需要先安装一款用户脚本管理器扩展后才能安装此脚本。

(我已经安装了用户脚本管理器,让我安装!)

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展,比如 Stylus,才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

您需要先安装一款用户样式管理器扩展后才能安装此样式。

(我已经安装了用户样式管理器,让我安装!)

// ==UserScript==
// @name         Google Maps Reviews Scraper & Exporter (Enhanced)
// @namespace    http://tampermonkey.net/
// @version      0.13
// @description  Scrapes reviews from Google Maps with improved date handling and photo extraction
// @author       sharmanhall
// @match        https://www.google.com/maps/place/*
// @icon         https://www.google.com/s2/favicons?sz=64&domain=google.com
// @grant        GM_addStyle
// @license      MIT 
// ==/UserScript==
 
(function() {
    'use strict';
 
    // Stylesheet for the floating control panel, its buttons and the status
    // area. Kept in a named constant so the injection call reads as one step.
    const panelStyles = `
        #review-scraper-panel {
            position: fixed;
            bottom: 20px;
            right: 20px;
            background-color: #fff;
            border: 1px solid #ccc;
            border-radius: 8px;
            padding: 10px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
            z-index: 1000;
            font-family: Arial, sans-serif;
            min-width: 200px;
        }
        #review-scraper-panel button {
            background-color: #4285f4;
            color: white;
            border: none;
            padding: 8px 12px;
            margin: 5px;
            border-radius: 4px;
            cursor: pointer;
            font-size: 14px;
            width: 100%;
        }
        #review-scraper-panel button:hover {
            background-color: #3367d6;
        }
        #scraper-status {
            margin-top: 10px;
            font-size: 12px;
            color: #666;
        }
        .scraper-progress {
            background-color: #e8f0fe;
            border-radius: 4px;
            padding: 5px;
            margin-top: 5px;
        }
    `;
    GM_addStyle(panelStyles);
 
    // Build the floating panel: three action buttons plus a status area whose
    // ids are looked up later when wiring click handlers and status updates.
    const panel = Object.assign(document.createElement('div'), {
        id: 'review-scraper-panel',
    });
    panel.innerHTML = `
        <button id="scrape-reviews">Scrape Reviews</button>
        <button id="copy-to-clipboard">Copy to Clipboard</button>
        <button id="export-csv">Export as CSV</button>
        <div id="scraper-status"></div>
    `;
    document.body.appendChild(panel);
 
    /**
     * Shows a status message inside the panel's `#scraper-status` element.
     *
     * The message is inserted as plain text (it is never parsed as HTML), so a
     * message containing markup characters is displayed literally. Any
     * previously shown message is replaced. If the status element cannot be
     * found in the document the call does nothing.
     *
     * @param {string} message - Text to display.
     */
    const setStatus = (message) => {
        const statusDiv = document.getElementById('scraper-status');
        if (!statusDiv) return;

        const progress = document.createElement('div');
        progress.className = 'scraper-progress';
        progress.textContent = String(message);

        // Clear the previous message before adding the new one.
        statusDiv.textContent = '';
        statusDiv.appendChild(progress);
    };

    /**
     * Converts a review's date label into a raw string plus an estimated
     * calendar date.
     *
     * Labels containing "ago" (case-insensitive) are treated as relative:
     * "<N> <unit>(s) ago", where the amount may also be the article "a"/"an"
     * (counted as 1), e.g. "a month ago" or "an hour ago". Any other label is
     * handed to the `Date` constructor as an absolute date.
     *
     * The estimate is the date part of `toISOString()`, i.e. a UTC calendar
     * date. Month and year offsets clamp the day of month so that, for
     * example, one month before March 31 is the last day of February rather
     * than rolling over into March.
     *
     * @param {string|null|undefined} dateText - Label text from the review.
     * @param {Date} [now=new Date()] - Reference instant for relative labels.
     * @returns {{raw_date: string, estimated_date: string}} `raw_date` is the
     *   trimmed input; `estimated_date` is `YYYY-MM-DD`, or 'unknown' when the
     *   label cannot be interpreted. Both are '' when `dateText` is empty.
     */
    const parseReviewDate = (dateText, now = new Date()) => {
        if (!dateText) return { raw_date: '', estimated_date: '' };

        const rawDate = dateText.trim();
        let estimatedDate = '';

        // Moves `date` back by whole months in place, keeping the day of month
        // where possible and clamping it to the target month's last day.
        const subtractMonths = (date, months) => {
            const dayOfMonth = date.getDate();
            date.setDate(1); // avoid overflow while changing the month
            date.setMonth(date.getMonth() - months);
            const daysInMonth = new Date(date.getFullYear(), date.getMonth() + 1, 0).getDate();
            date.setDate(Math.min(dayOfMonth, daysInMonth));
        };

        try {
            if (rawDate.toLowerCase().includes('ago')) {
                // Relative label: a number or an article, then a time unit.
                const timeMatch = rawDate.match(/\b(\d+|an?)\s+(second|minute|hour|day|week|month|year)s?\s+ago/i);
                if (timeMatch) {
                    const [, amount, unit] = timeMatch;
                    const num = /^\d+$/.test(amount) ? Number.parseInt(amount, 10) : 1;
                    const date = new Date(now);

                    switch (unit.toLowerCase()) {
                        case 'second': date.setSeconds(date.getSeconds() - num); break;
                        case 'minute': date.setMinutes(date.getMinutes() - num); break;
                        case 'hour': date.setHours(date.getHours() - num); break;
                        case 'day': date.setDate(date.getDate() - num); break;
                        case 'week': date.setDate(date.getDate() - (num * 7)); break;
                        case 'month': subtractMonths(date, num); break;
                        case 'year': subtractMonths(date, num * 12); break;
                    }
                    estimatedDate = date.toISOString().split('T')[0];
                }
            } else {
                // Absolute label: rely on the engine's date parsing.
                const parsedDate = new Date(rawDate);
                if (!Number.isNaN(parsedDate.getTime())) {
                    estimatedDate = parsedDate.toISOString().split('T')[0];
                }
            }
        } catch (e) {
            // toISOString throws on out-of-range dates; report and fall back
            // to 'unknown' rather than aborting the whole scrape.
            console.warn('Date parsing error:', e);
        }

        return {
            raw_date: rawDate,
            estimated_date: estimatedDate || 'unknown'
        };
    };

    /**
     * Collects the photo URLs attached to a single review.
     *
     * Images are gathered from several selectors (by image host and by
     * container class). An image is skipped when it is the reviewer's profile
     * picture: either its URL contains 'avatar' or 'profile', or it carries
     * the `NBa7we` class, which is the class this script uses to locate the
     * profile image. Without that class check the profile picture, which is
     * also served from googleusercontent.com, would be reported as a photo.
     *
     * URLs carrying a `=w<width>-h<height>` size suffix are rewritten to
     * request a 1024x768 rendition. Duplicates are removed; first-seen order
     * is preserved.
     *
     * @param {Element} reviewDiv - Root element of one review.
     * @returns {string[]} Unique photo URLs.
     */
    const extractReviewPhotos = (reviewDiv) => {
        const PROFILE_IMG_CLASS = 'NBa7we';

        // Look for review images in various possible containers
        const imageSelectors = [
            'img[src*="googleusercontent.com"]',
            'img[src*="lh3.googleusercontent.com"]',
            'img[src*="maps.gstatic.com"]',
            '.KtCyie img', // Common review image container
            '.EDblX img',  // Another possible container
            '.JrO5Xe img'  // Yet another container
        ];

        const isProfileImage = (img) =>
            img.classList.contains(PROFILE_IMG_CLASS) ||
            img.src.includes('avatar') ||
            img.src.includes('profile');

        // A Set keeps insertion order and makes the duplicate check O(1); the
        // same image is usually matched by more than one selector.
        const photos = new Set();

        for (const selector of imageSelectors) {
            for (const img of reviewDiv.querySelectorAll(selector)) {
                if (!img.src || isProfileImage(img)) continue;

                let photoUrl = img.src;
                if (photoUrl.includes('=w') && photoUrl.includes('-h')) {
                    // Replace size parameters with higher resolution
                    photoUrl = photoUrl.replace(/=w\d+-h\d+(-[^=]*)?/g, '=w1024-h768');
                }
                photos.add(photoUrl);
            }
        }

        return [...photos];
    };
 
    /**
     * Scrolls to the bottom repeatedly so that lazily loaded reviews are
     * added to the page, then scrolls back to the top.
     *
     * The scroll target is the element two levels above
     * `.m6QErb[data-value="Sort"]` when that element exists; otherwise the
     * window itself is scrolled. In both cases scrolling stops as soon as the
     * scroll height stops growing, or after the attempt cap is exceeded, so a
     * page that keeps growing cannot keep the loop running forever.
     *
     * @returns {Promise<void>} Resolves once scrolling has finished.
     */
    const autoScroll = async () => {
        const MAX_SCROLL_ATTEMPTS = 50;
        const SETTLE_DELAY_MS = 2000; // time given to the page to load more content

        // Scrolls to the current bottom until the height stops increasing or
        // the attempt cap is exceeded, then returns to the top.
        const scrollUntilStable = async (getHeight, scrollToY) => {
            let previousHeight = 0;
            let currentHeight = getHeight();
            let attempts = 0;

            do {
                previousHeight = currentHeight;
                scrollToY(currentHeight);
                await new Promise((resolve) => setTimeout(resolve, SETTLE_DELAY_MS));
                currentHeight = getHeight();
                attempts += 1;

                setStatus(`Loading reviews... (attempt ${attempts})`);
            } while (currentHeight > previousHeight && attempts <= MAX_SCROLL_ATTEMPTS);

            scrollToY(0);
        };

        setStatus('Auto-scrolling to load all reviews...');

        // Try to find and scroll within the reviews container
        const reviewsContainer = document.querySelector('.m6QErb[data-value="Sort"]')?.parentElement?.parentElement;

        if (reviewsContainer) {
            await scrollUntilStable(
                () => reviewsContainer.scrollHeight,
                (y) => reviewsContainer.scrollTo(0, y)
            );
        } else {
            // Fallback to window scrolling
            await scrollUntilStable(
                () => document.documentElement.scrollHeight,
                (y) => window.scrollTo(0, y)
            );
        }
    };
 
    /**
     * Clicks the "more" buttons of truncated reviews so their full text is
     * present in the page before extraction.
     *
     * Several selectors are tried because the same button may be reachable in
     * more than one way. A button is clicked only if its text or aria-label
     * contains "more" (case-insensitive), and each button is clicked at most
     * once even when several selectors match it, so a second click cannot
     * toggle it back. A short pause follows every click.
     *
     * @returns {Promise<void>} Resolves once all candidate buttons are handled.
     */
    const expandReviews = async () => {
        const CLICK_PAUSE_MS = 300;

        setStatus('Expanding truncated reviews...');

        // Multiple selectors for "More" buttons
        const moreButtonSelectors = [
            "button.w8nwRe.kyuRq",
            "button[aria-label*='more']",
            "button.review-more-link",
            ".review-full-text button"
        ];

        const mentionsMore = (text) => Boolean(text?.toLowerCase().includes('more'));
        const clicked = new Set();

        for (const selector of moreButtonSelectors) {
            for (const button of document.querySelectorAll(selector)) {
                if (clicked.has(button)) continue;

                const isMoreButton =
                    mentionsMore(button.textContent) ||
                    mentionsMore(button.getAttribute('aria-label'));
                if (!isMoreButton) continue;

                clicked.add(button);
                button.click();
                await new Promise((resolve) => setTimeout(resolve, CLICK_PAUSE_MS));
            }
        }
    };
 
    /**
     * Loads and expands all reviews, then reads one record per unique
     * `data-review-id` from the page.
     *
     * The resulting array is also stored on `window.scrapedReviews` so the
     * copy and export actions can reuse it.
     *
     * @returns {Promise<Object[]>} One record per review, in page order.
     */
    const scrapeReviews = async () => {
        await autoScroll();
        await expandReviews();

        // Trimmed text of the first match under `root`, or '' when absent/empty.
        const textOf = (root, selector) =>
            root.querySelector(selector)?.textContent.trim() || '';

        // First run of digits in the star widget's aria-label, or 0.
        const readStarRating = (root) => {
            const label = root
                .querySelector("span.kvMYJc[role='img']")
                ?.getAttribute("aria-label");
            const digits = label?.match(/(\d+)/);
            return digits ? Number.parseInt(digits[0], 10) : 0;
        };

        // Profile picture URL with its thumbnail size suffix swapped for a larger one.
        const readProfileImageUrl = (root) => {
            const src = root.querySelector("img.NBa7we")?.src;
            return src ? src.replace('=w36-h36-p-rp-mo-br100', '=s200') : '';
        };

        setStatus('Extracting review data...');
        const reviewDivs = document.querySelectorAll("div[data-review-id]");
        const total = reviewDivs.length;
        const seenIds = new Set();
        const reviews = [];

        Array.from(reviewDivs).forEach((reviewDiv, index) => {
            const reviewId = reviewDiv.getAttribute("data-review-id");
            // The same id can appear on more than one element; keep the first.
            if (seenIds.has(reviewId)) return;

            setStatus(`Processing review ${index + 1} of ${total}...`);

            const dateInfo = parseReviewDate(
                reviewDiv.querySelector("span.rsqaWe")?.textContent
            );
            const reviewPhotos = extractReviewPhotos(reviewDiv);
            const shareUrl =
                reviewDiv.querySelector("button[data-href]")?.getAttribute("data-href") || '';

            seenIds.add(reviewId);
            reviews.push({
                reviewer_name: textOf(reviewDiv, "div.d4r55"),
                profile_img_url: readProfileImageUrl(reviewDiv),
                review_date_raw: dateInfo.raw_date,
                review_date_estimated: dateInfo.estimated_date,
                star_rating: readStarRating(reviewDiv),
                review_url: shareUrl,
                review_content: textOf(reviewDiv, "span.wiI7pd"),
                review_photos: reviewPhotos,
                review_id: reviewId,
                scraped_at: new Date().toISOString()
            });
        });

        setStatus(`✅ Successfully scraped ${reviews.length} reviews with enhanced data!`);
        window.scrapedReviews = reviews; // Store globally for other functions
        return reviews;
    };
 
    /**
     * Copies the scraped reviews to the clipboard as pretty-printed JSON.
     *
     * Reuses `window.scrapedReviews` when present; otherwise runs a scrape
     * first. Clipboard failures are reported in the status area and logged.
     *
     * @returns {Promise<void>}
     */
    const copyToClipboard = async () => {
        let reviews = window.scrapedReviews;
        if (!reviews) {
            reviews = await scrapeReviews();
        }

        const json = JSON.stringify(reviews, null, 2);

        try {
            await navigator.clipboard.writeText(json);
            setStatus(`✅ ${reviews.length} reviews copied to clipboard as JSON!`);
        } catch (err) {
            setStatus('❌ Error copying to clipboard. Check console.');
            console.error("Could not copy content to clipboard: ", err);
        }
    };

    /**
     * Downloads the scraped reviews as a CSV file named
     * `google_maps_reviews_<YYYY-MM-DD>.csv`.
     *
     * Reuses `window.scrapedReviews` when present; otherwise runs a scrape
     * first. Array fields are joined with '; '. A field is wrapped in double
     * quotes (with embedded quotes doubled) when it contains a comma, a quote,
     * a line feed or a carriage return. Missing (null/undefined) fields are
     * written as empty.
     *
     * The temporary object URL created for the download is revoked shortly
     * after the click so the blob's memory can be released.
     *
     * @returns {Promise<void>}
     */
    const exportAsCSV = async () => {
        const REVOKE_DELAY_MS = 1000; // let the download start before releasing the URL

        const reviews = window.scrapedReviews || await scrapeReviews();

        // CSV headers
        const headers = [
            'reviewer_name',
            'profile_img_url',
            'review_date_raw',
            'review_date_estimated',
            'star_rating',
            'review_url',
            'review_content',
            'review_photos',
            'review_id',
            'scraped_at'
        ];

        // Renders one cell, quoting it only when the content requires it.
        const toCsvField = (value) => {
            const flat = Array.isArray(value) ? value.join('; ') : value;
            const text = String(flat ?? '');
            return /[",\r\n]/.test(text) ? `"${text.replace(/"/g, '""')}"` : text;
        };

        // Convert to CSV
        const rows = reviews.map((review) =>
            headers.map((header) => toCsvField(review[header])).join(',')
        );
        const csvContent = [headers.join(','), ...rows].join('\n');

        // Download CSV file
        const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
        const url = URL.createObjectURL(blob);
        const link = document.createElement('a');
        link.setAttribute('href', url);
        link.setAttribute('download', `google_maps_reviews_${new Date().toISOString().split('T')[0]}.csv`);
        link.style.visibility = 'hidden';
        document.body.appendChild(link);
        try {
            link.click();
        } finally {
            // Always clean up the temporary link and the object URL.
            document.body.removeChild(link);
            setTimeout(() => URL.revokeObjectURL(url), REVOKE_DELAY_MS);
        }

        setStatus(`✅ CSV file downloaded with ${reviews.length} reviews!`);
    };
 
    // Event listeners
    // Each action is async; wrapping it ensures a failure is logged and shown
    // in the status area instead of becoming an unhandled promise rejection
    // that leaves the last progress message on screen.
    const withErrorStatus = (action) => async () => {
        try {
            await action();
        } catch (err) {
            console.error('Review scraper action failed:', err);
            setStatus('❌ Something went wrong. Check console.');
        }
    };

    document.getElementById('scrape-reviews').addEventListener('click', withErrorStatus(scrapeReviews));
    document.getElementById('copy-to-clipboard').addEventListener('click', withErrorStatus(copyToClipboard));
    document.getElementById('export-csv').addEventListener('click', withErrorStatus(exportAsCSV));

    // Initialize
    setStatus('Ready to scrape reviews! 🚀');
})();