πŸ—ΊοΈ Google Places API Business Scraper

Fetch comprehensive business information for a search query, restricted to a chosen geographic area

⚠️ Important: CORS Limitation

The Google Places API cannot be called directly from a browser due to CORS restrictions. This tool provides you with:

  • URL Generator: Builds the correct API URLs for your searches
  • Backend Code: Node.js and Python implementations you can run on your server
  • CSV Export: Process results into your desired format

Complete Node.js Implementation

Save as scraper.js and run with: node scraper.js

const https = require('https');
const fs = require('fs');

const API_KEY = process.env.GOOGLE_API_KEY || 'YOUR_API_KEY'; // never commit a real key
const SEARCH_QUERY = 'Hairdressers';
const LOCATION = 'Thembisa, South Africa';

// Helper function to make HTTPS requests
function makeRequest(url) {
    return new Promise((resolve, reject) => {
        https.get(url, (res) => {
            let data = '';
            res.on('data', (chunk) => data += chunk);
            res.on('end', () => {
                try {
                    resolve(JSON.parse(data));
                } catch (e) {
                    reject(e);
                }
            });
        }).on('error', reject);
    });
}

// Parse opening hours
function parseOpeningHours(periods) {
    if (!periods) return '';
    const days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'];
    const parts = [];
    periods.forEach(period => {
        if (period.open) {
            const day = days[period.open.day];
            const openTime = formatTime(period.open.time);
            const closeTime = period.close ? formatTime(period.close.time) : '';
            parts.push(`${day} ${openTime}–${closeTime}`);
        }
    });
    // Join with separators so the column stays readable ("Monday 9 am–5 pm; Tuesday ...")
    return parts.join('; ');
}

function formatTime(time) {
    if (!time) return '';
    const hour = parseInt(time.substring(0, 2));
    const minute = time.substring(2);
    const ampm = hour >= 12 ? 'pm' : 'am';
    const formattedHour = hour % 12 || 12;
    return `${formattedHour}${minute !== '00' ? ':' + minute : ''} ${ampm}`;
}

// Convert to CSV format
function toCSV(results) {
    const headers = [
        'Textbox_value', 'Name', 'Header_image', 'Rating', 'Rating_count', 'Price_range',
        'Category', 'Detail_URL', 'Tags', 'Address', 'Located_in', 'Current_status',
        'Next_status', 'Scraped_at', 'Open_hours', 'Latitude', 'Longitude', 'Plus_code',
        'Website', 'Phone', 'Review_keyword1', 'Review_keyword2', 'Review_keyword3',
        'Review_keyword4', 'Review_keyword5', 'Review_keyword6', 'Review_keyword7',
        'Review_keyword8', 'Review_keyword9', 'Review_keyword10', 'Claim_This_Business'
    ];

    const rows = [headers.join(',')];
    
    results.forEach(place => {
        const photoUrl = place.photos?.[0]?.photo_reference
            ? `https://maps.googleapis.com/maps/api/place/photo?maxwidth=400&photoreference=${place.photos[0].photo_reference}&key=${API_KEY}`
            : '';
        
        const openNow = place.opening_hours?.open_now;
        const status = openNow === undefined ? '' : (openNow ? 'Open' : 'Closed');
        const scrapedAt = new Date().toISOString().replace('T', ' ').substring(0, 19);
        
        const row = [
            `"${SEARCH_QUERY} ${LOCATION}"`,
            `"${(place.name || '').replace(/"/g, '""')}"`,
            `"${photoUrl}"`,
            place.rating || '',
            place.user_ratings_total || '',
            '',
            `"${(place.types?.[0]?.replace(/_/g, ' ') || '').replace(/"/g, '""')}"`,
            `"${place.url || ''}"`,
            '',
            `"${(place.formatted_address || '').replace(/"/g, '""')}"`,
            '',
            status,
            '',
            scrapedAt,
            `"${parseOpeningHours(place.opening_hours?.periods)}"`,
            place.geometry?.location?.lat || '',
            place.geometry?.location?.lng || '',
            `"${place.plus_code?.compound_code || ''}"`,
            `"${place.website || ''}"`,
            `"${place.formatted_phone_number || ''}"`,
            '', '', '', '', '', '', '', '', '', '', ''
        ];
        
        rows.push(row.join(','));
    });
    
    return rows.join('\n');
}

// Main scraping function
async function scrapeBusinesses() {
    console.log('Starting scrape...');
    
    const allResults = [];
    let nextPageToken = null;
    let pageCount = 0;
    
    try {
        do {
            // Text search
            // Text Search has no locationbias parameter, so embed the location in the query itself
            const query = `${SEARCH_QUERY} in ${LOCATION}`;
            const searchUrl = `https://maps.googleapis.com/maps/api/place/textsearch/json?query=${encodeURIComponent(query)}${nextPageToken ? `&pagetoken=${nextPageToken}` : ''}&key=${API_KEY}`;
            
            console.log(`Fetching page ${pageCount + 1}...`);
            const searchData = await makeRequest(searchUrl);
            
            if (searchData.status !== 'OK' && searchData.status !== 'ZERO_RESULTS') {
                throw new Error(`API Error: ${searchData.status}`);
            }
            
            // Get details for each place
            if (searchData.results) {
                for (const place of searchData.results) {
                    const detailsUrl = `https://maps.googleapis.com/maps/api/place/details/json?place_id=${place.place_id}&fields=name,formatted_address,formatted_phone_number,website,opening_hours,rating,user_ratings_total,types,photos,geometry,plus_code,business_status,url&key=${API_KEY}`;
                    
                    const detailsData = await makeRequest(detailsUrl);
                    if (detailsData.status === 'OK' && detailsData.result) {
                        allResults.push(detailsData.result);
                    }
                    
                    // Delay to avoid rate limiting
                    await new Promise(resolve => setTimeout(resolve, 100));
                }
            }
            
            nextPageToken = searchData.next_page_token;
            pageCount++;
            
            console.log(`Found ${allResults.length} businesses so far...`);
            
            // Google needs a short delay (commonly ~2 s) before next_page_token becomes valid
            if (nextPageToken) {
                await new Promise(resolve => setTimeout(resolve, 2000));
            }
            
        } while (nextPageToken && pageCount < 3);
        
        console.log(`\nTotal businesses found: ${allResults.length}`);
        
        // Save to CSV
        const csv = toCSV(allResults);
        const filename = `business_data_${Date.now()}.csv`;
        fs.writeFileSync(filename, csv);
        
        console.log(`Data saved to ${filename}`);
        
    } catch (error) {
        console.error('Error:', error.message);
    }
}

// Run the scraper
scrapeBusinesses();

πŸ“¦ Dependencies

None needed: the https and fs modules used above are built into Node.js.
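
Before a full run, a quick sanity check can confirm the key and the Details endpoint respond. A minimal sketch; the place_id value below is a hypothetical placeholder, so substitute one taken from a Text Search response:

const https = require('https');

const API_KEY = process.env.GOOGLE_API_KEY || 'YOUR_API_KEY';
const placeId = 'PLACE_ID_FROM_A_SEARCH_RESULT'; // hypothetical placeholder

const url = `https://maps.googleapis.com/maps/api/place/details/json?place_id=${placeId}&fields=name,formatted_address&key=${API_KEY}`;

https.get(url, (res) => {
    let body = '';
    res.on('data', (chunk) => body += chunk);
    res.on('end', () => {
        const data = JSON.parse(body);
        // Expect status 'OK' plus the business name; otherwise Google returns an error_message
        console.log(data.status, data.result?.name || data.error_message || '');
    });
}).on('error', console.error);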

Complete Python Implementation

Save as scraper.py and run with: python scraper.py

import os
import requests
import csv
import time
from datetime import datetime

API_KEY = os.environ.get('GOOGLE_API_KEY', 'YOUR_API_KEY')  # never commit a real key
SEARCH_QUERY = 'Hairdressers'
LOCATION = 'Thembisa, South Africa'

def format_time(time_str):
    """Format time string like '0900' to '9 am'"""
    if not time_str:
        return ''
    hour = int(time_str[:2])
    minute = time_str[2:]
    ampm = 'pm' if hour >= 12 else 'am'
    formatted_hour = hour % 12 or 12
    return f"{formatted_hour}{':' + minute if minute != '00' else ''} {ampm}"

def parse_opening_hours(periods):
    """Parse opening hours from periods array"""
    if not periods:
        return ''
    
    days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    parts = []
    
    for period in periods:
        if 'open' in period:
            day = days[period['open']['day']]
            open_time = format_time(period['open'].get('time', ''))
            close_time = format_time(period.get('close', {}).get('time', ''))
            parts.append(f"{day} {open_time}–{close_time}")
    
    # Join with separators so the column stays readable ("Monday 9 am–5 pm; Tuesday ...")
    return '; '.join(parts)

def scrape_businesses():
    """Main scraping function"""
    print('Starting scrape...')
    
    all_results = []
    next_page_token = None
    page_count = 0
    
    try:
        while True:
            # Text Search has no locationbias parameter, so embed the location in the query itself
            search_url = 'https://maps.googleapis.com/maps/api/place/textsearch/json'
            if next_page_token:
                # All other parameters are ignored once a pagetoken is supplied
                search_params = {'pagetoken': next_page_token, 'key': API_KEY}
            else:
                search_params = {'query': f'{SEARCH_QUERY} in {LOCATION}', 'key': API_KEY}
            
            print(f'Fetching page {page_count + 1}...')
            
            # Make text search request
            response = requests.get(search_url, params=search_params)
            search_data = response.json()
            
            if search_data['status'] not in ['OK', 'ZERO_RESULTS']:
                raise Exception(f"API Error: {search_data['status']}")
            
            # Get details for each place
            if 'results' in search_data:
                for place in search_data['results']:
                    details_url = 'https://maps.googleapis.com/maps/api/place/details/json'
                    details_params = {
                        'place_id': place['place_id'],
                        'fields': 'name,formatted_address,formatted_phone_number,website,opening_hours,rating,user_ratings_total,types,photos,geometry,plus_code,business_status,url',
                        'key': API_KEY
                    }
                    
                    details_response = requests.get(details_url, params=details_params)
                    details_data = details_response.json()
                    
                    if details_data['status'] == 'OK' and 'result' in details_data:
                        all_results.append(details_data['result'])
                    
                    # Delay to avoid rate limiting
                    time.sleep(0.1)
            
            next_page_token = search_data.get('next_page_token')
            page_count += 1
            
            print(f'Found {len(all_results)} businesses so far...')
            
            if not next_page_token or page_count >= 3:
                break
            
            # Wait before next page (required by Google)
            time.sleep(2)
        
        print(f'\nTotal businesses found: {len(all_results)}')
        
        # Save to CSV
        save_to_csv(all_results)
        
    except Exception as e:
        print(f'Error: {str(e)}')

def save_to_csv(results):
    """Save results to CSV file"""
    filename = f"business_data_{int(time.time())}.csv"
    
    headers = [
        'Textbox_value', 'Name', 'Header_image', 'Rating', 'Rating_count', 'Price_range',
        'Category', 'Detail_URL', 'Tags', 'Address', 'Located_in', 'Current_status',
        'Next_status', 'Scraped_at', 'Open_hours', 'Latitude', 'Longitude', 'Plus_code',
        'Website', 'Phone', 'Review_keyword1', 'Review_keyword2', 'Review_keyword3',
        'Review_keyword4', 'Review_keyword5', 'Review_keyword6', 'Review_keyword7',
        'Review_keyword8', 'Review_keyword9', 'Review_keyword10', 'Claim_This_Business'
    ]
    
    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(headers)
        
        for place in results:
            # Get photo URL
            photo_url = ''
            if place.get('photos') and len(place['photos']) > 0:
                photo_ref = place['photos'][0].get('photo_reference')
                if photo_ref:
                    photo_url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth=400&photoreference={photo_ref}&key={API_KEY}"
            
            # Get open/closed status (blank when hours data is missing)
            open_now = place.get('opening_hours', {}).get('open_now')
            status = '' if open_now is None else ('Open' if open_now else 'Closed')
            scraped_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            
            # Build row
            row = [
                f"{SEARCH_QUERY} {LOCATION}",
                place.get('name', ''),
                photo_url,
                place.get('rating', ''),
                place.get('user_ratings_total', ''),
                '',
                place.get('types', [''])[0].replace('_', ' ') if place.get('types') else '',
                place.get('url', ''),
                '',
                place.get('formatted_address', ''),
                '',
                status,
                '',
                scraped_at,
                parse_opening_hours(place.get('opening_hours', {}).get('periods')),
                place.get('geometry', {}).get('location', {}).get('lat', ''),
                place.get('geometry', {}).get('location', {}).get('lng', ''),
                place.get('plus_code', {}).get('compound_code', ''),
                place.get('website', ''),
                place.get('formatted_phone_number', ''),
                '', '', '', '', '', '', '', '', '', '', ''
            ]
            
            writer.writerow(row)
    
    print(f'Data saved to {filename}')

if __name__ == '__main__':
    scrape_businesses()
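
πŸ“¦ Required Package

pip install requests
The csv, time, os, and datetime modules are built into Python; only requests needs to be installed.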