🗺️ Google Places API Business Scraper
Fetch comprehensive business information with geographic restrictions
⚠️ Important: CORS Limitation
The Google Places API cannot be called directly from a browser due to CORS restrictions. This tool provides you with:
- URL Generator: Builds the correct API URLs for your searches
- Backend Code: Node.js and Python implementations you can run on your server
- CSV Export: Process results into your desired format
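If you want the browser UI to stay in the loop, one common workaround (not part of this tool) is a small server-side proxy: the browser calls your server, and your server calls the Places API. A minimal sketch, assuming Node 18+ for the global fetch and an installed express package; the route path and environment variable name are illustrative:

// proxy.js — minimal CORS-workaround sketch (assumes: npm install express)
const express = require('express');
const app = express();
const API_KEY = process.env.GOOGLE_API_KEY; // illustrative env variable name

app.get('/api/search', async (req, res) => {
  const url = `https://maps.googleapis.com/maps/api/place/textsearch/json?query=${encodeURIComponent(req.query.q)}&key=${API_KEY}`;
  const response = await fetch(url); // global fetch, available in Node 18+
  res.json(await response.json());
});

app.listen(3000, () => console.log('Proxy running at http://localhost:3000'));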
Search inputs:
- Location: a city, neighborhood, or address (or a valid Google Plus Code)
- Radius: up to 50,000 meters
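For example, given a center point and radius, the generator produces a Text Search URL like the one below (the coordinates are illustrative and YOUR_API_KEY is a placeholder):

https://maps.googleapis.com/maps/api/place/textsearch/json?query=Hairdressers&location=-25.9964,28.2268&radius=5000&key=YOUR_API_KEY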
Complete Node.js Implementation
Save as scraper.js and run with: node scraper.js
const https = require('https');
const fs = require('fs');
const API_KEY = 'YOUR_API_KEY'; // replace with your own Google Places API key
const SEARCH_QUERY = 'Hairdressers';
const LOCATION = 'Thembisa, South Africa';
// Helper function to make HTTPS GET requests and parse the JSON response
function makeRequest(url) {
  return new Promise((resolve, reject) => {
    https.get(url, (res) => {
      let data = '';
      res.on('data', (chunk) => data += chunk);
      res.on('end', () => {
        try {
          resolve(JSON.parse(data));
        } catch (e) {
          reject(e);
        }
      });
    }).on('error', reject);
  });
}
// Format the opening-hours periods array into a readable string
function parseOpeningHours(periods) {
  if (!periods) return '';
  const days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday'];
  return periods
    .filter(period => period.open)
    .map(period => {
      const day = days[period.open.day];
      const openTime = formatTime(period.open.time);
      const closeTime = period.close ? formatTime(period.close.time) : '';
      return `${day} ${openTime}–${closeTime}`;
    })
    .join('; ');
}
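// Example: { open: { day: 1, time: '0900' }, close: { day: 1, time: '1700' } }
// renders as "Monday 9 am–5 pm".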
// Convert a 24-hour time string like '0900' to '9 am'
function formatTime(time) {
  if (!time) return '';
  const hour = parseInt(time.substring(0, 2), 10);
  const minute = time.substring(2);
  const ampm = hour >= 12 ? 'pm' : 'am';
  const formattedHour = hour % 12 || 12;
  return `${formattedHour}${minute !== '00' ? ':' + minute : ''} ${ampm}`;
}
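// Examples: formatTime('0930') -> '9:30 am', formatTime('1700') -> '5 pm', formatTime('0000') -> '12 am'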
// Convert the collected results to CSV format
function toCSV(results) {
  const headers = [
    'Textbox_value', 'Name', 'Header_image', 'Rating', 'Rating_count', 'Price_range',
    'Category', 'Detail_URL', 'Tags', 'Address', 'Located_in', 'Current_status',
    'Next_status', 'Scraped_at', 'Open_hours', 'Latitude', 'Longitude', 'Plus_code',
    'Website', 'Phone', 'Review_keyword1', 'Review_keyword2', 'Review_keyword3',
    'Review_keyword4', 'Review_keyword5', 'Review_keyword6', 'Review_keyword7',
    'Review_keyword8', 'Review_keyword9', 'Review_keyword10', 'Claim_This_Business'
  ];
  const rows = [headers.join(',')];
  results.forEach(place => {
    // Build the header-image URL from the first photo reference, if any
    const photoUrl = place.photos?.[0]?.photo_reference
      ? `https://maps.googleapis.com/maps/api/place/photo?maxwidth=400&photoreference=${place.photos[0].photo_reference}&key=${API_KEY}`
      : '';
    const status = place.opening_hours?.open_now ? 'Open' : 'Closed';
    const scrapedAt = new Date().toISOString().replace('T', ' ').substring(0, 19);
    const row = [
      `"${SEARCH_QUERY} ${LOCATION}"`,
      `"${(place.name || '').replace(/"/g, '""')}"`,
      `"${photoUrl}"`,
      place.rating || '',
      place.user_ratings_total || '',
      '', // Price_range (not returned by the Places API)
      `"${(place.types?.[0]?.replace(/_/g, ' ') || '').replace(/"/g, '""')}"`,
      `"${place.url || ''}"`,
      '', // Tags
      `"${(place.formatted_address || '').replace(/"/g, '""')}"`,
      '', // Located_in
      status,
      '', // Next_status
      scrapedAt,
      `"${parseOpeningHours(place.opening_hours?.periods)}"`,
      place.geometry?.location?.lat || '',
      place.geometry?.location?.lng || '',
      `"${place.plus_code?.compound_code || ''}"`,
      `"${place.website || ''}"`,
      `"${place.formatted_phone_number || ''}"`,
      '', '', '', '', '', '', '', '', '', '', '' // Review_keyword1-10, Claim_This_Business
    ];
    rows.push(row.join(','));
  });
  return rows.join('\n');
}
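// Note: fields are quoted manually here, with embedded quotes doubled ("")
// per RFC 4180; the Python version below relies on the csv module instead.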
// Main scraping function
async function scrapeBusinesses() {
  console.log('Starting scrape...');
  const allResults = [];
  let nextPageToken = null;
  let pageCount = 0;

  try {
    do {
      // Text Search: bias results toward the target area by including it in the
      // query ("<query> in <location>"); Text Search has no locationbias parameter
      const query = encodeURIComponent(`${SEARCH_QUERY} in ${LOCATION}`);
      const searchUrl = `https://maps.googleapis.com/maps/api/place/textsearch/json?query=${query}${nextPageToken ? `&pagetoken=${nextPageToken}` : ''}&key=${API_KEY}`;
      console.log(`Fetching page ${pageCount + 1}...`);
      const searchData = await makeRequest(searchUrl);

      if (searchData.status !== 'OK' && searchData.status !== 'ZERO_RESULTS') {
        throw new Error(`API Error: ${searchData.status}`);
      }

      // Fetch full details for each place returned by the search
      if (searchData.results) {
        for (const place of searchData.results) {
          const detailsUrl = `https://maps.googleapis.com/maps/api/place/details/json?place_id=${place.place_id}&fields=name,formatted_address,formatted_phone_number,website,opening_hours,rating,user_ratings_total,types,photos,geometry,plus_code,business_status,url&key=${API_KEY}`;
          const detailsData = await makeRequest(detailsUrl);
          if (detailsData.status === 'OK' && detailsData.result) {
            allResults.push(detailsData.result);
          }
          // Small delay to avoid rate limiting
          await new Promise(resolve => setTimeout(resolve, 100));
        }
      }

      nextPageToken = searchData.next_page_token;
      pageCount++;
      console.log(`Found ${allResults.length} businesses so far...`);

      // next_page_token takes a moment to become valid, so wait before the next page
      if (nextPageToken) {
        await new Promise(resolve => setTimeout(resolve, 2000));
      }
    } while (nextPageToken && pageCount < 3); // Text Search returns at most 3 pages (60 results)

    console.log(`\nTotal businesses found: ${allResults.length}`);

    // Save to CSV
    const csv = toCSV(allResults);
    const filename = `business_data_${Date.now()}.csv`;
    fs.writeFileSync(filename, csv);
    console.log(`Data saved to ${filename}`);
  } catch (error) {
    console.error('Error:', error.message);
  }
}
📦 Required Packages
None — the https and fs modules used above are built into Node.js, so no npm install is needed.
Complete Python Implementation
Save as scraper.py and run with: python scraper.py
import requests
import csv
import time
from datetime import datetime
API_KEY = 'YOUR_API_KEY'  # replace with your own Google Places API key
SEARCH_QUERY = 'Hairdressers'
LOCATION = 'Thembisa, South Africa'
def format_time(time_str):
    """Format a 24-hour time string like '0900' as '9 am'."""
    if not time_str:
        return ''
    hour = int(time_str[:2])
    minute = time_str[2:]
    ampm = 'pm' if hour >= 12 else 'am'
    formatted_hour = hour % 12 or 12
    return f"{formatted_hour}{':' + minute if minute != '00' else ''} {ampm}"
def parse_opening_hours(periods):
    """Format the opening-hours periods array into a readable string."""
    if not periods:
        return ''
    days = ['Sunday', 'Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday']
    entries = []
    for period in periods:
        if 'open' in period:
            day = days[period['open']['day']]
            open_time = format_time(period['open'].get('time', ''))
            close_time = format_time(period.get('close', {}).get('time', ''))
            entries.append(f"{day} {open_time}–{close_time}")
    return '; '.join(entries)
def scrape_businesses():
    """Main scraping function."""
    print('Starting scrape...')
    all_results = []
    next_page_token = None
    page_count = 0

    try:
        while True:
            # Text Search: bias results toward the target area by including it
            # in the query ("<query> in <location>"); when paging, Google needs
            # only the page token and the key
            search_url = 'https://maps.googleapis.com/maps/api/place/textsearch/json'
            if next_page_token:
                search_params = {'pagetoken': next_page_token, 'key': API_KEY}
            else:
                search_params = {'query': f'{SEARCH_QUERY} in {LOCATION}', 'key': API_KEY}

            print(f'Fetching page {page_count + 1}...')
            response = requests.get(search_url, params=search_params)
            search_data = response.json()

            if search_data['status'] not in ['OK', 'ZERO_RESULTS']:
                raise Exception(f"API Error: {search_data['status']}")

            # Fetch full details for each place returned by the search
            if 'results' in search_data:
                for place in search_data['results']:
                    details_url = 'https://maps.googleapis.com/maps/api/place/details/json'
                    details_params = {
                        'place_id': place['place_id'],
                        'fields': 'name,formatted_address,formatted_phone_number,website,opening_hours,rating,user_ratings_total,types,photos,geometry,plus_code,business_status,url',
                        'key': API_KEY
                    }
                    details_response = requests.get(details_url, params=details_params)
                    details_data = details_response.json()
                    if details_data['status'] == 'OK' and 'result' in details_data:
                        all_results.append(details_data['result'])
                    # Small delay to avoid rate limiting
                    time.sleep(0.1)

            next_page_token = search_data.get('next_page_token')
            page_count += 1
            print(f'Found {len(all_results)} businesses so far...')

            # Text Search returns at most 3 pages (60 results)
            if not next_page_token or page_count >= 3:
                break
            # next_page_token takes a moment to become valid (wait required by Google)
            time.sleep(2)

        print(f'\nTotal businesses found: {len(all_results)}')
        # Save to CSV
        save_to_csv(all_results)
    except Exception as e:
        print(f'Error: {str(e)}')
def save_to_csv(results):
    """Save results to a CSV file."""
    filename = f"business_data_{int(time.time())}.csv"
    headers = [
        'Textbox_value', 'Name', 'Header_image', 'Rating', 'Rating_count', 'Price_range',
        'Category', 'Detail_URL', 'Tags', 'Address', 'Located_in', 'Current_status',
        'Next_status', 'Scraped_at', 'Open_hours', 'Latitude', 'Longitude', 'Plus_code',
        'Website', 'Phone', 'Review_keyword1', 'Review_keyword2', 'Review_keyword3',
        'Review_keyword4', 'Review_keyword5', 'Review_keyword6', 'Review_keyword7',
        'Review_keyword8', 'Review_keyword9', 'Review_keyword10', 'Claim_This_Business'
    ]

    with open(filename, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(headers)

        for place in results:
            # Build the header-image URL from the first photo reference, if any
            photo_url = ''
            if place.get('photos'):
                photo_ref = place['photos'][0].get('photo_reference')
                if photo_ref:
                    photo_url = f"https://maps.googleapis.com/maps/api/place/photo?maxwidth=400&photoreference={photo_ref}&key={API_KEY}"

            # Current open/closed status and scrape timestamp
            status = 'Open' if place.get('opening_hours', {}).get('open_now') else 'Closed'
            scraped_at = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            # Build the row (empty strings fill columns the API does not provide)
            row = [
                f"{SEARCH_QUERY} {LOCATION}",
                place.get('name', ''),
                photo_url,
                place.get('rating', ''),
                place.get('user_ratings_total', ''),
                '',  # Price_range
                place.get('types', [''])[0].replace('_', ' ') if place.get('types') else '',
                place.get('url', ''),
                '',  # Tags
                place.get('formatted_address', ''),
                '',  # Located_in
                status,
                '',  # Next_status
                scraped_at,
                parse_opening_hours(place.get('opening_hours', {}).get('periods')),
                place.get('geometry', {}).get('location', {}).get('lat', ''),
                place.get('geometry', {}).get('location', {}).get('lng', ''),
                place.get('plus_code', {}).get('compound_code', ''),
                place.get('website', ''),
                place.get('formatted_phone_number', ''),
                '', '', '', '', '', '', '', '', '', '', ''  # Review_keyword1-10, Claim_This_Business
            ]
            writer.writerow(row)

    print(f'Data saved to {filename}')
if __name__ == '__main__':
    scrape_businesses()
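📦 Required Package
pip install requests
The requests library is the only third-party dependency; csv, time, and datetime are part of the Python standard library.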
