Back to Blog
Tutorial · Node.js
12 min read
Node.js Puppeteer with CAPTCHA Automation
Master headless browser automation with Puppeteer and automatic CAPTCHA solving for complex web interactions.
Puppeteer provides a high-level API to control headless Chrome, making it perfect for automating JavaScript-heavy sites with CAPTCHAs.
Installation & Setup
# Create new project
mkdir puppeteer-captcha-automation
cd puppeteer-captcha-automation
npm init -y
# Install dependencies
npm install puppeteer axios
npm install --save-dev @types/node
# Optional: Install puppeteer-extra for plugins
npm install puppeteer-extra puppeteer-extra-plugin-stealth
# Optional: Install puppeteer-cluster (imported by the production example)
npm install puppeteer-cluster
Basic Puppeteer CAPTCHA Solver
const puppeteer = require('puppeteer');
const axios = require('axios');
/**
 * Drives a Puppeteer browser and solves image CAPTCHAs by submitting them as
 * tasks to the HTTP API at `this.apiUrl` and polling for the result.
 */
class PuppeteerCaptchaSolver {
  /**
   * @param {string} apiKey - Sent in the `API-Key` header of every API request.
   * @param {object} [options]
   * @param {number} [options.pollIntervalMs=2000] - Pause between status polls.
   * @param {number} [options.maxWaitMs=120000] - Stop polling after this long.
   */
  constructor(apiKey, { pollIntervalMs = 2000, maxWaitMs = 120000 } = {}) {
    this.apiKey = apiKey;
    this.apiUrl = 'https://api.ai4cap.com/v1';
    this.pollIntervalMs = pollIntervalMs;
    this.maxWaitMs = maxWaitMs;
  }

  /**
   * Submits a base64-encoded CAPTCHA image and waits for its text solution.
   *
   * @param {string} imageBase64 - Base64 image data sent as the task body.
   * @returns {Promise<string>} The `solution.text` of the finished task.
   * @throws {Error} If no task id is returned, the task reports `failed`,
   *   the task is not ready within `maxWaitMs`, or an HTTP request fails.
   */
  async solveCaptcha(imageBase64) {
    const requestConfig = { headers: { 'API-Key': this.apiKey } };

    try {
      // Send CAPTCHA to API
      const response = await axios.post(
        `${this.apiUrl}/tasks`,
        { type: 'ImageToTextTask', body: imageBase64 },
        requestConfig,
      );

      const { taskId } = response.data;
      if (!taskId) {
        // Without this guard we would poll `/tasks/undefined` until timeout.
        throw new Error('CAPTCHA API did not return a task id');
      }

      // Poll for result, bounded by a deadline so a stuck task cannot hang us.
      const deadline = Date.now() + this.maxWaitMs;
      while (true) {
        const result = await axios.get(`${this.apiUrl}/tasks/${taskId}`, requestConfig);
        const { status, solution } = result.data;

        if (status === 'ready') {
          return solution.text;
        }
        if (status === 'failed') {
          throw new Error('CAPTCHA solving failed');
        }
        if (Date.now() >= deadline) {
          throw new Error(`CAPTCHA solving timed out after ${this.maxWaitMs} ms`);
        }

        await new Promise((resolve) => setTimeout(resolve, this.pollIntervalMs));
      }
    } catch (error) {
      console.error('Error solving CAPTCHA:', error);
      throw error;
    }
  }

  /**
   * Opens `url`, solves an `img.captcha-image` CAPTCHA if one is present,
   * submits the form, and extracts the page's `h1` and `.content` text.
   * The browser is always closed, even when a step throws.
   *
   * @param {string} url - Page to visit.
   * @returns {Promise<{title: (string|undefined), content: (string|undefined)}>}
   */
  async automateWithCaptcha(url) {
    const browser = await puppeteer.launch({
      headless: false, // Set to true for production
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    try {
      const page = await browser.newPage();

      // Set viewport and user agent
      await page.setViewport({ width: 1366, height: 768 });
      await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

      // Navigate to page
      await page.goto(url, { waitUntil: 'networkidle2' });

      // Check for CAPTCHA (single lookup; the handle is reused for the screenshot)
      const captchaElement = await page.$('img.captcha-image');
      if (captchaElement !== null) {
        console.log('CAPTCHA detected, solving...');

        const captchaBase64 = await captchaElement.screenshot({ encoding: 'base64' });
        const solution = await this.solveCaptcha(captchaBase64);
        console.log('CAPTCHA solved:', solution);

        await page.type('#captcha-input', solution);

        // Start waiting for the navigation BEFORE clicking; otherwise a fast
        // navigation can finish first and waitForNavigation() would time out.
        await Promise.all([
          page.waitForNavigation(),
          page.click('#submit-button'),
        ]);
      }

      // Continue with automation: extract data from the page
      return await page.evaluate(() => ({
        title: document.querySelector('h1')?.textContent,
        content: document.querySelector('.content')?.textContent,
      }));
    } finally {
      await browser.close();
    }
  }
}
// Usage: run the automation against one page and print what was extracted.
const solver = new PuppeteerCaptchaSolver('YOUR_API_KEY');

(async () => {
  try {
    const data = await solver.automateWithCaptcha('https://example.com');
    console.log('Extracted data:', data);
  } catch (error) {
    console.error('Error:', error);
  }
})();
Advanced Puppeteer Techniques
1. Stealth Mode & Anti-Detection
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
// Use stealth plugin to avoid detection
puppeteer.use(StealthPlugin());
/**
 * Helpers for launching a browser and preparing pages with anti-detection
 * tweaks (launch flags, navigator overrides, simulated mouse movement).
 */
class StealthCaptchaSolver {
  /**
   * Launches a headed browser with automation-related Blink features disabled.
   *
   * @returns {Promise<object>} The browser returned by `puppeteer.launch`.
   */
  async createBrowser() {
    return await puppeteer.launch({
      headless: false,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--disable-features=IsolateOrigins,site-per-process',
      ],
      executablePath: '/usr/bin/chromium-browser', // Optional: use system Chrome
    });
  }

  /**
   * Installs navigator overrides for every new document, performs some
   * mouse movements, and sets 30-second default timeouts on the page.
   *
   * @param {object} page - Puppeteer page to prepare.
   * @returns {Promise<void>}
   */
  async setupPage(page) {
    // Override navigator properties before any page script runs
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', {
        get: () => false,
      });
      // Mock plugins
      Object.defineProperty(navigator, 'plugins', {
        get: () => [1, 2, 3, 4, 5],
      });
      // Mock languages
      Object.defineProperty(navigator, 'languages', {
        get: () => ['en-US', 'en'],
      });
    });

    // Add mouse movements
    await this.addMouseMovements(page);

    // Default timeouts for waits and navigations (milliseconds)
    page.setDefaultTimeout(30000);
    page.setDefaultNavigationTimeout(30000);
  }

  /**
   * Moves the mouse along two fixed points and then to several random
   * positions, pausing a random amount of time after each random move.
   *
   * @param {object} page - Puppeteer page whose `mouse` is driven.
   * @param {object} [options]
   * @param {number} [options.randomMoves=5] - Number of random movements.
   * @param {number} [options.minPauseMs=500] - Shortest pause after a move.
   * @param {number} [options.maxPauseMs=1500] - Upper bound of the pause.
   * @returns {Promise<void>}
   */
  async addMouseMovements(page, { randomMoves = 5, minPauseMs = 500, maxPauseMs = 1500 } = {}) {
    // Simulate human-like mouse movements
    await page.mouse.move(100, 100);
    await page.mouse.move(200, 200, { steps: 10 });

    // Random mouse movements
    for (let i = 0; i < randomMoves; i++) {
      const x = Math.random() * 800;
      const y = Math.random() * 600;
      const steps = Math.floor(Math.random() * 10) + 5;
      await page.mouse.move(x, y, { steps });

      // Plain timer instead of page.waitForTimeout(), which newer Puppeteer
      // releases no longer provide.
      const pauseMs = minPauseMs + Math.random() * (maxPauseMs - minPauseMs);
      await new Promise((resolve) => setTimeout(resolve, pauseMs));
    }
  }
}
2. Handling reCAPTCHA v2
/**
 * Solves a reCAPTCHA v2 widget on `page` by requesting a token from the
 * task API and injecting it into the page's `g-recaptcha-response` field.
 * Does nothing (beyond logging) when no reCAPTCHA iframe is present.
 *
 * @param {object} page - Puppeteer page that may contain a reCAPTCHA.
 * @param {string} apiKey - Sent in the `API-Key` header of every API request.
 * @param {object} [options]
 * @param {number} [options.pollIntervalMs=5000] - Pause between status polls.
 * @param {number} [options.maxWaitMs=180000] - Stop polling after this long.
 * @returns {Promise<void>}
 * @throws {Error} If the site key is missing, the API returns no task id,
 *   the task reports `failed`, or no token arrives within `maxWaitMs`.
 */
async function solveRecaptchaV2(page, apiKey, { pollIntervalMs = 5000, maxWaitMs = 180000 } = {}) {
  // Find reCAPTCHA iframe
  const recaptchaFrame = page
    .frames()
    .find((frame) => frame.url().includes('google.com/recaptcha'));
  if (!recaptchaFrame) {
    console.log('No reCAPTCHA found');
    return;
  }

  // Get site key
  const siteKey = await page.evaluate(() => {
    const element = document.querySelector('.g-recaptcha');
    return element ? element.getAttribute('data-sitekey') : null;
  });
  if (!siteKey) {
    throw new Error('Site key not found');
  }
  console.log('Solving reCAPTCHA with site key:', siteKey);

  // Request token from API
  const requestConfig = { headers: { 'API-Key': apiKey } };
  const response = await axios.post(
    'https://api.ai4cap.com/v1/tasks',
    {
      type: 'RecaptchaV2TaskProxyless',
      websiteURL: page.url(),
      websiteKey: siteKey,
    },
    requestConfig,
  );
  const { taskId } = response.data;
  if (!taskId) {
    throw new Error('CAPTCHA API did not return a task id');
  }

  // Wait for solution; fail fast on `failed` and never poll past the deadline.
  const deadline = Date.now() + maxWaitMs;
  let token;
  while (token === undefined) {
    const result = await axios.get(`https://api.ai4cap.com/v1/tasks/${taskId}`, requestConfig);
    const { status, solution } = result.data;

    if (status === 'ready') {
      token = solution.gRecaptchaResponse;
      break;
    }
    if (status === 'failed') {
      throw new Error('reCAPTCHA solving failed');
    }
    if (Date.now() >= deadline) {
      throw new Error(`reCAPTCHA solving timed out after ${maxWaitMs} ms`);
    }
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  // Inject token
  await page.evaluate((solvedToken) => {
    const responseField = document.getElementById('g-recaptcha-response');
    if (!responseField) {
      throw new Error('g-recaptcha-response field not found');
    }
    // `value` is what the form submits; textContent keeps the markup in sync
    // without parsing the token as HTML.
    responseField.value = solvedToken;
    responseField.textContent = solvedToken;
    responseField.style.display = 'none';

    // Trigger callback if exists
    if (typeof ___grecaptcha_cfg !== 'undefined') {
      Object.values(___grecaptcha_cfg.clients).forEach((client) => {
        if (client.callback) {
          client.callback(solvedToken);
        }
      });
    }
  }, token);

  console.log('reCAPTCHA solved successfully');
}
3. Handling Dynamic Content
/**
 * Utilities for pages whose CAPTCHA appears after load: waiting for one to
 * show up, classifying it by selector, and logging CAPTCHA-related traffic.
 */
class DynamicContentHandler {
  /**
   * Waits until any known CAPTCHA selector matches, then classifies it.
   *
   * @param {object} page - Puppeteer page to watch.
   * @param {number} [timeout=30000] - Maximum wait in milliseconds.
   * @returns {Promise<{type: string, selector: string}|null>} The detected
   *   CAPTCHA, or `null` if none appeared within the timeout.
   * @throws {Error} Any non-timeout error from the page is rethrown so that
   *   a closed or crashed page is not reported as "no CAPTCHA".
   */
  async waitForCaptcha(page, timeout = 30000) {
    const captchaSelectors = [
      'img.captcha-image',
      '.g-recaptcha',
      '.h-captcha',
      '#captcha-container',
    ];

    try {
      // Wait for any CAPTCHA to appear
      await page.waitForSelector(captchaSelectors.join(', '), { timeout });
    } catch (error) {
      if (error?.name === 'TimeoutError') {
        console.log('No CAPTCHA found within timeout');
        return null;
      }
      throw error;
    }

    // Determine CAPTCHA type: first selector (in list order) that matches
    for (const selector of captchaSelectors) {
      if (await page.$(selector)) {
        return this.identifyCaptchaType(page, selector);
      }
    }
    return null;
  }

  /**
   * Maps a matched selector to a CAPTCHA type label.
   *
   * @param {object} page - Unused by the current implementation.
   * @param {string} selector - The selector that matched.
   * @returns {Promise<{type: string, selector: string}>} `type` is one of
   *   `recaptcha`, `hcaptcha`, `image`, or `unknown`.
   */
  async identifyCaptchaType(page, selector) {
    if (selector.includes('g-recaptcha')) {
      return { type: 'recaptcha', selector };
    }
    if (selector.includes('h-captcha')) {
      return { type: 'hcaptcha', selector };
    }
    if (selector.includes('captcha-image')) {
      return { type: 'image', selector };
    }
    return { type: 'unknown', selector };
  }

  /**
   * Enables request interception and logs every request plus the JSON body
   * of any response whose URL contains "captcha".
   *
   * @param {object} page - Puppeteer page to instrument.
   * @returns {Promise<void>}
   */
  async handleAjaxCaptcha(page) {
    // Intercept AJAX requests
    await page.setRequestInterception(true);

    page.on('request', (request) => {
      console.log('Request:', request.url());
      request.continue();
    });

    page.on('response', async (response) => {
      if (!response.url().includes('captcha')) {
        return;
      }
      console.log('CAPTCHA response detected');
      try {
        const data = await response.json();
        console.log('CAPTCHA data:', data);
      } catch (error) {
        // Matching responses may be images or HTML; a failed parse inside an
        // event listener would otherwise become an unhandled rejection.
        console.log('CAPTCHA response body is not JSON:', error.message);
      }
    });
  }
}
Production-Ready Implementation
const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const { Cluster } = require('puppeteer-cluster');
puppeteer.use(StealthPlugin());
/**
 * Processes many URLs in parallel on a puppeteer-cluster, detecting and
 * dispatching CAPTCHAs on each page before running an action.
 *
 * NOTE(review): `performAction`, `solveRecaptcha`, `solveHcaptcha` and
 * `solveImageCaptcha` are called below but are not defined in this class;
 * they presumably must be supplied by a subclass or added here — confirm.
 * `options.retryLimit` is stored but not read by any method in this class.
 */
class ProductionCaptchaAutomation {
  /**
   * @param {string} apiKey - API key kept on the instance as `this.apiKey`.
   * @param {object} [options] - Extra keys are preserved on `this.options`.
   * @param {number} [options.maxConcurrency=5] - Cluster worker limit.
   * @param {number} [options.retryLimit=3]
   * @param {number} [options.timeout=30000] - Navigation timeout (ms).
   * @param {boolean|string} [options.headless=true] - Passed to Puppeteer.
   * @param {string[]} [options.blockedResourceTypes=['image','stylesheet','font']]
   *   Resource types aborted by `setupPage`.
   */
  constructor(apiKey, options = {}) {
    this.apiKey = apiKey;
    // Spread first, then apply defaults with `??`, so a key passed explicitly
    // as `undefined` falls back to its default instead of erasing it.
    this.options = {
      ...options,
      maxConcurrency: options.maxConcurrency ?? 5,
      retryLimit: options.retryLimit ?? 3,
      timeout: options.timeout ?? 30000,
      headless: options.headless ?? true,
      blockedResourceTypes: options.blockedResourceTypes ?? ['image', 'stylesheet', 'font'],
    };
  }

  /**
   * Launches the cluster and registers `processUrl` as its task.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    // Create cluster for parallel processing
    this.cluster = await Cluster.launch({
      concurrency: Cluster.CONCURRENCY_CONTEXT,
      maxConcurrency: this.options.maxConcurrency,
      puppeteer,
      puppeteerOptions: {
        headless: this.options.headless,
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-accelerated-2d-canvas',
          '--no-first-run',
          '--no-zygote',
          '--single-process',
          '--disable-gpu',
        ],
      },
    });

    // Define task
    await this.cluster.task(async ({ page, data }) => this.processUrl(page, data));
  }

  /**
   * Visits one URL, handles a detected CAPTCHA, and runs the action.
   * Never rejects: failures are reported in the returned object.
   *
   * @param {object} page - Puppeteer page provided by the cluster.
   * @param {{url: string, action: *}} job
   * @returns {Promise<{url: string, success: boolean, data?: *, error?: string}>}
   */
  async processUrl(page, { url, action }) {
    try {
      // Setup page
      await this.setupPage(page);

      // Navigate
      await page.goto(url, {
        waitUntil: 'networkidle2',
        timeout: this.options.timeout,
      });

      // Check for CAPTCHA
      const captcha = await this.detectCaptcha(page);
      if (captcha) {
        await this.solveCaptcha(page, captcha);
      }

      // Perform action
      const result = await this.performAction(page, action);
      return { url, success: true, data: result };
    } catch (error) {
      console.error(`Error processing ${url}:`, error);
      return { url, success: false, error: error.message };
    }
  }

  /**
   * Runs the `scrape` action for every URL on the cluster and returns the
   * per-URL results in input order. The cluster is closed afterwards, even
   * on failure, so the instance must be re-initialized before reuse.
   *
   * @param {string[]} urls
   * @returns {Promise<object[]>} One `processUrl` result per URL.
   */
  async processUrls(urls) {
    try {
      // `execute` resolves with the task's return value, and submitting all
      // jobs up front lets the cluster actually run them concurrently.
      const results = await Promise.all(
        urls.map((url) => this.cluster.execute({ url, action: 'scrape' })),
      );
      await this.cluster.idle();
      return results;
    } finally {
      await this.cluster.close();
    }
  }

  /**
   * Sets the viewport, aborts requests for blocked resource types, and
   * logs page-level errors.
   *
   * @param {object} page - Puppeteer page to configure.
   * @returns {Promise<void>}
   */
  async setupPage(page) {
    // Set viewport
    await page.setViewport({
      width: 1920,
      height: 1080,
      deviceScaleFactor: 1,
    });

    // Block unnecessary resources.
    // NOTE(review): blocking 'image' likely also prevents an image CAPTCHA
    // from rendering; override `blockedResourceTypes` if that matters — confirm.
    const blocked = this.options.blockedResourceTypes;
    await page.setRequestInterception(true);
    page.on('request', (request) => {
      if (blocked.includes(request.resourceType())) {
        request.abort();
      } else {
        request.continue();
      }
    });

    // Add error handling
    page.on('error', (err) => {
      console.error('Page error:', err);
    });
    page.on('pageerror', (err) => {
      console.error('Page error:', err);
    });
  }

  /**
   * Looks for a known CAPTCHA on the page, checking reCAPTCHA, hCaptcha and
   * image CAPTCHAs in that order.
   *
   * @param {object} page - Puppeteer page to inspect.
   * @returns {Promise<object|null>} The matching descriptor, or `null`.
   */
  async detectCaptcha(page) {
    const captchaTypes = [
      {
        type: 'recaptcha',
        selector: '.g-recaptcha',
        siteKeyAttr: 'data-sitekey',
      },
      {
        type: 'hcaptcha',
        selector: '.h-captcha',
        siteKeyAttr: 'data-sitekey',
      },
      {
        type: 'image',
        selector: 'img[class*="captcha"]',
        inputSelector: 'input[name*="captcha"]',
      },
    ];

    for (const captchaType of captchaTypes) {
      const element = await page.$(captchaType.selector);
      if (element) {
        return captchaType;
      }
    }
    return null;
  }

  /**
   * Dispatches to the solver method for the detected CAPTCHA type.
   * Unrecognized types are ignored.
   *
   * @param {object} page - Puppeteer page containing the CAPTCHA.
   * @param {{type: string}} captchaInfo - Descriptor from `detectCaptcha`.
   * @returns {Promise<void>}
   */
  async solveCaptcha(page, captchaInfo) {
    switch (captchaInfo.type) {
      case 'recaptcha':
        await this.solveRecaptcha(page, captchaInfo);
        break;
      case 'hcaptcha':
        await this.solveHcaptcha(page, captchaInfo);
        break;
      case 'image':
        await this.solveImageCaptcha(page, captchaInfo);
        break;
    }
  }
}
// Usage
/**
 * Example entry point: builds the automation, initializes it, processes a
 * fixed list of URLs, and prints the collected results.
 */
async function main() {
  const urls = [
    'https://example.com/page1',
    'https://example.com/page2',
    // ... more URLs
  ];
  const settings = { maxConcurrency: 10, headless: true };

  const automation = new ProductionCaptchaAutomation('YOUR_API_KEY', settings);
  await automation.initialize();

  const results = await automation.processUrls(urls);
  console.log('Results:', results);
}

main().catch((error) => console.error(error));
Performance Tips
Optimization | Impact | Implementation |
---|---|---|
Block images/CSS | 50% faster | Request interception |
Use page pool | 3x throughput | Puppeteer Cluster |
Disable GPU | Lower memory | Launch args |
Cache sessions | Skip login | Cookie persistence |
Pro Tip: Use Docker for Consistency
Deploy your Puppeteer automation in Docker containers to ensure consistent behavior across different environments.