AI4CAP.COM
Back to Blog
Tutorial · Node.js

12 min read

Node.js Puppeteer with CAPTCHA Automation

Master headless browser automation with Puppeteer and automatic CAPTCHA solving for complex web interactions.

Installation & Setup

# Create new project
mkdir puppeteer-captcha-automation
cd puppeteer-captcha-automation
npm init -y

# Install dependencies
npm install puppeteer axios
npm install --save-dev @types/node

# Optional: Install puppeteer-extra for plugins
npm install puppeteer-extra puppeteer-extra-plugin-stealth

# Required by the "Production-Ready Implementation" example, which loads puppeteer-cluster
npm install puppeteer-cluster

Basic Puppeteer CAPTCHA Solver

const puppeteer = require('puppeteer');
const axios = require('axios');

/**
 * Drives a Puppeteer browser and solves image CAPTCHAs by submitting a
 * screenshot of the CAPTCHA element to the solving API and polling for the
 * recognised text.
 */
class PuppeteerCaptchaSolver {
  /**
   * @param {string} apiKey - Value sent in the `API-Key` request header.
   * @param {object} [options]
   * @param {number} [options.pollIntervalMs=2000] - Delay between status polls.
   * @param {number} [options.maxPollAttempts=60] - Polls before giving up.
   */
  constructor(apiKey, { pollIntervalMs = 2000, maxPollAttempts = 60 } = {}) {
    this.apiKey = apiKey;
    this.apiUrl = 'https://api.ai4cap.com/v1';
    this.pollIntervalMs = pollIntervalMs;
    this.maxPollAttempts = maxPollAttempts;
  }

  /**
   * Submits a base64-encoded CAPTCHA image and polls until it is solved.
   *
   * @param {string} imageBase64 - Base64 image data of the CAPTCHA.
   * @returns {Promise<string>} The `solution.text` field of the finished task.
   * @throws {Error} If the task reports `failed`, if polling exceeds
   *   `maxPollAttempts`, or if an HTTP request fails.
   */
  async solveCaptcha(imageBase64) {
    const headers = { 'API-Key': this.apiKey };

    try {
      // Send CAPTCHA to API
      const response = await axios.post(
        `${this.apiUrl}/tasks`,
        { type: 'ImageToTextTask', body: imageBase64 },
        { headers },
      );
      const { taskId } = response.data;

      // Poll for the result, but never forever: a task stuck in a pending
      // state would otherwise hang the caller (and keep the browser open).
      for (let attempt = 0; attempt < this.maxPollAttempts; attempt += 1) {
        const result = await axios.get(`${this.apiUrl}/tasks/${taskId}`, { headers });
        const { status } = result.data;

        if (status === 'ready') {
          return result.data.solution.text;
        }
        if (status === 'failed') {
          throw new Error('CAPTCHA solving failed');
        }

        await new Promise((resolve) => setTimeout(resolve, this.pollIntervalMs));
      }

      throw new Error(`CAPTCHA solving timed out after ${this.maxPollAttempts} polls`);
    } catch (error) {
      console.error('Error solving CAPTCHA:', error);
      throw error;
    }
  }

  /**
   * Opens `url`, solves an image CAPTCHA if one is present, submits the form
   * and extracts the page title and content.
   *
   * @param {string} url - Page to open.
   * @returns {Promise<{title: (string|undefined), content: (string|undefined)}>}
   *   Text of the first `h1` and of the first `.content` element.
   */
  async automateWithCaptcha(url) {
    const browser = await puppeteer.launch({
      headless: false, // Set to true for production
      args: ['--no-sandbox', '--disable-setuid-sandbox'],
    });

    try {
      const page = await browser.newPage();

      // Set viewport and user agent
      await page.setViewport({ width: 1366, height: 768 });
      await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36');

      // Navigate to page
      await page.goto(url, { waitUntil: 'networkidle2' });

      // Look the element up once and reuse the handle for the screenshot.
      const captchaElement = await page.$('img.captcha-image');

      if (captchaElement !== null) {
        console.log('CAPTCHA detected, solving...');

        // Get CAPTCHA image
        const captchaBase64 = await captchaElement.screenshot({ encoding: 'base64' });

        // Solve CAPTCHA
        const solution = await this.solveCaptcha(captchaBase64);
        console.log('CAPTCHA solved:', solution);

        // Enter solution
        await page.type('#captcha-input', solution);

        // Start waiting for the navigation BEFORE clicking; waiting only
        // after the click can miss a navigation that has already completed.
        await Promise.all([
          page.waitForNavigation(),
          page.click('#submit-button'),
        ]);
      }

      // Continue with automation...
      const data = await page.evaluate(() => {
        // Extract data from page
        return {
          title: document.querySelector('h1')?.textContent,
          content: document.querySelector('.content')?.textContent,
        };
      });

      return data;
    } finally {
      // Always release the browser, even when solving or scraping throws.
      await browser.close();
    }
  }
}

// Usage
const solver = new PuppeteerCaptchaSolver('YOUR_API_KEY');
solver.automateWithCaptcha('https://example.com')
  .then(data => console.log('Extracted data:', data))
  .catch(error => console.error('Error:', error));

Advanced Puppeteer Techniques

1. Stealth Mode & Anti-Detection

const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');

// Use stealth plugin to avoid detection
puppeteer.use(StealthPlugin());

/**
 * Helpers for launching a browser and preparing pages so that automation is
 * harder to fingerprint.
 */
class StealthCaptchaSolver {
  /**
   * Launches a headful browser with automation-related Blink features disabled.
   *
   * @returns {Promise<object>} The launched browser.
   */
  async createBrowser() {
    return await puppeteer.launch({
      headless: false,
      args: [
        '--no-sandbox',
        '--disable-setuid-sandbox',
        '--disable-blink-features=AutomationControlled',
        '--disable-features=IsolateOrigins,site-per-process',
      ],
      executablePath: '/usr/bin/chromium-browser', // Optional: use system Chrome
    });
  }

  /**
   * Installs navigator overrides for every new document, performs some mouse
   * movements and sets the page's default timeouts.
   *
   * @param {object} page - Puppeteer page to prepare.
   * @returns {Promise<void>}
   */
  async setupPage(page) {
    // Override navigator properties
    await page.evaluateOnNewDocument(() => {
      Object.defineProperty(navigator, 'webdriver', {
        get: () => false,
      });

      // Mock plugins
      Object.defineProperty(navigator, 'plugins', {
        get: () => [1, 2, 3, 4, 5],
      });

      // Mock languages
      Object.defineProperty(navigator, 'languages', {
        get: () => ['en-US', 'en'],
      });
    });

    // Add mouse movements
    await this.addMouseMovements(page);

    // Upper bounds (30 s) for waits and navigations on this page. These are
    // timeouts, not delays; the random pauses live in addMouseMovements().
    page.setDefaultTimeout(30000);
    page.setDefaultNavigationTimeout(30000);
  }

  /**
   * Moves the mouse along a fixed start path and then to five random points,
   * pausing 500-1500 ms after each random move.
   *
   * @param {object} page - Puppeteer page whose mouse is moved.
   * @returns {Promise<void>}
   */
  async addMouseMovements(page) {
    // Simulate human-like mouse movements
    await page.mouse.move(100, 100);
    await page.mouse.move(200, 200, { steps: 10 });

    // Random mouse movements
    for (let i = 0; i < 5; i++) {
      const x = Math.random() * 800;
      const y = Math.random() * 600;
      await page.mouse.move(x, y, { steps: Math.floor(Math.random() * 10) + 5 });

      // page.waitForTimeout() is no longer available in current Puppeteer
      // releases, so pause with a plain timer instead.
      const pauseMs = Math.random() * 1000 + 500;
      await new Promise((resolve) => setTimeout(resolve, pauseMs));
    }
  }
}

2. Handling reCAPTCHA v2

/**
 * Solves a reCAPTCHA v2 on `page` by requesting a token from the solving API
 * and injecting it into the page.
 *
 * Relies on `axios` being in scope (this snippet does not load it itself).
 *
 * @param {object} page - Puppeteer page that may contain a reCAPTCHA.
 * @param {string} apiKey - Value sent in the `API-Key` request header.
 * @param {object} [options]
 * @param {number} [options.pollIntervalMs=5000] - Delay between status polls.
 * @param {number} [options.timeoutMs=120000] - Maximum total time spent polling.
 * @returns {Promise<void>} Resolves once the token is injected, or immediately
 *   when no reCAPTCHA frame is present.
 * @throws {Error} If the site key is missing, the task reports `failed`,
 *   polling exceeds `timeoutMs`, or the response field is not in the page.
 */
async function solveRecaptchaV2(page, apiKey, { pollIntervalMs = 5000, timeoutMs = 120000 } = {}) {
  // Find reCAPTCHA iframe
  const frames = await page.frames();
  const recaptchaFrame = frames.find((frame) => frame.url().includes('google.com/recaptcha'));

  if (!recaptchaFrame) {
    console.log('No reCAPTCHA found');
    return;
  }

  // Get site key
  const siteKey = await page.evaluate(() => {
    const element = document.querySelector('.g-recaptcha');
    return element ? element.getAttribute('data-sitekey') : null;
  });

  if (!siteKey) {
    throw new Error('Site key not found');
  }

  console.log('Solving reCAPTCHA with site key:', siteKey);

  const headers = { 'API-Key': apiKey };

  // Request token from API
  const response = await axios.post(
    'https://api.ai4cap.com/v1/tasks',
    {
      type: 'RecaptchaV2TaskProxyless',
      websiteURL: page.url(),
      websiteKey: siteKey,
    },
    { headers },
  );
  const taskId = response.data.taskId;

  // Wait for solution. A failed task or an expired deadline ends the loop
  // with an error instead of polling forever.
  const deadline = Date.now() + timeoutMs;
  let token;
  while (true) {
    const result = await axios.get(`https://api.ai4cap.com/v1/tasks/${taskId}`, { headers });
    const { status } = result.data;

    if (status === 'ready') {
      token = result.data.solution.gRecaptchaResponse;
      break;
    }
    if (status === 'failed') {
      throw new Error('reCAPTCHA solving failed');
    }
    if (Date.now() >= deadline) {
      throw new Error(`reCAPTCHA solving timed out after ${timeoutMs} ms`);
    }

    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  // Inject token
  await page.evaluate((token) => {
    const responseField = document.getElementById('g-recaptcha-response');
    if (!responseField) {
      throw new Error('g-recaptcha-response element not found');
    }

    // Assign .value rather than innerHTML: the value is what a form submit
    // sends, and it takes effect even if the field was modified earlier.
    responseField.value = token;
    responseField.style.display = 'none';

    // Trigger callback if exists
    if (typeof ___grecaptcha_cfg !== 'undefined') {
      Object.values(___grecaptcha_cfg.clients ?? {}).forEach((client) => {
        if (typeof client?.callback === 'function') {
          client.callback(token);
        }
      });
    }
  }, token);

  console.log('reCAPTCHA solved successfully');
}

3. Handling Dynamic Content

/**
 * Utilities for pages whose CAPTCHA appears after load or arrives via AJAX.
 */
class DynamicContentHandler {
  /**
   * Waits until any known CAPTCHA selector appears and reports which one.
   *
   * @param {object} page - Puppeteer page to watch.
   * @param {number} [timeout=30000] - Maximum wait in milliseconds.
   * @returns {Promise<{type: string, selector: string}|null>} The detected
   *   CAPTCHA, or `null` when none was found or waiting failed.
   */
  async waitForCaptcha(page, timeout = 30000) {
    const captchaSelectors = [
      'img.captcha-image',
      '.g-recaptcha',
      '.h-captcha',
      '#captcha-container',
    ];

    try {
      // Wait for any CAPTCHA to appear
      await page.waitForSelector(captchaSelectors.join(', '), { timeout });

      // Determine CAPTCHA type
      for (const selector of captchaSelectors) {
        if (await page.$(selector)) {
          return this.identifyCaptchaType(page, selector);
        }
      }
    } catch (error) {
      if (error?.name === 'TimeoutError') {
        console.log('No CAPTCHA found within timeout');
      } else {
        // Keep the best-effort null result, but do not disguise real
        // failures (closed page, detached frame, ...) as a plain timeout.
        console.error('Error while waiting for CAPTCHA:', error);
      }
      return null;
    }

    // The element matched and then disappeared before it could be queried;
    // return null explicitly so callers never see undefined.
    return null;
  }

  /**
   * Maps a matched selector to a CAPTCHA type label.
   *
   * @param {object} page - Unused; kept for interface compatibility.
   * @param {string} selector - Selector that matched.
   * @returns {Promise<{type: string, selector: string}>} Type is one of
   *   `recaptcha`, `hcaptcha`, `image` or `unknown`.
   */
  async identifyCaptchaType(page, selector) {
    if (selector.includes('g-recaptcha')) {
      return { type: 'recaptcha', selector };
    } else if (selector.includes('h-captcha')) {
      return { type: 'hcaptcha', selector };
    } else if (selector.includes('captcha-image')) {
      return { type: 'image', selector };
    }

    return { type: 'unknown', selector };
  }

  /**
   * Enables request interception, logs every request URL, and logs the JSON
   * body of any response whose URL contains "captcha".
   *
   * @param {object} page - Puppeteer page to instrument.
   * @returns {Promise<void>}
   */
  async handleAjaxCaptcha(page) {
    // Intercept AJAX requests
    await page.setRequestInterception(true);

    page.on('request', async (request) => {
      console.log('Request:', request.url());
      try {
        await request.continue();
      } catch (error) {
        // A rejection inside an event listener would otherwise go unhandled.
        console.error('Failed to continue request:', error);
      }
    });

    page.on('response', async (response) => {
      if (!response.url().includes('captcha')) {
        return;
      }

      console.log('CAPTCHA response detected');
      try {
        const data = await response.json();
        console.log('CAPTCHA data:', data);
      } catch (error) {
        // Matching URLs are often images or scripts, not JSON; parsing them
        // must not surface as an unhandled promise rejection.
        console.log('CAPTCHA response is not JSON:', error);
      }
    });
  }
}

Production-Ready Implementation

const puppeteer = require('puppeteer-extra');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const { Cluster } = require('puppeteer-cluster');

puppeteer.use(StealthPlugin());

/**
 * Processes many URLs in parallel with puppeteer-cluster, detecting and
 * dispatching CAPTCHA solving per page.
 *
 * The type-specific solvers (`solveRecaptcha`, `solveHcaptcha`,
 * `solveImageCaptcha`) are not defined in this class and must be supplied,
 * for example by a subclass.
 */
class ProductionCaptchaAutomation {
  /**
   * @param {string} apiKey - API key kept on the instance for solver methods.
   * @param {object} [options]
   * @param {number} [options.maxConcurrency=5] - Parallel workers in the cluster.
   * @param {number} [options.retryLimit=3] - Stored on `this.options`; not
   *   consulted by the methods defined here.
   * @param {number} [options.timeout=30000] - Navigation timeout in ms.
   * @param {boolean|string} [options.headless=true] - Passed to the browser launch.
   * @param {string[]} [options.blockedResourceTypes=['image','stylesheet','font']]
   *   Resource types aborted by `setupPage`.
   */
  constructor(apiKey, options = {}) {
    this.apiKey = apiKey;
    // Spread first, defaults after: an option passed explicitly as undefined
    // can no longer erase its default.
    this.options = {
      ...options,
      maxConcurrency: options.maxConcurrency ?? 5,
      retryLimit: options.retryLimit ?? 3,
      timeout: options.timeout ?? 30000,
      headless: options.headless ?? true,
      blockedResourceTypes: options.blockedResourceTypes ?? ['image', 'stylesheet', 'font'],
    };
  }

  /**
   * Launches the cluster and registers `processUrl` as its task.
   *
   * @returns {Promise<void>}
   */
  async initialize() {
    // Create cluster for parallel processing
    this.cluster = await Cluster.launch({
      concurrency: Cluster.CONCURRENCY_CONTEXT,
      maxConcurrency: this.options.maxConcurrency,
      puppeteer,
      puppeteerOptions: {
        headless: this.options.headless,
        args: [
          '--no-sandbox',
          '--disable-setuid-sandbox',
          '--disable-dev-shm-usage',
          '--disable-accelerated-2d-canvas',
          '--no-first-run',
          '--no-zygote',
          '--single-process',
          '--disable-gpu',
        ],
      },
    });

    // Define task
    await this.cluster.task(async ({ page, data }) => {
      return await this.processUrl(page, data);
    });
  }

  /**
   * Handles one URL: prepares the page, navigates, solves a CAPTCHA if one is
   * detected, then performs the requested action. Never throws; failures are
   * reported in the result object.
   *
   * @param {object} page - Page provided by the cluster.
   * @param {{url: string, action: string}} job
   * @returns {Promise<{url: string, success: boolean, data?: *, error?: string}>}
   */
  async processUrl(page, { url, action }) {
    try {
      // Setup page
      await this.setupPage(page);

      // Navigate
      await page.goto(url, {
        waitUntil: 'networkidle2',
        timeout: this.options.timeout,
      });

      // Check for CAPTCHA
      const captcha = await this.detectCaptcha(page);
      if (captcha) {
        await this.solveCaptcha(page, captcha);
      }

      // Perform action
      const result = await this.performAction(page, action);

      return { url, success: true, data: result };
    } catch (error) {
      console.error(`Error processing ${url}:`, error);
      return { url, success: false, error: error.message };
    }
  }

  /**
   * Runs the 'scrape' action for every URL in parallel, then shuts the
   * cluster down. The instance must be re-initialised before reuse.
   *
   * @param {string[]} urls
   * @returns {Promise<object[]>} One `processUrl` result per URL, in input order.
   */
  async processUrls(urls) {
    try {
      // cluster.queue() does not hand back the task's return value;
      // cluster.execute() does. Starting every job before awaiting lets the
      // cluster run up to maxConcurrency pages at once.
      return await Promise.all(
        urls.map((url) => this.cluster.execute({ url, action: 'scrape' })),
      );
    } finally {
      await this.cluster.idle();
      await this.cluster.close();
    }
  }

  /**
   * Default action handler. Supports `'scrape'`, returning the text of the
   * first `h1` and first `.content` element. Override for other actions.
   *
   * @param {object} page
   * @param {string} action
   * @returns {Promise<{title: (string|undefined), content: (string|undefined)}>}
   * @throws {Error} For any action other than `'scrape'`.
   */
  async performAction(page, action) {
    if (action !== 'scrape') {
      throw new Error(`Unsupported action: ${action}`);
    }

    return page.evaluate(() => ({
      title: document.querySelector('h1')?.textContent,
      content: document.querySelector('.content')?.textContent,
    }));
  }

  /**
   * Sets the viewport, blocks configured resource types and attaches error
   * loggers.
   *
   * @param {object} page
   * @returns {Promise<void>}
   */
  async setupPage(page) {
    // Set viewport
    await page.setViewport({
      width: 1920,
      height: 1080,
      deviceScaleFactor: 1,
    });

    // Block unnecessary resources. NOTE: blocking 'image' also prevents image
    // CAPTCHAs from loading; pass a blockedResourceTypes option without
    // 'image' when those need to be solved.
    const blockedTypes = this.options.blockedResourceTypes;
    const logInterceptError = (err) => console.error('Request interception error:', err);

    await page.setRequestInterception(true);
    page.on('request', (request) => {
      const resourceType = request.resourceType();
      if (blockedTypes.includes(resourceType)) {
        request.abort().catch(logInterceptError);
      } else {
        request.continue().catch(logInterceptError);
      }
    });

    // Add error handling
    page.on('error', err => {
      console.error('Page error:', err);
    });

    page.on('pageerror', err => {
      console.error('Page error:', err);
    });
  }

  /**
   * Looks for a known CAPTCHA widget on the page.
   *
   * @param {object} page
   * @returns {Promise<object|null>} The first matching descriptor (with
   *   `type`, `selector` and type-specific fields), or `null`.
   */
  async detectCaptcha(page) {
    const captchaTypes = [
      {
        type: 'recaptcha',
        selector: '.g-recaptcha',
        siteKeyAttr: 'data-sitekey',
      },
      {
        type: 'hcaptcha',
        selector: '.h-captcha',
        siteKeyAttr: 'data-sitekey',
      },
      {
        type: 'image',
        selector: 'img[class*="captcha"]',
        inputSelector: 'input[name*="captcha"]',
      },
    ];

    for (const captchaType of captchaTypes) {
      const element = await page.$(captchaType.selector);
      if (element) {
        return captchaType;
      }
    }

    return null;
  }

  /**
   * Dispatches to the solver method for the detected CAPTCHA type.
   *
   * @param {object} page
   * @param {{type: string}} captchaInfo - Descriptor from `detectCaptcha`.
   * @returns {Promise<void>}
   * @throws {Error} If the type is unknown or its solver method is missing.
   */
  async solveCaptcha(page, captchaInfo) {
    const solverNames = {
      recaptcha: 'solveRecaptcha',
      hcaptcha: 'solveHcaptcha',
      image: 'solveImageCaptcha',
    };

    const solverName = Object.hasOwn(solverNames, captchaInfo.type)
      ? solverNames[captchaInfo.type]
      : undefined;
    if (solverName === undefined) {
      throw new Error(`Unsupported CAPTCHA type: ${captchaInfo.type}`);
    }
    if (typeof this[solverName] !== 'function') {
      // Clearer than the TypeError a direct call to a missing method raises.
      throw new Error(`${solverName}() is not implemented for CAPTCHA type "${captchaInfo.type}"`);
    }

    await this[solverName](page, captchaInfo);
  }
}

// Usage
async function main() {
  const automation = new ProductionCaptchaAutomation('YOUR_API_KEY', {
    maxConcurrency: 10,
    headless: true,
  });

  await automation.initialize();

  const urls = [
    'https://example.com/page1',
    'https://example.com/page2',
    // ... more URLs
  ];

  const results = await automation.processUrls(urls);
  console.log('Results:', results);
}

main().catch(console.error);

Performance Tips

Optimization | Impact | Implementation
Block images/CSS | 50% faster | Request interception
Use page pool | 3x throughput | Puppeteer Cluster
Disable GPU | Lower memory | Launch args
Cache sessions | Skip login | Cookie persistence

Start Building with Puppeteer

Create powerful browser automation that handles any CAPTCHA seamlessly with our API.