Tích Hợp Proxy Với Scrapy, Playwright, Selenium

Trở lại Tin tức
Tin tức

Tích Hợp Proxy Với Scrapy, Playwright, Selenium

Mỗi framework có cách cấu hình proxy khác nhau. Bài viết này hướng dẫn tích hợp proxy với các framework và thư viện phổ biến: Scrapy, Playwright, Selenium, httpx và aiohttp.

Scrapy

Middleware Đơn Giản

# middlewares.py
class ProxyMiddleware:
    """Downloader middleware that assigns one fixed proxy to every request."""

    def process_request(self, request, spider):
        """Store the proxy URL under ``request.meta['proxy']``.

        Returns ``None`` implicitly.
        """
        proxy_url = 'http://user:pass@proxy.vinaproxy.com:8080'
        request.meta['proxy'] = proxy_url

# settings.py
# Enables the custom middleware: the key is the dotted import path of
# ProxyMiddleware, the value (350) is its order number.
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.ProxyMiddleware': 350,
}

Rotating Proxy Middleware

# middlewares.py
import random

class RotatingProxyMiddleware:
    """Downloader middleware that picks a random proxy for each request.

    When a download raises an exception, the request is rescheduled (at most
    ``max_proxy_retries`` times) so that it is sent again through a proxy
    other than the one that just failed.
    """

    # Upper bound on reschedules per request; prevents an endless retry loop
    # when every proxy (or the target site itself) is unreachable.
    max_proxy_retries = 3

    def __init__(self):
        self.proxies = [
            'http://user:pass_country-vn@proxy.vinaproxy.com:8080',
            'http://user:pass_country-us@proxy.vinaproxy.com:8080',
            'http://user:pass_country-sg@proxy.vinaproxy.com:8080',
        ]

    def process_request(self, request, spider):
        """Assign a random proxy, skipping the one recorded as failed (if any)."""
        failed_proxy = request.meta.get('failed_proxy')
        candidates = [p for p in self.proxies if p != failed_proxy]
        # Fall back to the full list if filtering left nothing to choose from.
        request.meta['proxy'] = random.choice(candidates or self.proxies)

    def process_exception(self, request, exception, spider):
        """Reschedule the failed request so it is retried with another proxy.

        Returns a copy of ``request`` to be rescheduled, or ``None`` once the
        retry budget is used up so that other middlewares / the errback can
        handle ``exception``.
        """
        retries = request.meta.get('proxy_retry_times', 0)
        if retries >= self.max_proxy_retries:
            return None

        retry_request = request.copy()
        retry_request.meta['proxy_retry_times'] = retries + 1
        retry_request.meta['failed_proxy'] = request.meta.get('proxy')
        # The URL was already seen by the scheduler; without this flag the
        # duplicate filter would silently drop the retry.
        retry_request.dont_filter = True
        return retry_request

Scrapy với scrapy-rotating-proxies

# pip install scrapy-rotating-proxies

# settings.py
# Proxy URLs for scrapy-rotating-proxies to rotate through.
ROTATING_PROXY_LIST = [
    'http://user:pass@proxy1.vinaproxy.com:8080',
    'http://user:pass@proxy2.vinaproxy.com:8080',
]

# Enables the two middlewares from the rotating_proxies package, with order
# values 610 and 620.
DOWNLOADER_MIDDLEWARES = {
    'rotating_proxies.middlewares.RotatingProxyMiddleware': 610,
    'rotating_proxies.middlewares.BanDetectionMiddleware': 620,
}

Playwright

Basic Setup

from playwright.sync_api import sync_playwright

# Proxy endpoint and credentials handed to Chromium at launch.
proxy_settings = {
    'server': 'http://proxy.vinaproxy.com:8080',
    'username': 'user123',
    'password': 'pass456',
}

with sync_playwright() as p:
    browser = p.chromium.launch(proxy=proxy_settings)

    # Load the page through the proxy and print its HTML.
    page = browser.new_page()
    page.goto('https://example.com')
    html = page.content()
    print(html)

    browser.close()

Async Playwright

import asyncio
from playwright.async_api import async_playwright

async def scrape_with_proxy():
    """Open https://example.com in Chromium via the proxy and return the page HTML."""
    proxy_settings = {
        'server': 'http://proxy.vinaproxy.com:8080',
        'username': 'user',
        'password': 'pass',
    }

    async with async_playwright() as p:
        browser = await p.chromium.launch(proxy=proxy_settings)

        page = await browser.new_page()
        await page.goto('https://example.com')
        html = await page.content()

        await browser.close()
        return html

asyncio.run(scrape_with_proxy())

Context-Level Proxy

# Each browser context gets its own proxy.
# Credentials go in the separate 'username' / 'password' keys (as in the
# launch() examples); Playwright does not take them from the 'server' URL.
browser = p.chromium.launch()

context_vn = browser.new_context(
    proxy={
        'server': 'http://proxy:8080',
        'username': 'user',
        'password': 'pass_country-vn',
    }
)
context_us = browser.new_context(
    proxy={
        'server': 'http://proxy:8080',
        'username': 'user',
        'password': 'pass_country-us',
    }
)

page_vn = context_vn.new_page()
page_us = context_us.new_page()

Selenium

Chrome với Proxy

from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Proxy address (no credentials) passed to Chrome as a command-line switch.
proxy_server = 'http://proxy.vinaproxy.com:8080'

options = Options()
options.add_argument(f'--proxy-server={proxy_server}')

driver = webdriver.Chrome(options=options)
driver.get('https://example.com')

Selenium với Authentication

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from seleniumwire import webdriver as wire_webdriver

# Use selenium-wire for a proxy that requires authentication.
# pip install selenium-wire

proxy_url = 'http://user:pass@proxy.vinaproxy.com:8080'

# The same proxy URL is configured under both the 'http' and 'https' keys.
options = {
    'proxy': {
        'http': proxy_url,
        'https': proxy_url,
    }
}

driver = wire_webdriver.Chrome(seleniumwire_options=options)
driver.get('https://httpbin.org/ip')

Firefox với Proxy

from selenium import webdriver
from selenium.webdriver.firefox.options import Options

# Set the proxy through Firefox preferences on the Options object.
# (The firefox_profile= keyword of webdriver.Firefox was deprecated in
# Selenium 4 and later removed; preferences now go through Options.)
options = Options()
options.set_preference('network.proxy.type', 1)  # 1 = manual proxy configuration
options.set_preference('network.proxy.http', 'proxy.vinaproxy.com')
options.set_preference('network.proxy.http_port', 8080)
# HTTPS traffic has its own pair of preferences; without them only plain
# HTTP requests would go through the proxy.
options.set_preference('network.proxy.ssl', 'proxy.vinaproxy.com')
options.set_preference('network.proxy.ssl_port', 8080)

driver = webdriver.Firefox(options=options)

httpx (Sync & Async HTTP)

import asyncio

import httpx

# NOTE: the `proxy=` argument needs a recent httpx release; older releases
# used `proxies=` instead.
proxy = 'http://user:pass@proxy.vinaproxy.com:8080'

# Sync
with httpx.Client(proxy=proxy) as client:
    response = client.get('https://example.com')


# Async
async def fetch_async():
    """Fetch https://example.com through the proxy with the async client."""
    async with httpx.AsyncClient(proxy=proxy) as client:
        return await client.get('https://example.com')


# `async with` / `await` are only valid inside a coroutine, so the async
# variant is wrapped in a function and driven by asyncio.run().
response = asyncio.run(fetch_async())

aiohttp

import aiohttp
import asyncio

async def fetch():
    """Request https://httpbin.org/ip through the proxy and return the decoded JSON body."""
    target_url = 'https://httpbin.org/ip'
    proxy_url = 'http://user:pass@proxy.vinaproxy.com:8080'

    async with aiohttp.ClientSession() as session, \
            session.get(target_url, proxy=proxy_url) as response:
        payload = await response.json()
        return payload

result = asyncio.run(fetch())

So Sánh Frameworks

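| Framework  | JS Render | Async   | Proxy Setup   |
|------------|-----------|---------|---------------|
| Scrapy     | ❌        | Twisted | Middleware    |
| Playwright | ✅        | ✅      | Launch option |
| Selenium   | ✅        | ❌      | Options/Wire  |
| httpx      | ❌        | ✅      | Client param  |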

VinaProxy + Any Framework

  • Works với tất cả frameworks
  • HTTP và SOCKS5 support
  • Giá chỉ $0.5/GB

Tích Hợp Ngay →