Tích Hợp Proxy Với Scrapy, Playwright, Selenium
Mỗi framework cấu hình proxy theo một cách khác nhau. Bài viết này hướng dẫn tích hợp proxy với các framework và thư viện phổ biến: Scrapy, Playwright, Selenium, httpx và aiohttp.
Scrapy
Middleware Đơn Giản
# middlewares.py
class ProxyMiddleware:
    """Scrapy downloader middleware that routes every request through one fixed proxy."""

    # Proxy endpoint, with the credentials embedded in the URL.
    PROXY_URL = 'http://user:pass@proxy.vinaproxy.com:8080'

    def process_request(self, request, spider):
        """Store the proxy URL under ``request.meta['proxy']``.

        Args:
            request: The outgoing request; its ``meta`` dict is modified in place.
            spider: The spider that issued the request (unused).

        Returns:
            None.
        """
        request.meta['proxy'] = self.PROXY_URL
# settings.py
# Enable the custom proxy middleware; the value (350) is its order number
# among the project's downloader middlewares.
DOWNLOADER_MIDDLEWARES = {
    'myproject.middlewares.ProxyMiddleware': 350,
}
Rotating Proxy Middleware
# middlewares.py
import random
class RotatingProxyMiddleware:
    """Scrapy downloader middleware that picks a random proxy for every request.

    When a download raises an exception, the request is rescheduled a bounded
    number of times so that it can go out again through a freshly chosen proxy.
    """

    def __init__(self, max_retries=3):
        """Set up the proxy pool.

        Args:
            max_retries: Maximum number of times a single request is
                rescheduled after a download exception.
        """
        self.max_retries = max_retries
        self.proxies = [
            'http://user:pass_country-vn@proxy.vinaproxy.com:8080',
            'http://user:pass_country-us@proxy.vinaproxy.com:8080',
            'http://user:pass_country-sg@proxy.vinaproxy.com:8080',
        ]

    def process_request(self, request, spider):
        """Assign a randomly chosen proxy to ``request.meta['proxy']``."""
        request.meta['proxy'] = random.choice(self.proxies)

    def process_exception(self, request, exception, spider):
        """Reschedule a failed request so it is retried through another proxy.

        Args:
            request: The request whose download raised ``exception``.
            exception: The exception raised during the download (unused).
            spider: The spider that issued the request (unused).

        Returns:
            A copy of ``request`` to be rescheduled, or ``None`` once the
            request has already been retried ``max_retries`` times.
        """
        retries = request.meta.get('proxy_retry_times', 0)
        if retries >= self.max_retries:
            # Give up: returning the request forever would loop endlessly
            # on a URL that fails through every proxy.
            return None

        # dont_filter=True keeps the scheduler's duplicate filter from
        # silently dropping the rescheduled request. process_request() runs
        # again for the copy and picks a new random proxy.
        retry_request = request.replace(dont_filter=True)
        retry_request.meta['proxy_retry_times'] = retries + 1
        return retry_request
Scrapy với scrapy-rotating-proxies
# pip install scrapy-rotating-proxies
# settings.py
# Pool of proxy URLs handed to scrapy-rotating-proxies.
ROTATING_PROXY_LIST = [
    'http://user:pass@proxy1.vinaproxy.com:8080',
    'http://user:pass@proxy2.vinaproxy.com:8080',
]

# Enable the two middlewares shipped with the package: proxy rotation (610)
# and ban detection (620).
DOWNLOADER_MIDDLEWARES = {
    'rotating_proxies.middlewares.RotatingProxyMiddleware': 610,
    'rotating_proxies.middlewares.BanDetectionMiddleware': 620,
}
Playwright
Basic Setup
from playwright.sync_api import sync_playwright
with sync_playwright() as p:
    # Browser-wide proxy: server address and credentials are separate fields.
    proxy_settings = {
        'server': 'http://proxy.vinaproxy.com:8080',
        'username': 'user123',
        'password': 'pass456',
    }
    browser = p.chromium.launch(proxy=proxy_settings)

    page = browser.new_page()
    page.goto('https://example.com')
    html = page.content()
    print(html)

    browser.close()
Async Playwright
import asyncio
from playwright.async_api import async_playwright
async def scrape_with_proxy():
    """Load https://example.com in Chromium through the proxy and return the page HTML."""
    proxy_settings = {
        'server': 'http://proxy.vinaproxy.com:8080',
        'username': 'user',
        'password': 'pass',
    }
    async with async_playwright() as p:
        browser = await p.chromium.launch(proxy=proxy_settings)
        page = await browser.new_page()
        await page.goto('https://example.com')
        html = await page.content()
        await browser.close()
        return html


asyncio.run(scrape_with_proxy())
Context-Level Proxy
# Each browser context gets its own proxy.
# NOTE: `p` is assumed to be the handle from an enclosing
# `with sync_playwright() as p:` block.
browser = p.chromium.launch()

# Credentials go in the separate `username` / `password` fields; Playwright
# does not take them from a `user:pass@` prefix in the server URL.
context_vn = browser.new_context(
    proxy={
        'server': 'http://proxy:8080',
        'username': 'user',
        'password': 'pass_country-vn',
    }
)
context_us = browser.new_context(
    proxy={
        'server': 'http://proxy:8080',
        'username': 'user',
        'password': 'pass_country-us',
    }
)

page_vn = context_vn.new_page()
page_us = context_us.new_page()
Selenium
Chrome với Proxy
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Point Chrome at the proxy via its command-line switch. No credentials are
# passed here.
options = Options()
options.add_argument('--proxy-server=http://proxy.vinaproxy.com:8080')

driver = webdriver.Chrome(options=options)
try:
    driver.get('https://example.com')
finally:
    # Always shut down the browser and its driver process, even if the
    # navigation fails; otherwise they are left running.
    driver.quit()
Selenium với Authentication
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from seleniumwire import webdriver as wire_webdriver
# Use selenium-wire for a proxy that requires authentication.
# pip install selenium-wire
options = {
    'proxy': {
        'http': 'http://user:pass@proxy.vinaproxy.com:8080',
        'https': 'http://user:pass@proxy.vinaproxy.com:8080',
    }
}

driver = wire_webdriver.Chrome(seleniumwire_options=options)
try:
    driver.get('https://httpbin.org/ip')
finally:
    # Always shut down the browser and its driver process, even if the
    # navigation fails; otherwise they are left running.
    driver.quit()
Firefox với Proxy
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# Proxy preferences are set on Options: the `firefox_profile=` keyword of
# webdriver.Firefox() is no longer accepted by current Selenium 4 releases.
options = Options()
options.set_preference('network.proxy.type', 1)  # 1 = manual proxy configuration
options.set_preference('network.proxy.http', 'proxy.vinaproxy.com')
options.set_preference('network.proxy.http_port', 8080)
# Send HTTPS traffic through the same proxy; with only the http prefs set,
# https:// pages would bypass it.
options.set_preference('network.proxy.ssl', 'proxy.vinaproxy.com')
options.set_preference('network.proxy.ssl_port', 8080)

driver = webdriver.Firefox(options=options)
httpx (Sync & Async HTTP)
import asyncio

import httpx

# NOTE: the `proxy=` keyword needs a recent httpx release; older releases
# used `proxies=` instead.
proxy = 'http://user:pass@proxy.vinaproxy.com:8080'

# Sync
with httpx.Client(proxy=proxy) as client:
    response = client.get('https://example.com')


# Async
async def fetch_async():
    """Fetch https://example.com through the proxy with the async client."""
    # `async with` / `await` are only valid inside a coroutine, so the async
    # variant is wrapped in a function and driven by asyncio.run().
    async with httpx.AsyncClient(proxy=proxy) as client:
        return await client.get('https://example.com')


response = asyncio.run(fetch_async())
aiohttp
import aiohttp
import asyncio
async def fetch():
    """Request https://httpbin.org/ip through the proxy and return the decoded JSON body."""
    target_url = 'https://httpbin.org/ip'
    proxy_url = 'http://user:pass@proxy.vinaproxy.com:8080'

    async with aiohttp.ClientSession() as session:
        # The proxy is passed per request rather than on the session.
        async with session.get(target_url, proxy=proxy_url) as response:
            payload = await response.json()
            return payload


result = asyncio.run(fetch())
So Sánh Frameworks
| Framework | JS Render | Async | Proxy Setup |
|---|---|---|---|
| Scrapy | ❌ | Twisted | Middleware |
| Playwright | ✅ | ✅ | Launch option |
| Selenium | ✅ | ❌ | Options/Wire |
| httpx | ❌ | ✅ | Client param |
| aiohttp | ❌ | ✅ | Request param |
VinaProxy + Any Framework
- Hoạt động với tất cả các framework
- Hỗ trợ HTTP và SOCKS5
- Giá chỉ $0.5/GB
