Proxy Geo-Targeting: Scrape Dữ Liệu Theo Vùng Địa Lý
Nhiều website hiển thị content khác nhau tùy theo location của người truy cập. Bài viết này hướng dẫn cách dùng proxy để geo-targeting khi scrape dữ liệu.
Tại Sao Cần Geo-Targeting?
- Giá khác nhau: Các trang e-commerce hiển thị giá theo từng region
- Content khác nhau: News được localized theo từng nước
- Availability: Một số products chỉ bán ở một số nước
- Ads research: Xem ads hiển thị ở từng market
Country-Level Targeting
# VinaProxy format: the target country is selected with a
# `_country-<code>` suffix in the credentials part of the proxy URL.
proxy_vn = 'http://user:pass_country-vn@proxy.vinaproxy.com:8080'
proxy_us = 'http://user:pass_country-us@proxy.vinaproxy.com:8080'
proxy_jp = 'http://user:pass_country-jp@proxy.vinaproxy.com:8080'

# Usage: fetch the same URL through two different country exits.
# NOTE: `url` and `extract_price` are not defined in this snippet; they are
# assumed to be provided by the reader.
# A timeout is set so that a stalled proxy cannot hang the script forever.
response_vn = requests.get(
    url, proxies={'http': proxy_vn, 'https': proxy_vn}, timeout=15
)
response_us = requests.get(
    url, proxies={'http': proxy_us, 'https': proxy_us}, timeout=15
)

# Compare prices. The page body (`.text`) is passed to `extract_price`,
# matching how compare_prices_by_region calls the same helper.
print(f"VN price: {extract_price(response_vn.text)}")
print(f"US price: {extract_price(response_us.text)}")
City-Level Targeting
# City-level targeting: a `_city-<name>` suffix follows the country code.

# Major cities in Vietnam (Ho Chi Minh City, Hanoi, Da Nang)
proxy_hcm, proxy_hn, proxy_dn = (
    'http://user:pass_country-vn_city-hochiminh@proxy.vinaproxy.com:8080',
    'http://user:pass_country-vn_city-hanoi@proxy.vinaproxy.com:8080',
    'http://user:pass_country-vn_city-danang@proxy.vinaproxy.com:8080',
)

# US cities (New York, Los Angeles)
proxy_nyc, proxy_la = (
    'http://user:pass_country-us_city-newyork@proxy.vinaproxy.com:8080',
    'http://user:pass_country-us_city-losangeles@proxy.vinaproxy.com:8080',
)
Use Case 1: E-commerce Price Monitoring
def compare_prices_by_region(product_url, regions):
    """Compare a product's price as seen from several regions.

    Args:
        product_url: URL of the product page to fetch.
        regions: Mapping of region label to the proxy URL used to reach
            the page from that region.

    Returns:
        Dict mapping each region label to the value ``extract_price``
        returns for the page body fetched through that region's proxy.

    Any exception raised by ``requests.get`` (including a timeout) is not
    caught here and propagates to the caller.
    """
    prices = {}
    for region, proxy in regions.items():
        response = requests.get(
            product_url,
            proxies={'http': proxy, 'https': proxy},
            # Without a timeout a stalled proxy would block the whole
            # comparison indefinitely.
            timeout=15,
        )
        price = extract_price(response.text)
        prices[region] = price
        print(f"{region}: {price}")
    return prices
# One proxy URL per market whose price should be compared.
regions = dict(
    Vietnam='http://user:pass_country-vn@proxy:8080',
    Singapore='http://user:pass_country-sg@proxy:8080',
    USA='http://user:pass_country-us@proxy:8080',
)

prices = compare_prices_by_region('https://example.com/product', regions)

# (region, price) pair with the smallest price.
cheapest = min(prices.items(), key=lambda entry: entry[1])
Use Case 2: Localized Search Results
def google_search_by_location(query, countries):
    """Run the same Google search from several locations.

    Args:
        query: Search terms, sent as the ``q`` query parameter.
        countries: Mapping of Google domain suffix (for example ``'com'``
            or ``'co.uk'``) to the proxy URL used for that market.

    Returns:
        Dict mapping each domain suffix to the value ``parse_serp``
        returns for the response body.

    Any exception raised by ``requests.get`` (including a timeout) is not
    caught here and propagates to the caller.
    """
    results = {}
    for country, proxy in countries.items():
        # Google domain per country
        google_url = f'https://www.google.{country}/search'
        response = requests.get(
            google_url,
            # Let requests URL-encode the query: characters such as
            # '&', '#' or '+' would corrupt a hand-built query string.
            params={'q': query},
            proxies={'http': proxy, 'https': proxy},
            headers={'Accept-Language': 'en-US'},
            # Avoid hanging forever on a stalled proxy.
            timeout=15,
        )
        results[country] = parse_serp(response.text)
    return results
# Compare rankings per market: keys are Google domain suffixes, values are
# the proxy used for that market.
# NOTE(review): `proxy_uk` is not defined in the snippets shown here;
# presumably it follows the same pattern as `proxy_us` - confirm.
rankings = google_search_by_location(
    query='web scraping',
    countries={'com': proxy_us, 'co.uk': proxy_uk, 'com.vn': proxy_vn},
)
Use Case 3: Ads Intelligence
def collect_ads_by_region(url, regions):
    """Collect the text of ad elements on a page, once per region.

    Args:
        url: Page to open in the browser.
        regions: Mapping of region label to the value passed as ``proxy=``
            to ``chromium.launch``. NOTE(review): Playwright documents
            this as a settings dict such as ``{'server': ...}`` rather
            than a plain URL string - confirm against the caller.

    Returns:
        Dict mapping each region label to a list with the inner text of
        every element matching ``.ad-banner`` or ``[data-ad]``.
    """
    from playwright.sync_api import sync_playwright

    ads_data = {}
    with sync_playwright() as p:
        for region, proxy_info in regions.items():
            # A fresh browser is launched for every region so that each
            # one uses its own proxy.
            browser = p.chromium.launch(proxy=proxy_info)
            try:
                page = browser.new_page()
                page.goto(url)
                # Capture ads
                ads = page.query_selector_all('.ad-banner, [data-ad]')
                ads_data[region] = [ad.inner_text() for ad in ads]
            finally:
                # Close the browser even when navigation or scraping
                # fails, so a bad region does not leak a browser process.
                browser.close()
    return ads_data
Use Case 4: Content Availability
def check_availability(url, countries):
    """Check whether a URL is reachable from each country.

    Args:
        url: URL to request.
        countries: Mapping of country label to the proxy URL used for it.

    Returns:
        Dict mapping each country label to one of:
        ``'Available'`` (HTTP 200), ``'Geo-blocked'`` (HTTP 451),
        ``'Error <status>'`` (any other status code) or ``'Failed'``
        (the request raised a ``requests`` exception, e.g. a timeout or
        proxy/connection error).
    """
    availability = {}
    for country, proxy in countries.items():
        try:
            response = requests.get(
                url,
                proxies={'http': proxy, 'https': proxy},
                timeout=15
            )
        except requests.RequestException:
            # Only network-level failures are reported as 'Failed'; a bare
            # except would also swallow KeyboardInterrupt and real bugs.
            availability[country] = 'Failed'
            continue

        if response.status_code == 200:
            availability[country] = 'Available'
        elif response.status_code == 451:
            availability[country] = 'Geo-blocked'
        else:
            availability[country] = f'Error {response.status_code}'
    return availability
# Check streaming service availability.
# NOTE(review): `all_countries` is not defined in this snippet; it is
# assumed to map a country label to a proxy URL, which is what
# check_availability iterates over - confirm.
# The result is kept and printed; discarding it would make the check
# produce no visible output.
availability = check_availability('https://streaming-service.com/movie', all_countries)
for country, status in availability.items():
    print(f"{country}: {status}")
Multi-Region Parallel
import concurrent.futures
def scrape_region(region, proxy, url):
    """Fetch ``url`` through ``proxy`` and tag the body with its region.

    Args:
        region: Label identifying the region; returned unchanged.
        proxy: Proxy URL used for both HTTP and HTTPS traffic.
        url: URL to fetch.

    Returns:
        Tuple ``(region, response_text)``.

    Any exception raised by ``requests.get`` (including a timeout) is not
    caught here and propagates to the caller.
    """
    # The timeout keeps one stalled proxy from blocking its worker thread
    # (and therefore the executor shutdown) forever.
    response = requests.get(
        url, proxies={'http': proxy, 'https': proxy}, timeout=15
    )
    return region, response.text
# Region label -> proxy used to fetch the page from that region.
# NOTE(review): `proxy_sg` and `url` are not defined in the snippets shown
# here; they are assumed to be set up by the reader - confirm.
regions = dict(VN=proxy_vn, US=proxy_us, JP=proxy_jp, SG=proxy_sg)

# Submit one scrape_region call per region to a pool of 4 worker threads.
with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    futures = [
        executor.submit(scrape_region, name, proxy, url)
        for name, proxy in regions.items()
    ]

    # Gather (region, content) pairs in completion order; an exception
    # raised inside a worker is re-raised here by result().
    results = {}
    results.update(
        finished.result()
        for finished in concurrent.futures.as_completed(futures)
    )
VinaProxy Geo Coverage
- 195+ countries
- City-level targeting
- Instant switching
- Giá chỉ $0.5/GB
