Proxy Geo-Targeting: Scrape Dữ Liệu Theo Vùng Địa Lý

Trở lại Tin tức
Tin tức

Proxy Geo-Targeting: Scrape Dữ Liệu Theo Vùng Địa Lý

Các website thường hiển thị content khác nhau tùy theo vị trí (location) của người truy cập. Bài viết này hướng dẫn cách dùng proxy geo-targeting để scrape dữ liệu theo từng vùng địa lý.

Tại Sao Cần Geo-Targeting?

  • Giá khác nhau: Các trang e-commerce hiển thị giá khác nhau theo từng region
  • Content khác nhau: Tin tức được localize theo từng thị trường
  • Availability: Một số products chỉ được bán ở một số nước
  • Ads research: Xem ads được hiển thị ở từng market

Country-Level Targeting

import requests

# Page whose regional variants are compared (placeholder URL).
url = 'https://example.com/product'

# VinaProxy format: the target country is chosen by the `_country-<code>`
# suffix in the credentials part of the proxy URL.
proxy_vn = 'http://user:pass_country-vn@proxy.vinaproxy.com:8080'
proxy_us = 'http://user:pass_country-us@proxy.vinaproxy.com:8080'
proxy_jp = 'http://user:pass_country-jp@proxy.vinaproxy.com:8080'

# Usage: send both http and https traffic through the same geo-targeted proxy.
# The timeout keeps a dead or slow proxy from blocking the script forever.
response_vn = requests.get(url, proxies={'http': proxy_vn, 'https': proxy_vn}, timeout=15)
response_us = requests.get(url, proxies={'http': proxy_us, 'https': proxy_us}, timeout=15)

# Compare prices. extract_price (not shown in this article) is given the page
# HTML, the same way compare_prices_by_region calls it.
print(f"VN price: {extract_price(response_vn.text)}")
print(f"US price: {extract_price(response_us.text)}")

City-Level Targeting

# City-level targeting: a `_city-<name>` segment follows the country code.
_CITY_PROXY = 'http://user:pass_country-{country}_city-{city}@proxy.vinaproxy.com:8080'

# Major cities in Vietnam
proxy_hcm = _CITY_PROXY.format(country='vn', city='hochiminh')
proxy_hn = _CITY_PROXY.format(country='vn', city='hanoi')
proxy_dn = _CITY_PROXY.format(country='vn', city='danang')

# US cities
proxy_nyc = _CITY_PROXY.format(country='us', city='newyork')
proxy_la = _CITY_PROXY.format(country='us', city='losangeles')

Use Case 1: E-commerce Price Monitoring

def compare_prices_by_region(product_url, regions, timeout=15):
    """Fetch a product page through each regional proxy and collect prices.

    Args:
        product_url: URL of the product page to fetch.
        regions: Mapping of region label -> proxy URL. The proxy is used for
            both http and https traffic.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        Dict mapping region label -> whatever ``extract_price`` returns for
        that region's HTML. Regions whose request failed are reported on
        stdout and left out of the result.
    """
    prices = {}

    for region, proxy in regions.items():
        try:
            response = requests.get(
                product_url,
                proxies={'http': proxy, 'https': proxy},
                timeout=timeout,
            )
            # Do not parse a block/error page as if it were the product page.
            response.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable region should not abort the whole comparison.
            print(f"{region}: request failed ({exc})")
            continue

        price = extract_price(response.text)
        prices[region] = price
        print(f"{region}: {price}")

    return prices

# Region label -> proxy URL, using the same gateway host as the country-level
# examples (a bare `proxy` host name would not resolve).
regions = {
    'Vietnam': 'http://user:pass_country-vn@proxy.vinaproxy.com:8080',
    'Singapore': 'http://user:pass_country-sg@proxy.vinaproxy.com:8080',
    'USA': 'http://user:pass_country-us@proxy.vinaproxy.com:8080',
}

prices = compare_prices_by_region('https://example.com/product', regions)
# (region, price) pair with the lowest price; None when no price was collected.
cheapest = min(prices.items(), key=lambda item: item[1], default=None)

Use Case 2: Localized Search Results

def google_search_by_location(query, countries, timeout=15):
    """Run the same Google search through several country proxies.

    Args:
        query: Search terms, passed as the ``q`` query parameter.
        countries: Mapping of Google domain suffix (e.g. ``'com'``,
            ``'co.uk'``, ``'com.vn'``) -> proxy URL for that market.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        Dict mapping domain suffix -> whatever ``parse_serp`` returns for
        that market's result page.
    """
    results = {}

    for country, proxy in countries.items():
        # Country-specific Google domain.
        google_url = f'https://www.google.{country}/search'

        response = requests.get(
            google_url,
            # Let requests URL-encode the query so spaces, '&', '#', '+'
            # and non-ASCII characters cannot corrupt the URL.
            params={'q': query},
            proxies={'http': proxy, 'https': proxy},
            headers={'Accept-Language': 'en-US'},
            timeout=timeout,
        )

        results[country] = parse_serp(response.text)

    return results

# proxy_uk is not defined in the country-level examples, so it is defined here
# in the same format.
# NOTE(review): confirm the UK country code ('gb' vs 'uk') with the provider.
proxy_uk = 'http://user:pass_country-gb@proxy.vinaproxy.com:8080'

# Compare rankings per market: Google domain suffix -> proxy for that country.
rankings = google_search_by_location('web scraping', {
    'com': proxy_us,
    'co.uk': proxy_uk,
    'com.vn': proxy_vn,
})

Use Case 3: Ads Intelligence

def collect_ads_by_region(url, regions):
    """Collect the text of ad elements on a page as seen from each region.

    Args:
        url: Page to open.
        regions: Mapping of region label -> proxy settings, passed unchanged
            to ``chromium.launch(proxy=...)``. Playwright expects a dict such
            as ``{'server': ..., 'username': ..., 'password': ...}`` here,
            not the proxy URL strings used with ``requests``.

    Returns:
        Dict mapping region label -> list of inner texts of the elements
        matching ``.ad-banner, [data-ad]``.
    """
    from playwright.sync_api import sync_playwright

    ads_data = {}

    with sync_playwright() as p:
        for region, proxy_info in regions.items():
            # One browser per region, because the proxy is set at launch.
            browser = p.chromium.launch(proxy=proxy_info)
            try:
                page = browser.new_page()
                page.goto(url)

                # Capture ads
                ads = page.query_selector_all('.ad-banner, [data-ad]')
                ads_data[region] = [ad.inner_text() for ad in ads]
            finally:
                # Close this region's browser even if navigation or
                # scraping raised.
                browser.close()

    return ads_data

Use Case 4: Content Availability

def check_availability(url, countries, timeout=15):
    """Check whether a URL is reachable through each country's proxy.

    Args:
        url: URL to request.
        countries: Mapping of country label -> proxy URL.
        timeout: Seconds to wait for each request before giving up.

    Returns:
        Dict mapping country label -> status string: ``'Available'`` for
        HTTP 200, ``'Geo-blocked'`` for HTTP 451, ``'Error <code>'`` for any
        other status, and ``'Failed'`` when the request itself failed.
    """
    availability = {}

    for country, proxy in countries.items():
        try:
            response = requests.get(
                url,
                proxies={'http': proxy, 'https': proxy},
                timeout=timeout,
            )
        except requests.RequestException:
            # Only request failures count as 'Failed'; a bare except would
            # also swallow KeyboardInterrupt and hide programming errors.
            availability[country] = 'Failed'
            continue

        if response.status_code == 200:
            availability[country] = 'Available'
        elif response.status_code == 451:
            availability[country] = 'Geo-blocked'
        else:
            availability[country] = f'Error {response.status_code}'

    return availability

# Check streaming service availability.
# all_countries maps a country label to its proxy URL; it is built here from
# the country-level proxies defined earlier.
all_countries = {'VN': proxy_vn, 'US': proxy_us, 'JP': proxy_jp}
availability = check_availability('https://streaming-service.com/movie', all_countries)
for country, status in availability.items():
    print(f"{country}: {status}")

Multi-Region Parallel

import concurrent.futures

def scrape_region(region, proxy, url, timeout=15):
    """Fetch ``url`` through ``proxy`` and return ``(region, body_text)``.

    Args:
        region: Label returned unchanged so the caller can match the result
            to its region.
        proxy: Proxy URL used for both http and https traffic.
        url: Page to fetch.
        timeout: Seconds to wait before giving up, so a stalled proxy cannot
            block a worker thread indefinitely.

    Returns:
        Tuple ``(region, response.text)``.
    """
    response = requests.get(
        url,
        proxies={'http': proxy, 'https': proxy},
        timeout=timeout,
    )
    return region, response.text

# Singapore proxy, in the same format as the country-level examples.
proxy_sg = 'http://user:pass_country-sg@proxy.vinaproxy.com:8080'

# Page to fetch from every region (placeholder URL).
url = 'https://example.com/product'

# Region label -> proxy URL.
regions = {
    'VN': proxy_vn,
    'US': proxy_us,
    'JP': proxy_jp,
    'SG': proxy_sg,
}

with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
    # Map each future back to its region so a failure can be attributed.
    futures = {
        executor.submit(scrape_region, r, p, url): r
        for r, p in regions.items()
    }

    results = {}
    for future in concurrent.futures.as_completed(futures):
        region = futures[future]
        try:
            # result() re-raises whatever the worker raised.
            _, content = future.result()
        except requests.RequestException as exc:
            # One failed region should not discard the others' results.
            print(f"{region}: request failed ({exc})")
            continue
        results[region] = content

VinaProxy Geo Coverage

  • 195+ countries
  • City-level targeting
  • Instant switching
  • Giá chỉ $0.5/GB

Geo-Target Ngay →