Scrape Reviews Sản Phẩm: Thu Thập Đánh Giá Khách Hàng

Trở lại Tin tức
Tin tức

Scrape Reviews Sản Phẩm: Thu Thập Đánh Giá Khách Hàng

Reviews là nguồn insight quý giá. Bài viết này hướng dẫn cách scrape customer reviews để phục vụ market research.

Use Cases

  • Product research: Pros/cons của sản phẩm
  • Competitor analysis: Điểm yếu đối thủ
  • Sentiment analysis: Cảm xúc tổng thể của khách hàng (tích cực / tiêu cực)
  • Feature extraction: Tính năng được mention nhiều
  • Quality monitoring: Track chất lượng theo thời gian

Nguồn Reviews

  • Amazon, Shopee, Lazada
  • Google Reviews
  • Yelp, TripAdvisor
  • App Store, Google Play
  • G2, Capterra (B2B software)

Shopee Reviews Scraper

import requests

def scrape_shopee_reviews(item_id, shop_id, page_size=50, timeout=10):
    """Collect all ratings of one Shopee product via the ratings endpoint.

    Pages through the endpoint ``page_size`` ratings at a time until a
    short (or empty) page is returned.

    Args:
        item_id: Value sent as the ``itemid`` query parameter.
        shop_id: Value sent as the ``shopid`` query parameter.
        page_size: Number of ratings requested per page (default 50).
        timeout: Per-request timeout in seconds, so a stalled connection
            cannot hang the scraper forever.

    Returns:
        A list of dicts with the keys ``rating``, ``comment``, ``author``,
        ``date`` and ``likes``.

    Raises:
        requests.HTTPError: If the endpoint answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    url = 'https://shopee.vn/api/v2/item/get_ratings'
    params = {
        'itemid': item_id,
        'shopid': shop_id,
        'limit': page_size,
        'offset': 0,
        'type': 0,  # All ratings
    }
    headers = {
        'User-Agent': 'Mozilla/5.0...',
        'Referer': 'https://shopee.vn',
    }

    reviews = []
    while True:
        response = requests.get(
            url, params=params, headers=headers, timeout=timeout)
        # Fail loudly on 4xx/5xx (e.g. when blocked) instead of trying to
        # parse an error page as JSON.
        response.raise_for_status()
        data = response.json()

        # Either "data" or "ratings" may be missing or null when there is
        # nothing (more) to return; treat that as an empty page.
        ratings = (data.get('data') or {}).get('ratings') or []

        for rating in ratings:
            reviews.append({
                'rating': rating['rating_star'],
                'comment': rating['comment'],
                'author': rating['author_username'],
                'date': rating['ctime'],
                'likes': rating['like_count'],
            })

        # A short page means the last page has been reached.
        if len(ratings) < page_size:
            break
        params['offset'] += page_size

    return reviews

Google Reviews (Maps)

from playwright.sync_api import sync_playwright

def _text_or_empty(element, selector):
    """Return the inner text of the first match of *selector*, or ''."""
    node = element.query_selector(selector)
    return node.inner_text() if node else ''


def scrape_google_reviews(place_url, max_scrolls=10):
    """Scrape the reviews shown on a Google Maps place page.

    Opens the page in headless Chromium, switches to the reviews tab,
    scrolls the review panel to trigger lazy loading and then reads every
    element carrying a ``data-review-id`` attribute.

    Args:
        place_url: URL of the place page to open.
        max_scrolls: How many times to scroll the review panel to the
            bottom (default 10).

    Returns:
        A list of dicts with the keys ``author``, ``rating``, ``text`` and
        ``date``. Missing author/text/date elements yield ``''``. Returns
        an empty list when no review element is found.

    NOTE(review): the CSS class names are obfuscated identifiers taken from
    the page markup and will likely break when the markup changes.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        # try/finally guarantees the browser process is closed even when a
        # selector lookup or navigation raises.
        try:
            page = browser.new_page()
            page.goto(place_url)

            # Click "Reviews" tab
            page.click('button[data-tab-index="1"]')
            page.wait_for_timeout(2000)

            first_review = page.query_selector('[data-review-id]')
            if first_review is None:
                # No reviews rendered: nothing to scroll or parse.
                return []

            # Scroll to load more reviews; the scroll target is the
            # grandparent of the first review element.
            review_panel = first_review.evaluate_handle(
                'el => el.parentElement.parentElement')

            for _ in range(max_scrolls):
                review_panel.evaluate('el => el.scrollTop = el.scrollHeight')
                page.wait_for_timeout(1000)

            reviews = []
            for review in page.query_selector_all('[data-review-id]'):
                reviews.append({
                    'author': _text_or_empty(review, '.d4r55'),
                    # Rating is the number of matching star elements.
                    'rating': len(review.query_selector_all('.hCCjke.vzX5Ic')),
                    'text': _text_or_empty(review, '.MyEned'),
                    'date': _text_or_empty(review, '.rsqaWe'),
                })

            return reviews
        finally:
            browser.close()

Sentiment Analysis

from collections import Counter

positive_words = ['tốt', 'đẹp', 'nhanh', 'chất lượng', 'hài lòng', 'recommend']
negative_words = ['tệ', 'chậm', 'hỏng', 'thất vọng', 'không tốt', 'kém']


def analyze_sentiment(reviews):
    """Count positive / negative / neutral reviews with a keyword heuristic.

    A review is negative when its text contains any entry of
    ``negative_words`` and positive when it contains any entry of
    ``positive_words`` outside of a negative phrase. A review may be both
    (mixed feedback); it is neutral only when it is neither.

    Args:
        reviews: Iterable of dicts. The text is read from the ``comment``
            key, falling back to ``text``; a missing or ``None`` value is
            treated as an empty string.

    Returns:
        A dict with the integer counts ``positive``, ``negative`` and
        ``neutral`` plus ``sentiment_ratio`` = positive / (negative + 1).
    """
    positive = 0
    negative = 0
    neutral = 0

    for review in reviews:
        text = (review.get('comment') or review.get('text') or '').lower()

        is_negative = any(word in text for word in negative_words)

        # Mask negative phrases before looking for positive words, so that
        # "không tốt" is not also counted as positive because it contains
        # "tốt". Replacing with a space avoids gluing neighbours together.
        masked = text
        for word in negative_words:
            masked = masked.replace(word, ' ')
        is_positive = any(word in masked for word in positive_words)

        if is_positive:
            positive += 1
        if is_negative:
            negative += 1
        # Counted per review: deriving it as total - positive - negative
        # would go negative whenever a review is both positive and negative.
        if not is_positive and not is_negative:
            neutral += 1

    return {
        'positive': positive,
        'negative': negative,
        'neutral': neutral,
        # +1 keeps the ratio defined when there are no negative reviews.
        'sentiment_ratio': positive / (negative + 1)
    }

Feature Extraction

from collections import Counter
import re

def extract_features(reviews, features=None):
    """Count how many reviews mention each product feature.

    Matching is a plain substring test against the lower-cased review
    text, so each feature is counted at most once per review.

    Args:
        reviews: Iterable of dicts. The text is read from the ``comment``
            key, falling back to ``text``; a missing or ``None`` value is
            treated as an empty string.
        features: Optional list of lower-case feature keywords to look
            for. Defaults to a built-in list of common product aspects.

    Returns:
        A list of ``(feature, count)`` tuples sorted by descending count;
        features that are never mentioned are omitted.
    """
    if features is None:
        features = ['pin', 'màn hình', 'camera', 'giá', 'ship', 'đóng gói']
    mentions = Counter()

    for review in reviews:
        text = (review.get('comment') or review.get('text') or '').lower()
        mentions.update(feature for feature in features if feature in text)

    return mentions.most_common()

VinaProxy + Review Scraping

  • Scrape reviews không bị block
  • Collect từ nhiều platforms
  • Giá chỉ $0.5/GB

Dùng Thử Ngay →