Scrape Reviews Sản Phẩm: Thu Thập Đánh Giá Khách Hàng
Reviews là nguồn insight quý giá. Bài viết này hướng dẫn cách scrape customer reviews để phục vụ market research.
Use Cases
- Product research: Pros/cons của sản phẩm
- Competitor analysis: Điểm yếu đối thủ
- Sentiment analysis: Đánh giá cảm xúc tổng thể trong phản hồi của khách hàng
- Feature extraction: Tính năng được mention nhiều
- Quality monitoring: Track chất lượng theo thời gian
Nguồn Reviews
- Amazon, Shopee, Lazada
- Google Reviews
- Yelp, TripAdvisor
- App Store, Google Play
- G2, Capterra (B2B software)
Shopee Reviews Scraper
import requests
def scrape_shopee_reviews(item_id, shop_id):
    """Collect all ratings of one Shopee product through the ratings API.

    Pages through ``/api/v2/item/get_ratings`` 50 entries at a time and stops
    as soon as a page comes back with fewer entries than requested.

    Args:
        item_id: Value sent as the ``itemid`` query parameter.
        shop_id: Value sent as the ``shopid`` query parameter.

    Returns:
        A list of dicts with the keys ``rating``, ``comment``, ``author``,
        ``date`` and ``likes``, copied from each rating entry of the response.
        Empty when the API reports no ratings.

    Raises:
        requests.HTTPError: If the API answers with a 4xx/5xx status.
        requests.Timeout: If a single request exceeds the timeout.
    """
    url = 'https://shopee.vn/api/v2/item/get_ratings'
    page_size = 50
    params = {
        'itemid': item_id,
        'shopid': shop_id,
        'limit': page_size,
        'offset': 0,
        'type': 0,  # All ratings
    }
    headers = {
        'User-Agent': 'Mozilla/5.0...',
        'Referer': 'https://shopee.vn',
    }

    reviews = []
    while True:
        # Without a timeout a stalled connection would hang the scraper.
        response = requests.get(url, params=params, headers=headers, timeout=30)
        # Fail loudly on a block/error page instead of a confusing JSON or
        # KeyError further down.
        response.raise_for_status()
        data = response.json()

        # Either level may be missing or null when there is nothing to
        # return; treat that as an empty page.
        ratings = (data.get('data') or {}).get('ratings') or []
        for rating in ratings:
            reviews.append({
                'rating': rating['rating_star'],
                'comment': rating['comment'],
                'author': rating['author_username'],
                'date': rating['ctime'],
                'likes': rating['like_count'],
            })

        # A short page means the last page has been reached.
        if len(ratings) < page_size:
            break
        params['offset'] += page_size

    return reviews
Google Reviews (Maps)
from playwright.sync_api import sync_playwright
def scrape_google_reviews(place_url):
    """Scrape the reviews shown on a Google Maps place page.

    Opens the page in headless Chromium, switches to the reviews tab, scrolls
    the review panel ten times to trigger lazy loading, then reads every
    element carrying a ``data-review-id`` attribute.

    NOTE(review): the CSS class selectors below are obfuscated names taken
    from the page markup and are likely to change; verify them against the
    live page.

    Args:
        place_url: URL of the place page to open.

    Returns:
        A list of dicts with the keys ``author``, ``rating``, ``text`` and
        ``date``. ``rating`` is the number of elements matching the star
        selector inside the review. Text fields fall back to ``''`` when the
        corresponding element is absent. Returns ``[]`` when no review
        element is found after opening the reviews tab.
    """
    def inner_text_or_empty(node, selector):
        # Look the element up once; a missing element yields '' rather than
        # an AttributeError on None.
        found = node.query_selector(selector)
        return found.inner_text() if found else ''

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto(place_url)

            # Click "Reviews" tab
            page.click('button[data-tab-index="1"]')
            page.wait_for_timeout(2000)

            first_review = page.query_selector('[data-review-id]')
            if first_review is None:
                # Nothing was rendered, so there is no panel to scroll.
                return []

            # Scroll to load more reviews: the scrollable container is taken
            # to be two levels above the first review element.
            review_panel = first_review.evaluate_handle(
                'el => el.parentElement.parentElement')
            for _ in range(10):
                review_panel.evaluate('el => el.scrollTop = el.scrollHeight')
                page.wait_for_timeout(1000)

            reviews = []
            for review in page.query_selector_all('[data-review-id]'):
                reviews.append({
                    'author': inner_text_or_empty(review, '.d4r55'),
                    'rating': len(review.query_selector_all('.hCCjke.vzX5Ic')),
                    'text': inner_text_or_empty(review, '.MyEned'),
                    'date': inner_text_or_empty(review, '.rsqaWe'),
                })
            return reviews
        finally:
            # Close the browser even when a selector or navigation step fails.
            browser.close()
Sentiment Analysis
from collections import Counter
# Keyword lists for the simple substring-based sentiment check below.
positive_words = ['tốt', 'đẹp', 'nhanh', 'chất lượng', 'hài lòng', 'recommend']
negative_words = ['tệ', 'chậm', 'hỏng', 'thất vọng', 'không tốt', 'kém']


def analyze_sentiment(reviews):
    """Count reviews that contain positive and/or negative keywords.

    Matching is a case-insensitive substring test against ``positive_words``
    and ``negative_words``. Negative phrases are removed from the text before
    the positive test, so a negated phrase such as 'không tốt' does not also
    match the positive keyword 'tốt'. A review with both a positive and a
    negative keyword is counted in both buckets; ``neutral`` counts the
    reviews that match neither, so it can never be negative.

    Args:
        reviews: Iterable of dicts. The review text is read from the
            ``comment`` key, falling back to ``text``; a missing or ``None``
            value is treated as an empty string.

    Returns:
        A dict with the integer counts ``positive``, ``negative`` and
        ``neutral`` plus ``sentiment_ratio``, computed as
        ``positive / (negative + 1)``.
    """
    positive = 0
    negative = 0
    neutral = 0

    for review in reviews:
        text = (review.get('comment') or review.get('text') or '').lower()

        has_negative = any(word in text for word in negative_words)

        # Blank out negative phrases so positive keywords embedded in them
        # are not matched a second time.
        remaining = text
        for word in negative_words:
            remaining = remaining.replace(word, ' ')
        has_positive = any(word in remaining for word in positive_words)

        if has_positive:
            positive += 1
        if has_negative:
            negative += 1
        if not (has_positive or has_negative):
            neutral += 1

    return {
        'positive': positive,
        'negative': negative,
        'neutral': neutral,
        # +1 in the denominator avoids division by zero.
        'sentiment_ratio': positive / (negative + 1),
    }
Feature Extraction
from collections import Counter
import re
def extract_features(reviews, features=None):
    """Count how many reviews mention each product feature.

    Matching is a case-insensitive substring test, so a short keyword also
    matches inside longer words. Each review contributes at most one count
    per feature.

    Args:
        reviews: Iterable of dicts. The review text is read from the
            ``comment`` key, falling back to ``text``; a missing or ``None``
            value is treated as an empty string.
        features: Optional iterable of lowercase keywords to look for.
            Defaults to the built-in list of product features.

    Returns:
        A list of ``(feature, count)`` tuples ordered from most to least
        mentioned. Features that are never mentioned are omitted.
    """
    if features is None:
        features = ['pin', 'màn hình', 'camera', 'giá', 'ship', 'đóng gói']

    mentions = Counter()
    for review in reviews:
        text = (review.get('comment') or review.get('text') or '').lower()
        mentions.update(feature for feature in features if feature in text)

    return mentions.most_common()
VinaProxy + Review Scraping
- Scrape reviews không bị block
- Collect từ nhiều platforms
- Giá chỉ $0.5/GB
