Scrape Dữ Liệu Chứng Khoán Với Python: Stock Data Collection

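Dữ liệu tài chính theo thời gian thực (real-time) rất có giá trị cho việc phân tích và giao dịch. Bài viết này hướng dẫn cách scrape dữ liệu chứng khoán (stock data) bằng Python từ các nguồn tại Việt Nam.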

Use Cases

Trading signals: Phân tích kỹ thuật
Portfolio tracking: Theo dõi danh mục
Backtesting: Kiểm thử chiến lược giao dịch trên dữ liệu lịch sử
Market analysis: Xu hướng thị trường

Nguồn Data Chứng Khoán VN

vndirect.com.vn
ssi.com.vn
cafef.vn
vietstock.vn
cophieu68.vn

Data Points

Mã CK, giá hiện tại
Giá mở/cao/thấp/đóng
Khối lượng giao dịch
% thay đổi
Vốn hóa thị trường

Cafef Stock Scraper

import requests
from bs4 import BeautifulSoup

def scrape_stock(symbol):
    """Scrape the current quote for one ticker from its cafef.vn page.

    Args:
        symbol: Ticker code interpolated into the page URL (e.g. 'VNM').

    Returns:
        A dict with 'symbol' plus 'price', 'change', 'change_pct',
        'volume', 'high' and 'low'. Each scraped value is the stripped
        text of the first element matching the corresponding CSS class,
        or None when the page contains no such element.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    url = f'https://cafef.vn/thi-truong-chung-khoan/{symbol}.chn'
    # A timeout keeps a stalled connection from hanging the scraper forever.
    response = requests.get(url, headers={'User-Agent': '...'}, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # Output key -> CSS selector of the element holding that value.
    selectors = {
        'price': '.price',
        'change': '.change',
        'change_pct': '.change-pct',
        'volume': '.volume',
        'high': '.high',
        'low': '.low',
    }

    quote = {'symbol': symbol}
    for field, selector in selectors.items():
        node = soup.select_one(selector)
        # select_one() returns None when nothing matches; report the gap
        # as None rather than crashing with AttributeError.
        quote[field] = node.text.strip() if node is not None else None
    return quote

# Fetch one quote per ticker on the watchlist, preserving list order
symbols = ['VNM', 'VIC', 'VHM', 'HPG', 'TCB']
stocks = list(map(scrape_stock, symbols))

Historical Data

def scrape_history(symbol, days=30):
    """Scrape recent daily price rows for a ticker from cafef.vn.

    Args:
        symbol: Ticker code interpolated into the page URL.
        days: Maximum number of table rows (after the first row, which is
            skipped as the header) to read.

    Returns:
        A list of dicts in table order, each with 'date' (str),
        'close' (float), 'change' (str) and 'volume' (int). Rows with
        fewer than four <td> cells are skipped.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
        ValueError: If a close or volume cell is not numeric once the
            thousands separators are removed.
    """
    url = f'https://cafef.vn/du-lieu/{symbol}.chn'
    # A timeout keeps a stalled connection from hanging the scraper forever.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    history = []
    for row in soup.select('table.data-table tr')[1:days+1]:
        cells = row.select('td')
        if len(cells) < 4:
            # Sub-header or spacer rows carry no data; indexing them
            # would raise IndexError.
            continue
        history.append({
            'date': cells[0].text.strip(),
            'close': float(cells[1].text.replace(',', '')),
            'change': cells[2].text.strip(),
            'volume': int(cells[3].text.replace(',', ''))
        })

    return history

Real-time với VNDirect API

import requests

def get_realtime_price(symbol):
    """Query the VNDirect price-service endpoint for one ticker.

    Args:
        symbol: Ticker code, sent as the query ``code:<symbol>``.

    Returns:
        A dict with 'symbol', 'price', 'change' and 'volume' taken from
        the first entry of the response's 'data' list, or None when that
        list is missing or empty.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    url = 'https://price-s3.vndirect.com.vn/priceservice/marketdata'
    params = {
        'q': f'code:{symbol}'
    }

    # A timeout keeps a stalled connection from hanging the caller forever.
    response = requests.get(url, params=params, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to decode an error body.
    response.raise_for_status()
    data = response.json()

    # .get() treats an absent 'data' key the same as an empty result
    # instead of raising KeyError.
    matches = data.get('data')
    if not matches:
        return None

    stock = matches[0]
    return {
        'symbol': stock['code'],
        'price': stock['lastPrice'],
        'change': stock['change'],
        'volume': stock['totalQtty']
    }

Market Overview

def scrape_vnindex():
    """Scrape the headline index values from the cafef.vn market page.

    Returns:
        A dict with 'vnindex', 'hnxindex' and 'upcom'. Each value is the
        raw (unstripped) text of the '.price' element inside the matching
        index container, or None when that element is not on the page.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    url = 'https://cafef.vn/thi-truong-chung-khoan.chn'
    # A timeout keeps a stalled connection from hanging the scraper forever.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')

    # Output key -> CSS selector of the element holding that index value.
    selectors = {
        'vnindex': '#vnindex .price',
        'hnxindex': '#hnxindex .price',
        'upcom': '#upcomindex .price',
    }

    overview = {}
    for name, selector in selectors.items():
        node = soup.select_one(selector)
        # select_one() returns None when nothing matches; report the gap
        # as None rather than crashing with AttributeError.
        overview[name] = node.text if node is not None else None
    return overview

Store & Analyze

import pandas as pd

# Build a date-indexed DataFrame from the list of row dicts in `history`
df = pd.DataFrame(history)
# NOTE(review): dates are parsed with pandas defaults; if the source uses
# dd/mm/yyyy, pass dayfirst=True here — confirm against real data
df['date'] = pd.to_datetime(df['date'])
df.set_index('date', inplace=True)
# Rolling windows follow row order, so put rows oldest-first; otherwise a
# newest-first table would average over later dates instead of earlier ones
df.sort_index(inplace=True)

# Moving averages of the close; each stays NaN until its window is full,
# so MA50 needs at least 50 rows of history to produce any value
df['MA20'] = df['close'].rolling(20).mean()
df['MA50'] = df['close'].rolling(50).mean()

# Simple signal: 1 while MA20 is above MA50, else 0 (NaN compares as False)
df['signal'] = (df['MA20'] > df['MA50']).astype(int)

VinaProxy + Financial Scraping

Scrape real-time data liên tục
Bypass rate limits
Giá chỉ $0.5/GB

Dùng Thử Ngay →

Scrape Dữ Liệu Chứng Khoán Với Python: Stock Data Collection

Use Cases

Nguồn Data Chứng Khoán VN

Data Points

Cafef Stock Scraper

Historical Data

Real-time với VNDirect API

Market Overview

Store & Analyze

VinaProxy + Financial Scraping

admin