This repository has been archived on 2025-06-07. You can view files and clone it, but cannot push or open issues or pull requests.
2025-05-06 21:23:04 +09:00

67 lines
1.8 KiB
Python

'''
📁 data_analysis_engine/dataset_builder.py
Converts a Complete Data Set (CDS) into training features (X) and a target (y).
Technical indicators are included.
'''
import pandas as pd
import numpy as np
# Technical indicator calculation
def add_technical_indicators(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with indicators derived from ``close``.

    The columns ``sma_5``, ``sma_10``, ``rsi_14``, ``macd``, ``boll_upper``
    and ``boll_lower`` are added in that order. The input frame itself is
    left untouched. Rows that do not yet have a full rolling window hold NaN
    in the corresponding column.
    """
    close = df['close']

    # RSI (Relative Strength Index) from 14-row rolling means of the up-moves
    # and down-moves. The first diff is NaN and is counted as a zero move.
    change = close.diff()
    ups = change.where(change > 0, 0)
    downs = -change.where(change < 0, 0)
    mean_up = ups.rolling(window=14).mean()
    mean_down = downs.rolling(window=14).mean()
    # The small constant keeps the ratio finite when the window has no losses.
    strength = mean_up / (mean_down + 1e-6)

    # MACD: fast EMA (span 12) minus slow EMA (span 26).
    fast_ema = close.ewm(span=12, adjust=False).mean()
    slow_ema = close.ewm(span=26, adjust=False).mean()

    # Bollinger Bands: 20-row mean plus/minus two rolling standard deviations.
    middle = close.rolling(window=20).mean()
    half_width = close.rolling(window=20).std() * 2

    return df.assign(
        sma_5=close.rolling(window=5).mean(),
        sma_10=close.rolling(window=10).mean(),
        rsi_14=100 - (100 / (1 + strength)),
        macd=fast_ema - slow_ema,
        boll_upper=middle + half_width,
        boll_lower=middle - half_width,
    )
def build_dataset(df: pd.DataFrame):
    """Build training features (X) and a binary target (y) from a CDS frame.

    Technical indicators are added via ``add_technical_indicators``. The
    target is 1 when the next row's ``close`` is strictly greater than the
    current row's ``close`` and 0 otherwise.

    Rows containing NaN in any column are dropped. This removes the warm-up
    rows where a rolling window is not yet full. It also removes rows whose
    next close is unknown (notably the last row), because no label can be
    derived for them.

    Args:
        df: Frame with at least the columns ``open``, ``high``, ``low``,
            ``close`` and ``volume``. It is not modified.

    Returns:
        A tuple ``(X, y)``: ``X`` holds the feature columns and ``y`` is the
        integer 0/1 ``target`` series, both on the same surviving index.
    """
    # add_technical_indicators already works on a copy, so no extra copy here.
    df = add_technical_indicators(df)

    next_close = df['close'].shift(-1)
    # A comparison against NaN is False, which would silently label the last
    # row as 0. Keep the label as NaN wherever the next close is missing so
    # that dropna() removes the row.
    went_up = (next_close > df['close']).astype(float)
    df['target'] = went_up.where(next_close.notna())

    df = df.dropna()

    feature_cols = [
        'open', 'high', 'low', 'close', 'volume',
        'sma_5', 'sma_10', 'rsi_14', 'macd',
        'boll_upper', 'boll_lower',
    ]
    X = df[feature_cols]
    # The label had to be float to carry NaN; restore the integer dtype.
    y = df['target'].astype(int)
    return X, y
def build_dataset_with_indicators(df: pd.DataFrame):
    """Alias of ``build_dataset()``.

    Does exactly what ``build_dataset()`` does; it is offered under a second
    name and returns the same ``(X, y)`` pair.
    """
    features, labels = build_dataset(df)
    return features, labels