-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathML_Test.py
54 lines (39 loc) · 1.83 KB
/
ML_Test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import yfinance as yf
from sklearn.ensemble import VotingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
# Train and score one up/down direction classifier per ticker.
#
# For each symbol the script downloads daily prices, labels every day with
# whether the close `n` rows later is higher than that day's close, fits a
# hard-voting ensemble on the chronologically first 80% of rows and prints
# its accuracy on the remaining 20%.

# List of stock symbols
symbols = ['AAPL', 'MSFT', 'GOOG', 'AMZN', 'META', 'TSLA', 'NVDA', 'NFLX', 'ADBE', 'INTC']

# Prediction horizon in rows (trading days); loop-invariant, so set once.
n = 1

# Fixed seed so the stochastic estimators give repeatable scores.
seed = 42

for symbol in symbols:
    print(f"Processing {symbol}...")

    # Download historical data for the desired ticker symbol.
    data = yf.download(symbol, start='2000-01-01', end='2023-06-16')
    if data is None or data.empty:
        # A failed download would otherwise crash the split below.
        print(f"{symbol}: no data downloaded, skipping.")
        continue

    # Use only the Close price for prediction. Some yfinance releases return
    # per-ticker (MultiIndex) columns, in which case data['Close'] is a
    # one-column DataFrame rather than a Series.
    close = data['Close']
    if close.ndim > 1:
        close = close.iloc[:, 0]
    close = close.dropna()

    # Build a fresh frame instead of assigning into a slice of the download,
    # which avoids pandas' chained-assignment warning.
    data = close.to_frame(name='Close')

    # Close price `n` rows ahead of each row.
    data['Predicted'] = data['Close'].shift(-n)

    # Rows without a future close cannot be labelled; drop them *before*
    # building the target so a NaN comparison is never recorded as "down".
    data = data[data['Predicted'].notna()]

    # Binary target: 1 when the close `n` rows ahead is above the current close.
    data['Target'] = (data['Predicted'] > data['Close']).astype(int)

    if len(data) < 10:
        print(f"{symbol}: not enough rows to train ({len(data)}), skipping.")
        continue

    # Independent (X) and dependent (y) data sets.
    X = data[['Close']]
    y = data['Target']

    # 80% training / 20% testing, kept in chronological order: shuffling a
    # time series would let the model train on rows that come after the ones
    # it is scored on.
    x_train, x_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=False
    )

    if y_train.nunique() < 2:
        # The classifiers cannot be fitted on a single class.
        print(f"{symbol}: training labels contain a single class, skipping.")
        continue

    # Standardize the feature using training-set statistics only; the linear
    # and SGD models are sensitive to the raw price scale.
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    # Define the individual classifiers.
    clf1 = GaussianNB()
    clf2 = LogisticRegression(penalty='l1', solver='liblinear', random_state=seed)  # L1 regularization
    clf3 = DecisionTreeClassifier(random_state=seed)
    clf4 = SGDClassifier(random_state=seed)

    # Define the ensemble classifier (majority vote over predicted labels).
    eclf = VotingClassifier(
        estimators=[('gnb', clf1), ('lr', clf2), ('dt', clf3), ('sgd', clf4)],
        voting='hard',
    )

    # Train the ensemble classifier on the training data.
    eclf.fit(x_train, y_train)

    # Evaluate the classifier on the held-out (most recent) data.
    print(f"{symbol} Model Accuracy: ", eclf.score(x_test, y_test))