Showing
6 changed files
with
87 additions
and
1 deletion
1 | +.ipynb_checkpoints |
MLproject
0 → 100644
conda.yaml
0 → 100644
test.py
deleted
100644 → 0
1 | -print("hello world") |
train.py
0 → 100644
1 | +# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality | ||
2 | +# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. | ||
3 | +# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009. | ||
4 | + | ||
5 | +import os | ||
6 | +import warnings | ||
7 | +import sys | ||
8 | + | ||
9 | +import pandas as pd | ||
10 | +import numpy as np | ||
11 | +from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score | ||
12 | +from sklearn.model_selection import train_test_split | ||
13 | +from sklearn.linear_model import ElasticNet | ||
14 | + | ||
15 | +import mlflow | ||
16 | +import mlflow.sklearn | ||
17 | + | ||
18 | + | ||
19 | +def eval_metrics(actual, pred): | ||
20 | + rmse = np.sqrt(mean_squared_error(actual, pred)) | ||
21 | + mae = mean_absolute_error(actual, pred) | ||
22 | + r2 = r2_score(actual, pred) | ||
23 | + return rmse, mae, r2 | ||
24 | + | ||
25 | + | ||
26 | + | ||
27 | +if __name__ == "__main__": | ||
28 | + warnings.filterwarnings("ignore") | ||
29 | + np.random.seed(40) | ||
30 | + | ||
31 | + # Read the wine-quality csv file located next to this script (the path is built from __file__, so the working directory does not matter) | ||
32 | + wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv") | ||
33 | + data = pd.read_csv(wine_path) | ||
34 | + | ||
35 | + # Split the data into training and test sets. (0.75, 0.25) split. | ||
36 | + train, test = train_test_split(data) | ||
37 | + | ||
38 | + # The predicted column is "quality" which is a scalar from [3, 9] | ||
39 | + train_x = train.drop(["quality"], axis=1) | ||
40 | + test_x = test.drop(["quality"], axis=1) | ||
41 | + train_y = train[["quality"]] | ||
42 | + test_y = test[["quality"]] | ||
43 | + | ||
44 | + alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5 | ||
45 | + l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5 | ||
46 | + | ||
47 | + with mlflow.start_run(): | ||
48 | + lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42) | ||
49 | + lr.fit(train_x, train_y) | ||
50 | + | ||
51 | + predicted_qualities = lr.predict(test_x) | ||
52 | + | ||
53 | + (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities) | ||
54 | + | ||
55 | + print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio)) | ||
56 | + print(" RMSE: %s" % rmse) | ||
57 | + print(" MAE: %s" % mae) | ||
58 | + print(" R2: %s" % r2) | ||
59 | + | ||
60 | + mlflow.log_param("alpha", alpha) | ||
61 | + mlflow.log_param("l1_ratio", l1_ratio) | ||
62 | + mlflow.log_metric("rmse", rmse) | ||
63 | + mlflow.log_metric("r2", r2) | ||
64 | + mlflow.log_metric("mae", mae) | ||
65 | + | ||
66 | + mlflow.sklearn.log_model(lr, "model") |
wine-quality.csv
0 → 100644
This diff could not be displayed because it is too large.
-
Please register or log in to post a comment