cc_local

src

  1 +.ipynb_checkpoints
  1 +name: tutorial
  2 +
  3 +conda_env: conda.yaml
  4 +
  5 +entry_points:
  6 + main:
  7 + parameters:
  8 + alpha: float
  9 + l1_ratio: {type: float, default: 0.1}
  10 + command: "python train.py {alpha} {l1_ratio}"
  1 +name: tutorial
  2 +channels:
  3 + - defaults
  4 +dependencies:
  5 + - numpy>=1.14.3
  6 + - pandas>=1.0.0
  7 + - scikit-learn=0.19.1
  8 + - pip
  9 + - pip:
  10 + - mlflow
1 -print("hello world")  
  1 +# The data set used in this example is from http://archive.ics.uci.edu/ml/datasets/Wine+Quality
  2 +# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis.
  3 +# Modeling wine preferences by data mining from physicochemical properties. In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
  4 +
  5 +import os
  6 +import warnings
  7 +import sys
  8 +
  9 +import pandas as pd
  10 +import numpy as np
  11 +from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
  12 +from sklearn.model_selection import train_test_split
  13 +from sklearn.linear_model import ElasticNet
  14 +
  15 +import mlflow
  16 +import mlflow.sklearn
  17 +
  18 +
  19 +def eval_metrics(actual, pred):
  20 + rmse = np.sqrt(mean_squared_error(actual, pred))
  21 + mae = mean_absolute_error(actual, pred)
  22 + r2 = r2_score(actual, pred)
  23 + return rmse, mae, r2
  24 +
  25 +
  26 +
  27 +if __name__ == "__main__":
  28 + warnings.filterwarnings("ignore")
  29 + np.random.seed(40)
  30 +
  31 + # Read the wine-quality csv file (make sure you're running this from the root of MLflow!)
  32 + wine_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "wine-quality.csv")
  33 + data = pd.read_csv(wine_path)
  34 +
  35 + # Split the data into training and test sets. (0.75, 0.25) split.
  36 + train, test = train_test_split(data)
  37 +
  38 + # The predicted column is "quality" which is a scalar from [3, 9]
  39 + train_x = train.drop(["quality"], axis=1)
  40 + test_x = test.drop(["quality"], axis=1)
  41 + train_y = train[["quality"]]
  42 + test_y = test[["quality"]]
  43 +
  44 + alpha = float(sys.argv[1]) if len(sys.argv) > 1 else 0.5
  45 + l1_ratio = float(sys.argv[2]) if len(sys.argv) > 2 else 0.5
  46 +
  47 + with mlflow.start_run():
  48 + lr = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
  49 + lr.fit(train_x, train_y)
  50 +
  51 + predicted_qualities = lr.predict(test_x)
  52 +
  53 + (rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
  54 +
  55 + print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
  56 + print(" RMSE: %s" % rmse)
  57 + print(" MAE: %s" % mae)
  58 + print(" R2: %s" % r2)
  59 +
  60 + mlflow.log_param("alpha", alpha)
  61 + mlflow.log_param("l1_ratio", l1_ratio)
  62 + mlflow.log_metric("rmse", rmse)
  63 + mlflow.log_metric("r2", r2)
  64 + mlflow.log_metric("mae", mae)
  65 +
  66 + mlflow.sklearn.log_model(lr, "model")
This diff could not be displayed because it is too large.