Skip to content
Snippets Groups Projects
Commit a9a03646 authored by d-gol's avatar d-gol
Browse files

Small fixes model storage example

parent 223e6b60
Branches
Tags
No related merge requests found
%% Cell type:markdown id: tags:
### Train a Model
%% Cell type:code id: tags:
``` python
import tensorflow as tf
import numpy as np
import os
from kubeflow.metadata import metadata
from uuid import uuid4
from datetime import datetime
```
%% Cell type:code id: tags:
``` python
# Hyperparameters for the model built below.
learning_rate = 1e-4  # step size handed to the Adam optimizer
nodes_number = 32  # units in the hidden Dense layer
```
%% Cell type:code id: tags:
``` python
# Load MNIST through Keras as (train, test) pairs of images and labels.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Append a trailing axis and divide by 255.0 (the first Conv2D layer below
# declares its input as 28x28x1).
x_train = x_train[..., np.newaxis] / 255.0
x_test = x_test[..., np.newaxis] / 255.0
```
%% Cell type:code id: tags:
``` python
def filter_36(x, y):
    """Keep only the samples labelled 3 or 6 and binarize the labels.

    Args:
        x: Array of samples, indexable along its first axis by a boolean mask.
        y: Array of integer labels aligned with ``x``.

    Returns:
        A ``(x, y)`` tuple restricted to the rows whose label is 3 or 6.
        The returned ``y`` is boolean: ``True`` where the label was 3 and
        ``False`` where it was 6.
    """
    keep = (y == 3) | (y == 6)
    x, y = x[keep], y[keep]
    # Turn the two remaining classes into a binary target: 3 -> True, 6 -> False.
    y = y == 3
    return x, y
# Reduce both splits to the 3-vs-6 samples with boolean labels.
x_train, y_train = filter_36(x_train, y_train)
x_test, y_test = filter_36(x_test, y_test)
```
%% Cell type:code id: tags:
``` python
# Small CNN for the binary 3-vs-6 task.
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Conv2D(32, [3, 3], activation='relu', input_shape=(28,28,1)))
model.add(tf.keras.layers.Conv2D(64, [3, 3], activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.Dropout(0.25))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(nodes_number, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
# The string loss 'binary_crossentropy' expects probabilities (from_logits
# defaults to False), so the single output unit needs a sigmoid activation.
# Without it the raw logits are scored as if they were probabilities.
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.compile(
    loss='binary_crossentropy',
    optimizer=tf.keras.optimizers.Adam(learning_rate),
    metrics=['accuracy'],
)
model.fit(x_train, y_train, batch_size=256, epochs=1, verbose=1, validation_data=(x_test, y_test))
# evaluate() returns the loss followed by the compiled metrics; index 1 is
# the accuracy requested above.
model_test_acc = model.evaluate(x_test, y_test)[1]
```
%% Cell type:markdown id: tags:
### Store model on EOS
#### Edit code to add personal path
%% Cell type:code id: tags:
``` python
# Edit this to point at your own EOS area.
# The path is absolute (leading "/"): a relative 'eos/...' would be resolved
# against the notebook's working directory rather than the /eos tree that
# the ormb instructions further down refer to.
user_path = '/eos/user/j/jfrancis/test_tf_model/model'
model.save(user_path)
```
%% Cell type:markdown id: tags:
#### Initialize Metadata component
%% Cell type:code id: tags:
``` python
# Default DNS name of the Kubeflow Metadata gRPC service.
METADATA_STORE_HOST = "metadata-grpc-service.kubeflow"
METADATA_STORE_PORT = 8080

ws1 = metadata.Workspace(
    # Connect to metadata service in namespace kubeflow in k8s cluster.
    store=metadata.Store(grpc_host=METADATA_STORE_HOST, grpc_port=METADATA_STORE_PORT),
    # `name` used to be passed twice ("workspace_1" and "workspace_test"),
    # which Python rejects as a repeated keyword argument; one value is kept.
    name="workspace_test",
    description="a workspace for testing",
    labels={"isTest": "yes"})

# A run groups executions; the UTC timestamp makes each run name distinct.
r = metadata.Run(
    workspace=ws1,
    name="run-" + datetime.utcnow().isoformat("T"),
    description="Test custom run",
)

# NOTE: `exec` shadows the Python builtin of the same name. It is kept
# because the later cells refer to this execution object as `exec`.
exec = metadata.Execution(
    name="execution" + datetime.utcnow().isoformat("T"),
    workspace=ws1,
    run=r,
    description="execution example",
)
print("An execution was created with id %s" % exec.id)
```
%% Cell type:markdown id: tags:
#### Log Metadata info about the model stored on EOS
%% Cell type:code id: tags:
``` python
# Log the model saved on EOS as an output of the execution.
# `uri` is the EOS path the model was written to (user_path).
model_version = "model_version_" + str(uuid4())
model_md = exec.log_output(
    metadata.Model(
        name="Custom mnist",
        description="model to differentiate 3 from 6, handwritten digits",
        owner="someone@kubeflow.org",
        uri=user_path,
        model_type="neural network",
        training_framework={
            "name": "tensorflow",
            "version": "v2.1"
        },
        # Log the values the model was actually trained with. The previous
        # hard-coded learning rate (0.001) disagreed with `learning_rate`.
        hyperparameters={
            "learning_rate": learning_rate,
            "nodes_number": [nodes_number]
        },
        version=model_version,
        labels={"isTest": "yes"}))
print(model_md)
```
%% Cell type:markdown id: tags:
### Store model using in-cluster Minio object storage
- More info on minio - https://docs.min.io/docs/python-client-quickstart-guide.html
%% Cell type:code id: tags:
``` python
# Store to a bucket
from minio import Minio
from minio.error import S3Error

# NOTE(review): the access/secret key are hard-coded and the connection is
# plain HTTP (secure=False). Presumably these are the stock in-cluster MinIO
# credentials -- confirm, and do not reuse this pattern outside the cluster.
client = Minio('minio-service.kubeflow:9000',
               access_key='minio',
               secret_key='minio123',
               secure=False)
bucket_name = 'modelbucket'
object_name = 'saved_model.pb'

# Create the bucket on first use.
found_bucket = client.bucket_exists(bucket_name)
if not found_bucket:
    client.make_bucket(bucket_name)

# Upload the saved_model.pb file from the directory the model was saved to.
client.fput_object(bucket_name, object_name, os.path.join(user_path, 'saved_model.pb'))

# Download model from the bucket
download_filepath = './downloaded.pb'
client.fget_object(bucket_name, object_name, download_filepath)

# List buckets
buckets = client.list_buckets()
for bucket in buckets:
    print(bucket)
    print(bucket.name, bucket.creation_date)
```
%% Cell type:markdown id: tags:
#### Log Metadata info about the model stored on minio bucket
%% Cell type:code id: tags:
``` python
# Log the copy stored in the minio bucket as another output of the execution.
# Uncomment the next line to log it under a fresh version string; otherwise
# the `model_version` defined earlier is reused.
#model_version = "model_version_" + str(uuid4())
model_md = exec.log_output(
    metadata.Model(
        name="Custom mnist",
        description="model to differentiate 3 from 6, handwritten digits",
        owner="someone@kubeflow.org",
        uri="minio-service.kubeflow:9000/" + bucket_name + "/saved_model.pb",
        model_type="neural network",
        training_framework={
            "name": "tensorflow",
            "version": "v2.1"
        },
        # Log the values the model was actually trained with. The previous
        # hard-coded learning rate (0.001) disagreed with `learning_rate`.
        hyperparameters={
            "learning_rate": learning_rate,
            "nodes_number": [nodes_number]
        },
        version=model_version,
        labels={"isTest": "yes"}))
print(model_md)
```
%% Cell type:markdown id: tags:
### Model Basic Lineage Tracking
To see the full list of artifacts, navigate to:
https://ml.cern.ch/_/metadata/#/artifacts
%% Cell type:code id: tags:
``` python
# Executions linked to the most recently logged model artifact.
print("Model id is %s\n" % model_md.id)
model_events = ws1.store.get_events_by_artifact_ids([model_md.id])
execution_ids = {event.execution_id for event in model_events}
print("All executions related to the model are {}".format(execution_ids))
# assert execution_ids == set([serving_application.id, exec.id])

# Artifacts linked to the training execution.
trainer_events = ws1.store.get_events_by_execution_ids([exec.id])
artifact_ids = {event.artifact_id for event in trainer_events}
print("All artifacts related to the training event are {}".format(artifact_ids))
# assert artifact_ids == set([model.id, metrics.id, data_set.id])
```
%% Cell type:markdown id: tags:
## Advanced Options Below
- Requires access to remote S3 bucket storage (CERN, AWS, GCP, etc.)
- Requires access to registry.cern.ch
%% Cell type:markdown id: tags:
### Store model on s3.cern.ch centralized object storage
- To use this, please make sure you have access to a bucket on s3.cern.ch
- Credentials should be stored in ~/.aws/credentials
- The bucket needs to have write permissions
- More info: https://clouddocs.web.cern.ch/object_store/README.html
- Without valid credentials, the upload is likely to fail with the error: Unable to locate credentials
%% Cell type:code id: tags:
``` python
import boto3

# Upload saved_model.pb from the model directory to a bucket on s3.cern.ch.
# Note that this rebinds `bucket_name` and `client` from the minio cell.
bucket_name = 'test_bucket'
client = boto3.client('s3', endpoint_url='https://s3.cern.ch')
client.upload_file(
    Filename=user_path + '/saved_model.pb',
    Bucket=bucket_name,
    Key='saved_model.pb',
)
```
%% Cell type:markdown id: tags:
#### Log Metadata info about the model stored on s3.cern.ch bucket
%% Cell type:code id: tags:
``` python
# Log the copy stored on s3.cern.ch as another output of the execution.
# Uncomment the next line to log it under a fresh version string; otherwise
# the `model_version` defined earlier is reused.
#model_version = "model_version_" + str(uuid4())
model_md = exec.log_output(
    metadata.Model(
        name="Custom mnist",
        description="model to differentiate 3 from 6, handwritten digits",
        owner="someone@kubeflow.org",
        uri="s3.cern.ch/" + bucket_name + "/saved_model.pb",
        model_type="neural network",
        training_framework={
            "name": "tensorflow",
            "version": "v2.1"
        },
        # Log the values the model was actually trained with. The previous
        # hard-coded learning rate (0.001) disagreed with `learning_rate`.
        hyperparameters={
            "learning_rate": learning_rate,
            "nodes_number": [nodes_number]
        },
        version=model_version,
        labels={"isTest": "yes"}))
print(model_md)
```
%% Cell type:markdown id: tags:
### Store model on registry.cern.ch, CERN OCI registry
- Login at registry.cern.ch
- Create a project with a custom PROJECT_NAME
- At top right corner, click on username
- Select User Profile
- Copy CLI secret
- In this Notebook server, open new Terminal window
- cd /ormb
- ./bin/ormb login registry.cern.ch -u USERNAME
- Paste copied CLI secret
- Make sure model directory structure corresponds to requirements
- https://github.com/kleveross/ormb/blob/master/docs/tutorial.md
- In the folder where the saved model is located, create ormbfile.yaml
- https://github.com/kleveross/ormb/blob/master/examples/SavedModel-fashion/ormbfile.yaml
- ./bin/ormb save /eos/user/LETTER/USERNAME/test_tf_model registry.cern.ch/PROJECT_NAME/test_tf_model:v1
- ./bin/ormb push registry.cern.ch/PROJECT_NAME/test_tf_model:v1
- At registry.cern.ch check the status of the uploaded model
%% Cell type:markdown id: tags:
### Model Basic Lineage Tracking
To see the full list of artifacts, navigate to:
https://ml.cern.ch/_/metadata/#/artifacts
%% Cell type:code id: tags:
``` python
# Executions linked to the most recently logged model artifact.
print("Model id is %s\n" % model_md.id)
model_events = ws1.store.get_events_by_artifact_ids([model_md.id])
execution_ids = {event.execution_id for event in model_events}
print("All executions related to the model are {}".format(execution_ids))
# assert execution_ids == set([serving_application.id, exec.id])

# Artifacts linked to the training execution.
trainer_events = ws1.store.get_events_by_execution_ids([exec.id])
artifact_ids = {event.artifact_id for event in trainer_events}
print("All artifacts related to the training event are {}".format(artifact_ids))
# assert artifact_ids == set([model.id, metrics.id, data_set.id])
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment