Small fixes model storage example

a9a03646 · d-gol · 223e6b60 · a9a03646
Commit a9a03646 authored 3 years ago by d-gol
--- a/model-storage/model_storage.ipynb
+++ b/model-storage/model_storage.ipynb
@@ -20,7 +20,7 @@
    "import os\n",
    "from kubeflow.metadata import metadata\n",
    "from uuid import uuid4\n",
-    "from datetime import datetime\n"
+    "from datetime import datetime"
   ]
  },
  {
@@ -87,7 +87,8 @@
    "tags": []
   },
   "source": [
-    "### Store model on EOS"
+    "### Store model on EOS\n",
+    "#### Edit code to add personal path"
   ]
  },
  {
@@ -96,7 +97,6 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# Edit code to add personal path\n",
    "user_path = 'eos/user/j/jfrancis/test_tf_model/model'\n",
    "model.save(user_path)"
   ]
@@ -124,7 +124,7 @@
    "ws1 = metadata.Workspace(\n",
    "    # Connect to metadata service in namespace kubeflow in k8s cluster.\n",
    "    store=metadata.Store(grpc_host=METADATA_STORE_HOST, grpc_port=METADATA_STORE_PORT),\n",
-    "    name=\"workspace_1\",\n",
+    "    name=\"workspace_test\",\n",
    "    description=\"a workspace for testing\",\n",
    "    labels={\"isTest\": \"yes\"})\n",
    "\n",
@@ -261,6 +261,48 @@
    "print(model_md)"
   ]
  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Model Basic Lineage Tracking\n",
+    "\n",
+    "To see the full list of artifacts, navigate to:\n",
+    "https://ml.cern.ch/_/metadata/#/artifacts "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Model id is %s\\n\" % model_md.id)\n",
+    "    \n",
+    "model_events = ws1.store.get_events_by_artifact_ids([model_md.id])\n",
+    "\n",
+    "execution_ids = set(e.execution_id for e in model_events)\n",
+    "print(\"All executions related to the model are {}\".format(execution_ids))\n",
+    "# assert execution_ids == set([serving_application.id, exec.id])\n",
+    "\n",
+    "trainer_events = ws1.store.get_events_by_execution_ids([exec.id])\n",
+    "artifact_ids = set(e.artifact_id for e in trainer_events)\n",
+    "print(\"All artifacts related to the training event are {}\".format(artifact_ids))# assert artifact_ids == set([model.id, metrics.id, data_set.id])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Advanced Options Below\n",
+    "- Require access to a remote S3 bucket storage (CERN, AWS, GCP, etc)\n",
+    "- Require access to registry.cern.ch"
+   ]
+  },
  {
   "cell_type": "markdown",
   "metadata": {
@@ -349,42 +391,11 @@
    "- ./bin/ormb push registry.cern.ch/PROJECT_NAME/test_tf_model:v1\n",
    "- At registry.cern.ch check the status of the uploaded model"
   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {
-    "tags": []
-   },
-   "source": [
-    "### Model Basic Lineage Tracking\n",
-    "\n",
-    "To see the full list of artifacts, navigate to:\n",
-    "https://ml.cern.ch/_/metadata/#/artifacts "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "print(\"Model id is %s\\n\" % model_md.id)\n",
-    "    \n",
-    "model_events = ws1.store.get_events_by_artifact_ids([model_md.id])\n",
-    "\n",
-    "execution_ids = set(e.execution_id for e in model_events)\n",
-    "print(\"All executions related to the model are {}\".format(execution_ids))\n",
-    "# assert execution_ids == set([serving_application.id, exec.id])\n",
-    "\n",
-    "trainer_events = ws1.store.get_events_by_execution_ids([exec.id])\n",
-    "artifact_ids = set(e.artifact_id for e in trainer_events)\n",
-    "print(\"All artifacts related to the training event are {}\".format(artifact_ids))# assert artifact_ids == set([model.id, metrics.id, data_set.id])"
-   ]
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3.8",
+   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },

 %% Cell type:markdown id: tags:

 ### Train a Model

 %% Cell type:code id: tags:

 ``` python
 import tensorflow as tf
 import numpy as np
 import os
 from kubeflow.metadata import metadata
 from uuid import uuid4
 from datetime import datetime
 ```

 %% Cell type:code id: tags:

 ``` python
 # Tunable hyperparameters, read by the model-definition cell further down.
 # nodes_number: unit count passed to the hidden Dense layer.
 nodes_number = 32
 # learning_rate: value passed to the Adam optimizer.
 learning_rate = 0.0001
 ```

 %% Cell type:code id: tags:

 ``` python
 # Fetch the MNIST train/test splits (images and integer labels).
 (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

 # Append a trailing axis to each image array and divide by 255.0.
 x_train = x_train[..., np.newaxis] / 255.0
 x_test = x_test[..., np.newaxis] / 255.0
 ```

 %% Cell type:code id: tags:

 ``` python
 def filter_36(x, y):
    """Keep only the samples labelled 3 or 6 and turn the labels into booleans.

    Args:
        x: samples, indexable with a boolean mask (presumably a NumPy array).
        y: labels aligned with ``x``; compared element-wise against 3 and 6.

    Returns:
        A tuple ``(x_kept, y_is_three)`` where ``x_kept`` holds the samples
        whose label is 3 or 6 and ``y_is_three`` is True where the label is 3.
    """
    is_three = y == 3
    mask = is_three | (y == 6)
    return x[mask], is_three[mask]

 # Reduce both splits to the 3-vs-6 subset; labels become booleans (True == 3).
 # Note: this rebinds x_train/y_train/x_test/y_test, so re-running the cell
 # filters the already-filtered data.
 x_train, y_train = filter_36(x_train, y_train)
 x_test, y_test = filter_36(x_test, y_test)
 ```

 %% Cell type:code id: tags:

 ``` python
 # Small convolutional network for the binary "3 vs 6" task on 28x28x1 inputs.
 model = tf.keras.models.Sequential()
 model.add(tf.keras.layers.Conv2D(32, [3, 3], activation='relu', input_shape=(28,28,1)))
 model.add(tf.keras.layers.Conv2D(64, [3, 3], activation='relu'))
 model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
 model.add(tf.keras.layers.Dropout(0.25))
 model.add(tf.keras.layers.Flatten())
 model.add(tf.keras.layers.Dense(nodes_number, activation='relu'))
 model.add(tf.keras.layers.Dropout(0.5))
 # Sigmoid output: the string loss 'binary_crossentropy' (from_logits=False by
 # default) and the 'accuracy' metric both expect probabilities in [0, 1].
 # A bare Dense(1) would feed them unbounded logits instead.
 model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

 model.compile(loss='binary_crossentropy', optimizer=tf.keras.optimizers.Adam(learning_rate), metrics=['accuracy'])

 # Single epoch keeps the example fast; the test split doubles as validation data.
 model.fit(x_train, y_train, batch_size=256, epochs=1, verbose=1, validation_data=(x_test, y_test))
 # evaluate() returns [loss, accuracy] for the metrics configured above.
 model_test_acc = model.evaluate(x_test, y_test)[1]
 ```

 %% Cell type:markdown id: tags:

 ### Store model on EOS
+#### Edit code to add personal path

 %% Cell type:code id: tags:

 ``` python
-# Edit code to add personal path
 user_path = 'eos/user/j/jfrancis/test_tf_model/model'
 model.save(user_path)
 ```

 %% Cell type:markdown id: tags:

 #### Initialize Metadata component

 %% Cell type:code id: tags:

 ``` python
 # Connection settings for the metadata store used by the workspace below.
 METADATA_STORE_HOST = "metadata-grpc-service.kubeflow" # default DNS of Kubeflow Metadata gRPC service.
 # Port passed as grpc_port to metadata.Store.
 METADATA_STORE_PORT = 8080

 ws1 = metadata.Workspace(
    # Connect to metadata service in namespace kubeflow in k8s cluster.
    store=metadata.Store(grpc_host=METADATA_STORE_HOST, grpc_port=METADATA_STORE_PORT),
-    name="workspace_1",
+    name="workspace_test",
    description="a workspace for testing",
    labels={"isTest": "yes"})

 # Create a run in the workspace; the name embeds the current UTC timestamp
 # in ISO format.
 r = metadata.Run(
    workspace=ws1,
    name="run-" + datetime.utcnow().isoformat("T") ,
    description="Test custom run",
 )

 # Create an execution attached to the run above. Later cells log model
 # artifacts through this object.
 # NOTE(review): the name `exec` shadows Python's built-in exec() for the rest
 # of the notebook; renaming it would require updating every later cell that
 # references it.
 exec = metadata.Execution(
    name = "execution" + datetime.utcnow().isoformat("T") ,
    workspace=ws1,
    run=r,
    description="execution example",
 )
 print("An execution was created with id %s" % exec.id)
 ```

 %% Cell type:markdown id: tags:

 #### Log Metadata info about the model stored on EOS

 %% Cell type:code id: tags:

 ``` python
 # Log the model saved on EOS as an output of the execution.
 # The uri is the EOS path the model was saved to (user_path).
 # A random UUID gives this notebook run its own version string.
 model_version = "model_version_" + str(uuid4())
 model_md = exec.log_output(
    metadata.Model(
            name="Custom mnist",
            description="model to differentiate 3 from 6, handwritten digits",
            owner="someone@kubeflow.org",
            uri=user_path,
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v2.1"
            },
            # Record the values actually used for training rather than
            # hard-coded copies that can drift from the configuration cell.
            hyperparameters={
                "learning_rate": learning_rate,
                "nodes_number": [nodes_number]
            },
            version=model_version,
            labels={"isTest": "yes"}))
 print(model_md)
 ```

 %% Cell type:markdown id: tags:

 ### Store model using in-cluster Minio object storage
 - More info on minio - https://docs.min.io/docs/python-client-quickstart-guide.html

 %% Cell type:code id: tags:

 ``` python
 # Store the model in a bucket
 from minio import Minio
 from minio.error import S3Error

 # Credentials are taken from the environment when MINIO_ACCESS_KEY /
 # MINIO_SECRET_KEY are set; the fallbacks are the values this example
 # previously hard-coded, so existing setups keep working unchanged.
 client = Minio('minio-service.kubeflow:9000',
               access_key=os.environ.get('MINIO_ACCESS_KEY', 'minio'),
               secret_key=os.environ.get('MINIO_SECRET_KEY', 'minio123'),
               secure=False)

 bucket_name = 'modelbucket'
 object_name = 'saved_model.pb'

 # Create the bucket on first use.
 found_bucket = client.bucket_exists(bucket_name)
 if not found_bucket:
    client.make_bucket(bucket_name)

 # Upload the saved_model.pb file from the directory the model was saved to.
 client.fput_object(bucket_name, object_name, user_path + '/saved_model.pb')

 # Download model from the bucket
 download_filepath = './downloaded.pb'
 client.fget_object(bucket_name, object_name, download_filepath)

 # List buckets
 buckets = client.list_buckets()
 for bucket in buckets:
    print(bucket)
    print(bucket.name, bucket.creation_date)
 ```

 %% Cell type:markdown id: tags:

 #### Log Metadata info about the model stored on minio bucket

 %% Cell type:code id: tags:

 ``` python
 # Log the copy of the model stored in the Minio bucket as another output of
 # the same execution. model_version is reused from the EOS logging cell, so
 # that cell must have been run first.
 model_md = exec.log_output(
    metadata.Model(
            name="Custom mnist",
            description="model to differentiate 3 from 6, handwritten digits",
            owner="someone@kubeflow.org",
            uri="minio-service.kubeflow:9000/" + bucket_name + "/saved_model.pb",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v2.1"
            },
            # Record the values actually used for training rather than
            # hard-coded copies that can drift from the configuration cell.
            hyperparameters={
                "learning_rate": learning_rate,
                "nodes_number": [nodes_number]
            },
            version=model_version,
            labels={"isTest": "yes"}))
 print(model_md)
 ```

 %% Cell type:markdown id: tags:

+### Model Basic Lineage Tracking
+
+To see the full list of artifacts, navigate to:
+https://ml.cern.ch/_/metadata/#/artifacts
+
+%% Cell type:code id: tags:
+
+``` python
+print("Model id is %s\n" % model_md.id)
+
+model_events = ws1.store.get_events_by_artifact_ids([model_md.id])
+
+execution_ids = set(e.execution_id for e in model_events)
+print("All executions related to the model are {}".format(execution_ids))
+# assert execution_ids == set([serving_application.id, exec.id])
+
+trainer_events = ws1.store.get_events_by_execution_ids([exec.id])
+artifact_ids = set(e.artifact_id for e in trainer_events)
+print("All artifacts related to the training event are {}".format(artifact_ids))# assert artifact_ids == set([model.id, metrics.id, data_set.id])
+```
+
+%% Cell type:markdown id: tags:
+
+## Advanced Options Below
+- Require access to a remote S3 bucket storage (CERN, AWS, GCP, etc)
+- Require access to registry.cern.ch
+
+%% Cell type:markdown id: tags:
+
 ### Store model on s3.cern.ch centralized object storage

 - To use this, please make sure you have access to a bucket on s3.cern.ch
 - Credentials should be stored in ~/.aws/credentials
 - Bucket needs to have writing permissions
 - More info: https://clouddocs.web.cern.ch/object_store/README.html
 - If the credentials are missing, you will likely get the error: Unable to locate credentials

 %% Cell type:code id: tags:

 ``` python
 import boto3

 # Target bucket on s3.cern.ch (this rebinds bucket_name for later cells).
 bucket_name = 'test_bucket'

 # S3 client pointed at the CERN endpoint.
 client = boto3.client(service_name='s3', endpoint_url='https://s3.cern.ch')

 # Upload the saved_model.pb file from the saved-model directory.
 client.upload_file(
    Filename=user_path + '/saved_model.pb',
    Bucket=bucket_name,
    Key='saved_model.pb',
 )
 ```

 %% Cell type:markdown id: tags:

 #### Log Metadata info about the model stored on s3.cern.ch bucket

 %% Cell type:code id: tags:

 ``` python
 # Log the copy of the model stored on s3.cern.ch as another output of the
 # same execution. model_version is reused from the EOS logging cell, so that
 # cell must have been run first.
 model_md = exec.log_output(
    metadata.Model(
            name="Custom mnist",
            description="model to differentiate 3 from 6, handwritten digits",
            owner="someone@kubeflow.org",
            uri="s3.cern.ch/" + bucket_name + "/saved_model.pb",
            model_type="neural network",
            training_framework={
                "name": "tensorflow",
                "version": "v2.1"
            },
            # Record the values actually used for training rather than
            # hard-coded copies that can drift from the configuration cell.
            hyperparameters={
                "learning_rate": learning_rate,
                "nodes_number": [nodes_number]
            },
            version=model_version,
            labels={"isTest": "yes"}))
 print(model_md)
 ```

 %% Cell type:markdown id: tags:

 ### Store model on registry.cern.ch, CERN OCI registry
 - Login at registry.cern.ch
 - Create a project with a custom PROJECT_NAME
 - At top right corner, click on username
 - Select User Profile
 - Copy CLI secret
 - In this Notebook server, open new Terminal window
 - cd /ormb
 - ./bin/ormb login registry.cern.ch -u USERNAME
 - Paste copied CLI secret
 - Make sure model directory structure corresponds to requirements
    - https://github.com/kleveross/ormb/blob/master/docs/tutorial.md
 - In the folder where the saved model is located, create ormbfile.yaml
    - https://github.com/kleveross/ormb/blob/master/examples/SavedModel-fashion/ormbfile.yaml
 - ./bin/ormb save /eos/user/LETTER/USERNAME/test_tf_model registry.cern.ch/PROJECT_NAME/test_tf_model:v1
 - ./bin/ormb push registry.cern.ch/PROJECT_NAME/test_tf_model:v1
 - At registry.cern.ch check the status of the uploaded model
-
-%% Cell type:markdown id: tags:
-
-### Model Basic Lineage Tracking
-
-To see the full list of artifacts, navigate to:
-https://ml.cern.ch/_/metadata/#/artifacts
-
-%% Cell type:code id: tags:
-
-``` python
-print("Model id is %s\n" % model_md.id)
-
-model_events = ws1.store.get_events_by_artifact_ids([model_md.id])
-
-execution_ids = set(e.execution_id for e in model_events)
-print("All executions related to the model are {}".format(execution_ids))
-# assert execution_ids == set([serving_application.id, exec.id])
-
-trainer_events = ws1.store.get_events_by_execution_ids([exec.id])
-artifact_ids = set(e.artifact_id for e in trainer_events)
-print("All artifacts related to the training event are {}".format(artifact_ids))# assert artifact_ids == set([model.id, metrics.id, data_set.id])
-```