Skip to content
Snippets Groups Projects
Commit d08d6205 authored by Dejan Golubovic's avatar Dejan Golubovic
Browse files

Change protobuf version

parent d059d189
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
### Install kubeflow-fairing
%% Cell type:code id: tags:
``` python
# Install the Kubeflow Fairing SDK, explicitly requesting pip's legacy resolver.
!pip3 install kubeflow-fairing --use-deprecated=legacy-resolver
!pip3 install msrestazure
# Pin protobuf to exactly 3.15.8.
!pip3 install protobuf==3.15.8
# Restart the kernel after installation (presumably so the freshly installed
# package versions are the ones that get imported by the cells below).
```
%% Cell type:markdown id: tags:
### Set up Docker registry credentials in a Kubernetes ConfigMap
%% Cell type:code id: tags:
``` python
import base64
import json
import os
import subprocess

# Docker Hub credentials. Replace the placeholders before running this cell.
# NOTE: base64 is an encoding, not encryption -- never commit real credentials.
DOCKER_USERNAME = 'USERNAME'
DOCKER_PASSWORD = 'PASSWORD'

# Registry path that the image builder pushes to.
DOCKER_REGISTRY = 'index.docker.io/' + DOCKER_USERNAME

# Equivalent of `echo -n 'USERNAME:PASSWORD' | base64`, computed here so that
# the "auth" field holds the encoded token itself rather than a shell command.
docker_auth = base64.b64encode(
    f"{DOCKER_USERNAME}:{DOCKER_PASSWORD}".encode('utf-8')
).decode('ascii')

docker_config = {
    "auths": {
        "https://index.docker.io/v1/": {
            "username": DOCKER_USERNAME,
            "auth": docker_auth,
        },
    },
    "HttpHeaders": {
        "User-Agent": "Docker-Client/19.03.12 (linux)",
    },
}

# kubectl reads the configuration from this file via --from-file below.
with open('docker_config.json', 'w') as f:
    json.dump(docker_config, f)

try:
    # stderr is merged into stdout so a failure message is captured in e.output.
    docker_config_output = subprocess.check_output(
        ["kubectl", "create", "configmap", "docker-config",
         "--from-file=docker_config.json"],
        stderr=subprocess.STDOUT,
    )
except subprocess.CalledProcessError as e:
    # Decode kubectl's output so the message is readable text instead of a
    # bytes repr, and chain the original exception for the full traceback.
    raise RuntimeError("command '{}' return with error (code {}): {}".format(
        e.cmd, e.returncode, e.output.decode('utf-8', errors='replace'))) from e
print(docker_config_output.decode('utf-8'))
```
%% Cell type:markdown id: tags:
### Create a Persistent Volume Claim (PVC) to store the model
%% Cell type:code id: tags:
``` python
# Apply the PVC manifest and show kubectl's response.
pvc_output = subprocess.check_output(["kubectl", "apply", "-f", "fairing-pvc.yaml"])
print(pvc_output.decode('utf-8'))

# The argument list is handed to kubectl directly (no shell), so the JSONPath
# template needs no quoting. Passing it bare avoids emitting a stray leading
# quote that would then have to be sliced off the output.
pvc_name = subprocess.check_output(
    ["kubectl", "get", "pvc", "fairing-pvc", "-o=jsonpath={.metadata.name}"]
).decode("utf-8")
pv_name = subprocess.check_output(
    ["kubectl", "get", "pvc", "fairing-pvc", "-o=jsonpath={.spec.volumeName}"]
).decode("utf-8")

print('pvc_name:', pvc_name)
print('pv_name:', pv_name)
```
%% Cell type:markdown id: tags:
### Set up model training using Kubeflow Fairing and TFJob
%% Cell type:code id: tags:
``` python
# TFJob replica counts: Chief workers, Parameter Servers, Workers.
num_chief, num_ps, num_workers = 1, 1, 2

# Directory passed as --tf-model-dir and used as the PVC mount path, and the
# directory passed as --tf-export-dir.
model_dir, export_path = "/mnt", "/mnt/export"

# Kept as strings because they are concatenated into command-line flags.
train_steps, batch_size, learning_rate = "1000", "100", "0.01"
```
%% Cell type:code id: tags:
``` python
import uuid
from kubeflow import fairing
from kubeflow.fairing.kubernetes.utils import mounting_pvc
from kubeflow.fairing.builders.cluster.minio_context import MinioContextSource
import kfp

# Job name gets a short random suffix; the namespace is taken from the KFP client.
tfjob_name = 'mnist-training-' + uuid.uuid4().hex[:4]
tfjob_namespace = kfp.Client().get_user_namespace()
print(tfjob_name)
print(tfjob_namespace)

# Files handed to the preprocessor, each mapped to its own name.
output_map = {filename: filename for filename in ("Dockerfile", "mnist.py")}

context_source = ''

# Training entry point followed by its flags.
command = [
    "python",
    "/opt/mnist.py",
    f"--tf-model-dir={model_dir}",
    f"--tf-export-dir={export_path}",
    f"--tf-train-steps={train_steps}",
    f"--tf-batch-size={batch_size}",
    f"--tf-learning-rate={learning_rate}",
]

# NOTE(review): the MinIO secret and key are hard-coded in the notebook;
# consider loading them from the environment or a Kubernetes secret.
minio_context_source = MinioContextSource(
    endpoint_url='http://minio-service.kubeflow:9000/',
    minio_secret='minio',
    minio_secret_key='minio123',
    region_name='region',
)

fairing.config.set_preprocessor(
    'python',
    command=command,
    path_prefix="/app",
    output_map=output_map,
)

# Builder and deployer each get their own PVC mutator that mounts the
# PVC at model_dir.
fairing.config.set_builder(
    name='cluster',
    registry=DOCKER_REGISTRY,
    context_source=minio_context_source,
    cleanup=True,
    pod_spec_mutators=[
        mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir),
    ],
)

fairing.config.set_deployer(
    name='tfjob',
    namespace=tfjob_namespace,
    stream_log=False,
    job_name=tfjob_name,
    chief_count=num_chief,
    worker_count=num_workers,
    ps_count=num_ps,
    pod_spec_mutators=[
        mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir),
    ],
)
```
%% Cell type:markdown id: tags:
### Start training job
%% Cell type:code id: tags:
``` python
# Run fairing with the preprocessor, builder and deployer configured above.
fairing.config.run()
```
%% Cell type:markdown id: tags:
### Inspect the running training job
%% Cell type:code id: tags:
``` python
# Client used by the following cells to wait on, query and delete the TFJob.
from kubeflow.tfjob import TFJobClient
tfjob_client = TFJobClient()
# Optional: uncomment to fetch the TFJob object for inspection.
#tfjob_client.get(tfjob_name, namespace=tfjob_namespace)
```
%% Cell type:code id: tags:
``` python
# Wait for the TFJob; watch=True presumably reports status changes while
# waiting -- confirm against the TFJobClient documentation.
tfjob_client.wait_for_job(tfjob_name, namespace=tfjob_namespace, watch=True)
```
%% Cell type:code id: tags:
``` python
# Ask whether the TFJob succeeded; as the cell's last expression, the result
# is displayed by the notebook.
tfjob_client.is_job_succeeded(tfjob_name, namespace=tfjob_namespace)
```
%% Cell type:code id: tags:
``` python
# Optional: uncomment to retrieve the logs of the TFJob.
#tfjob_client.get_logs(tfjob_name, namespace=tfjob_namespace)
```
%% Cell type:markdown id: tags:
### Serve the trained model
%% Cell type:code id: tags:
``` python
from kubeflow.fairing.deployers.kfserving.kfserving import KFServing

# Inference service name with a short random suffix.
isvc_name = 'mnist-service-' + uuid.uuid4().hex[:4]
print(isvc_name)

# Serve the model from the "export" directory on the training PVC, in the
# same namespace as the TFJob.
isvc = KFServing(
    'tensorflow',
    namespace=tfjob_namespace,
    isvc_name=isvc_name,
    default_storage_uri=f'pvc://{pvc_name}/export',
)
isvc.deploy(isvc.generate_isvc())
```
%% Cell type:code id: tags:
``` python
from kfserving import KFServingClient

# Look up the inference service deployed above.
kfserving_client = KFServingClient()
mnist_isvc = kfserving_client.get(isvc_name, namespace=tfjob_namespace)
mnist_isvc_name = mnist_isvc['metadata']['name']

# A service that is not ready yet may have no 'status' section or no 'url'
# in it; fall back to an empty string instead of raising KeyError/TypeError.
mnist_isvc_endpoint = (mnist_isvc.get('status') or {}).get('url') or ''
print("MNIST Service Endpoint: " + mnist_isvc_endpoint)
```
%% Cell type:code id: tags:
``` python
# Host header built from the service name and namespace.
MODEL_HOST = f"Host: {mnist_isvc_name}-predictor-default.{tfjob_namespace}.example.com"
# Predict URL for the model.
# NOTE(review): the host is hard-coded to ml.cern.ch and mnist_isvc_endpoint
# is not used here -- confirm this is intended.
MODEL_URL = f"http://ml.cern.ch/v1/models/{mnist_isvc_name}:predict"
print(MODEL_HOST)
print(MODEL_URL)
# Send ./input.json as the request body to the predict URL with the Host header.
# NOTE(review): `-H @cookie` presumably loads extra headers from a local file
# named `cookie` -- confirm that file exists before running.
!curl -H @cookie -H "{MODEL_HOST}" {MODEL_URL} -d @./input.json
```
%% Cell type:markdown id: tags:
### Delete training job and inference service
%% Cell type:code id: tags:
``` python
# Delete the TFJob created by this notebook from its namespace.
tfjob_client.delete(tfjob_name, namespace=tfjob_namespace)
```
%% Cell type:code id: tags:
``` python
# Delete the inference service deployed by this notebook from its namespace.
kfserving_client.delete(isvc_name, namespace=tfjob_namespace)
```
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment