Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
E
examples
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Iterations
Requirements
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Test cases
Artifacts
Deploy
Releases
Package registry
Container registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Insights
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
ai-ml
examples
Commits
d08d6205
There was an error fetching the commit references. Please try again later.
Commit
d08d6205
authored
3 years ago
by
Dejan Golubovic
Browse files
Options
Downloads
Patches
Plain Diff
Change protobuf version
parent
d059d189
No related branches found
No related tags found
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
fairing/fairing-mnist.ipynb
+2
-1
2 additions, 1 deletion
fairing/fairing-mnist.ipynb
with
2 additions
and
1 deletion
fairing/fairing-mnist.ipynb
+
2
−
1
View file @
d08d6205
...
...
@@ -17,6 +17,7 @@
"source": [
"!pip3 install kubeflow-fairing --use-deprecated=legacy-resolver\n",
"!pip3 install msrestazure\n",
"!pip3 install protobuf==3.15.8\n",
"# Restart Kernel After Installation"
]
},
...
...
@@ -379,4 +380,4 @@
},
"nbformat": 4,
"nbformat_minor": 4
}
\ No newline at end of file
}
%% Cell type:markdown id: tags:
### Install kubeflow-fairing
%% Cell type:code id: tags:
```
python
!
pip3
install
kubeflow
-
fairing
--
use
-
deprecated
=
legacy
-
resolver
!
pip3
install
msrestazure
!
pip3
install
protobuf
==
3.15
.
8
# Restart Kernel After Installation
```
%% Cell type:markdown id: tags:
### Setup Docker registry credentials for Kubernetes configmap
%% Cell type:code id: tags:
```
python
import base64
import json
import os
import subprocess

# Replace these placeholders with real registry credentials before running.
DOCKER_USERNAME = 'USERNAME'
DOCKER_PASSWORD = 'PASSWORD'

DOCKER_REGISTRY = 'index.docker.io/' + DOCKER_USERNAME

# The "auth" entry must hold base64("<username>:<password>").  It is computed
# here instead of storing the shell recipe ("echo -n ... | base64") verbatim,
# which would have produced an unusable credential.
docker_auth = base64.b64encode(
    '{}:{}'.format(DOCKER_USERNAME, DOCKER_PASSWORD).encode('utf-8')
).decode('ascii')

docker_config = {
    "auths": {
        "https://index.docker.io/v1/": {
            "username": DOCKER_USERNAME,
            "auth": docker_auth,
        }
    },
    "HttpHeaders": {
        "User-Agent": "Docker-Client/19.03.12 (linux)"
    }
}

# Write the config to disk so kubectl can load it with --from-file.
with open('docker_config.json', 'w', encoding='utf-8') as f:
    json.dump(docker_config, f)

try:
    # stderr is folded into stdout so a failure message is captured as well.
    docker_config_output = subprocess.check_output(
        ["kubectl", "create", "configmap", "docker-config",
         "--from-file=docker_config.json"],
        stderr=subprocess.STDOUT,
    )
except subprocess.CalledProcessError as e:
    # Decode the captured bytes so the message is readable, and chain the
    # original exception to keep its traceback.
    raise RuntimeError(
        "command '{}' return with error (code {}): {}".format(
            e.cmd, e.returncode, e.output.decode('utf-8', errors='replace'))
    ) from e

print(docker_config_output.decode('utf-8'))
```
%% Cell type:markdown id: tags:
### Create a Persistent Volume Claim (PVC) to store the model
%% Cell type:code id: tags:
```
python
def _get_pvc_field(jsonpath):
    """Return one field of the `fairing-pvc` claim as text.

    `jsonpath` is a kubectl JSONPath template such as "{.metadata.name}".
    The arguments are passed as a list (no shell), so the template must not
    be wrapped in quotes: a quote would be printed literally by kubectl.
    """
    return subprocess.check_output(
        ["kubectl", "get", "pvc", "fairing-pvc", "-o=jsonpath=" + jsonpath]
    ).decode("utf-8")


# Create (or update) the claim described in fairing-pvc.yaml.
pvc_output = subprocess.check_output(["kubectl", "apply", "-f", "fairing-pvc.yaml"])
print(pvc_output.decode('utf-8'))

# Previously the template carried a stray leading quote that was then sliced
# off the output with [1:]; querying without the quote yields the same value.
pvc_name = _get_pvc_field("{.metadata.name}")
pv_name = _get_pvc_field("{.spec.volumeName}")

print('pvc_name: ', pvc_name)
print('pv_name: ', pv_name)
```
%% Cell type:markdown id: tags:
### Setup model training using Kubeflow Fairing and TFJob
%% Cell type:code id: tags:
```
python
# TFJob replica counts: Chief workers, Parameter Servers and Workers.
num_chief, num_ps, num_workers = 1, 1, 2

# Directories handed to the training script (model output and export).
model_dir, export_path = "/mnt", "/mnt/export"

# Hyperparameters are kept as strings because they are concatenated into
# command-line flags when the training command is assembled.
train_steps, batch_size, learning_rate = "1000", "100", "0.01"
```
%% Cell type:code id: tags:
```
python
import uuid

import kfp
from kubeflow import fairing
from kubeflow.fairing.builders.cluster.minio_context import MinioContextSource
from kubeflow.fairing.kubernetes.utils import mounting_pvc

# Job name: fixed prefix plus the first four hex characters of a random UUID.
tfjob_name = 'mnist-training-{}'.format(uuid.uuid4().hex[:4])
# Namespace reported by the Kubeflow Pipelines client for the current user.
tfjob_namespace = kfp.Client().get_user_namespace()
print(tfjob_name)
print(tfjob_namespace)

# Each listed file is mapped to an identically named entry.
output_map = {name: name for name in ("Dockerfile", "mnist.py")}

context_source = ''

# Command line for the training script; every flag takes its value from the
# parameter variables defined earlier in the notebook.
command = [
    "python",
    "/opt/mnist.py",
    f"--tf-model-dir={model_dir}",
    f"--tf-export-dir={export_path}",
    f"--tf-train-steps={train_steps}",
    f"--tf-batch-size={batch_size}",
    f"--tf-learning-rate={learning_rate}",
]

# NOTE(review): the MinIO credentials are hard-coded here; confirm this is
# acceptable for the target cluster.
minio_context_source = MinioContextSource(
    endpoint_url='http://minio-service.kubeflow:9000/',
    minio_secret='minio',
    minio_secret_key='minio123',
    region_name='region',
)

fairing.config.set_preprocessor(
    'python',
    command=command,
    path_prefix="/app",
    output_map=output_map,
)

# The builder and the deployer each receive their own PVC mutator, mounting
# the claim at model_dir.
fairing.config.set_builder(
    name='cluster',
    registry=DOCKER_REGISTRY,
    context_source=minio_context_source,
    cleanup=True,
    pod_spec_mutators=[
        mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir),
    ],
)

fairing.config.set_deployer(
    name='tfjob',
    namespace=tfjob_namespace,
    stream_log=False,
    job_name=tfjob_name,
    chief_count=num_chief,
    worker_count=num_workers,
    ps_count=num_ps,
    pod_spec_mutators=[
        mounting_pvc(pvc_name=pvc_name, pvc_mount_path=model_dir),
    ],
)
```
%% Cell type:markdown id: tags:
### Start training job
%% Cell type:code id: tags:
```
python
# Launch the run using the preprocessor, builder and deployer that were
# registered on fairing.config.
fairing.config.run()
```
%% Cell type:markdown id: tags:
### Inspect the running training job
%% Cell type:code id: tags:
```
python
from kubeflow.tfjob import TFJobClient

# Client used by the following cells to wait on, check and delete the TFJob.
tfjob_client = TFJobClient()
# Optional: uncomment to fetch the TFJob object.
#tfjob_client.get(tfjob_name, namespace=tfjob_namespace)
```
%% Cell type:code id: tags:
```
python
# Wait for the training job in the user's namespace.
# NOTE(review): watch=True presumably prints status changes while waiting;
# confirm against the TFJobClient documentation.
tfjob_client.wait_for_job(tfjob_name, namespace=tfjob_namespace, watch=True)
```
%% Cell type:code id: tags:
```
python
# Ask the client whether the training job succeeded; as the last expression
# of the cell, the result is displayed by the notebook.
tfjob_client.is_job_succeeded(tfjob_name, namespace=tfjob_namespace)
```
%% Cell type:code id: tags:
```
python
#tfjob_client.get_logs(tfjob_name, namespace=tfjob_namespace)
```
%% Cell type:markdown id: tags:
### Serve the trained model
%% Cell type:code id: tags:
```
python
from kubeflow.fairing.deployers.kfserving.kfserving import KFServing

# Service name: fixed prefix plus the first four hex characters of a random UUID.
isvc_name = 'mnist-service-{}'.format(uuid.uuid4().hex[:4])
print(isvc_name)

# Storage URI pointing at the "export" directory on the PVC.
model_storage_uri = f'pvc://{pvc_name}/export'

isvc = KFServing(
    'tensorflow',
    namespace=tfjob_namespace,
    isvc_name=isvc_name,
    default_storage_uri=model_storage_uri,
)

# Generate the inference service definition, then deploy it.
isvc_definition = isvc.generate_isvc()
isvc.deploy(isvc_definition)
```
%% Cell type:code id: tags:
```
python
from kfserving import KFServingClient

kfserving_client = KFServingClient()

# Look up the inference service created in the previous cell.
mnist_isvc = kfserving_client.get(isvc_name, namespace=tfjob_namespace)
mnist_isvc_name = mnist_isvc['metadata']['name']

# A missing 'url' was already tolerated; a missing or empty 'status' is now
# handled the same way instead of raising KeyError, so the endpoint falls
# back to an empty string.
mnist_isvc_status = mnist_isvc.get('status') or {}
mnist_isvc_endpoint = mnist_isvc_status.get('url') or ''
print("MNIST Service Endpoint: " + mnist_isvc_endpoint)
```
%% Cell type:code id: tags:
```
python
# Host header built from the inference service name and the user's namespace.
MODEL_HOST = f"Host: {mnist_isvc_name}-predictor-default.{tfjob_namespace}.example.com"
# Prediction URL for the deployed model.
MODEL_URL = f"http://ml.cern.ch/v1/models/{mnist_isvc_name}:predict"
print(MODEL_HOST)
print(MODEL_URL)
# Shell escape: IPython substitutes {MODEL_HOST} and {MODEL_URL} before running
# curl. `-H @cookie` reads headers from a local file named "cookie" and
# `-d @./input.json` sends ./input.json as the request body.
# NOTE(review): both files must exist in the working directory - confirm.
!curl -H @cookie -H "{MODEL_HOST}" {MODEL_URL} -d @./input.json
```
%% Cell type:markdown id: tags:
### Delete training job and inference service
%% Cell type:code id: tags:
```
python
# Clean up: delete the training job from the user's namespace.
tfjob_client.delete(tfjob_name, namespace=tfjob_namespace)
```
%% Cell type:code id: tags:
```
python
# Clean up: delete the inference service from the user's namespace.
kfserving_client.delete(isvc_name, namespace=tfjob_namespace)
```
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment