Add kfp eos example

e2e737f3 · Dejan Golubovic · be13af84 · e2e737f3 · e2e737f3
Commit e2e737f3 authored 3 years ago by Dejan Golubovic
--- a/mnist-kfp/README.md
+++ b/mnist-kfp/README.md
@@ -12,7 +12,7 @@ Store intermediate data on personal EOS.

 ### How to run?

- Open **mnist-kfp/mnist-kfp-eos.ipynb** in your Notebook server
+- Open **mnist-kfp/mnist-kfp.ipynb** in your Notebook server
 - Run all the cells
 - Download created pipeline .yaml file
 - Open Pipelines
@@ -21,4 +21,21 @@ Store intermediate data on personal EOS.
 - Click Create Run
 - The experiment should finish as Successful

-Current issue with EOS: Fix by reading local credentials and pass it to every cell.
\ No newline at end of file
+## Example - mnist-kfp-eos
+
+### What is it about?
+
+Same as above, only with EOS access.
+
+### How to run?
+
+- Open a notebook terminal
+- Authenticate with Kerberos
+    - `kinit <cernid>`
+- If a secret from an earlier Kerberos ticket already exists, delete it before creating a new one
+    - `kubectl delete secret krb-secret`
+- Create a Kubernetes secret from the Kerberos credential cache
+    - `kubectl create secret generic krb-secret --from-file=/tmp/krb5cc_1000`
+- Open **mnist-kfp/mnist-kfp-eos.ipynb** in your Notebook server
+- Run the cells
+
--- a/mnist-kfp/mnist-kfp-eos.ipynb
+++ b/mnist-kfp/mnist-kfp-eos.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "colab_type": "text",
+    "id": "j6331ZSsQGY3"
+   },
+   "source": [
+    "# MNIST classification"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Imports for Compilation"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import kfp\n",
+    "from kfp.components import func_to_container_op, InputPath, OutputPath\n",
+    "from kfp import dsl\n",
+    "from kubernetes import client as k8s_client\n",
+    "import yaml"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Read Data Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def read_data(output_text_path: OutputPath(str), base_image='a'):\n",
+    "    import tensorflow as tf\n",
+    "    import numpy as np\n",
+    "    import os\n",
+    "    from zipfile import ZipFile\n",
+    "    \n",
+    "    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n",
+    "\n",
+    "    # Rescale the images from [0,255] to the [0.0,1.0] range.\n",
+    "    x_train, x_test = x_train[..., np.newaxis]/255.0, x_test[..., np.newaxis]/255.0\n",
+    "    \n",
+    "    os.system('pwd')\n",
+    "    os.system('ls -la')\n",
+    "    os.system('whoami')\n",
+    "\n",
+    "    np.save('xtrain.npy', x_train)\n",
+    "    np.save('ytrain.npy', y_train)\n",
+    "\n",
+    "    np.save('xtest.npy', x_test)\n",
+    "    np.save('ytest.npy', y_test)\n",
+    "    \n",
+    "    zipObj = ZipFile(output_text_path, 'w')\n",
+    "    \n",
+    "    zipObj.write('xtrain.npy')\n",
+    "    zipObj.write('ytrain.npy')\n",
+    "    zipObj.write('xtest.npy')\n",
+    "    zipObj.write('ytest.npy')\n",
+    "    \n",
+    "    zipObj.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Read Data Pipeline Component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "read_data_comp = func_to_container_op(\n",
+    "    func=read_data,\n",
+    "    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Preprocess Data Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def preprocess_data(text_path: InputPath(), output_text_path: OutputPath()):\n",
+    "    import numpy as np\n",
+    "    import os\n",
+    "    import tarfile\n",
+    "    print('tarfile imported')\n",
+    "    from zipfile import ZipFile\n",
+    "    \n",
+    "    with ZipFile(text_path, 'r') as zipObj:\n",
+    "       zipObj.extractall()\n",
+    "    \n",
+    "    # Load data\n",
+    "    x_train = np.load('xtrain.npy')\n",
+    "    y_train = np.load('ytrain.npy')\n",
+    "\n",
+    "    x_test = np.load('xtest.npy')\n",
+    "    y_test = np.load('ytest.npy')\n",
+    "    \n",
+    "    # Filter 3 and 6\n",
+    "    def filter_36(x, y):\n",
+    "        keep = (y == 3) | (y == 6)\n",
+    "        x, y = x[keep], y[keep]\n",
+    "        y = y == 3\n",
+    "        return x,y\n",
+    "    \n",
+    "    print(\"Number of unfiltered training examples:\", len(x_train))\n",
+    "    print(\"Number of unfiltered test examples:\", len(x_test))\n",
+    "    \n",
+    "    x_train, y_train = filter_36(x_train, y_train)\n",
+    "    x_test, y_test = filter_36(x_test, y_test)\n",
+    "\n",
+    "    print(\"Number of filtered training examples:\", len(x_train))\n",
+    "    print(\"Number of filtered test examples:\", len(x_test))\n",
+    "    \n",
+    "    # Save modified data\n",
+    "    np.save('xtrain_filtered.npy', x_train)\n",
+    "    np.save('ytrain_filtered.npy', y_train)\n",
+    "\n",
+    "    np.save('xtest_filtered.npy', x_test)\n",
+    "    np.save('ytest_filtered.npy', y_test)\n",
+    "    \n",
+    "    zipObj = ZipFile(output_text_path, 'w')\n",
+    "    \n",
+    "    zipObj.write('xtrain_filtered.npy')\n",
+    "    zipObj.write('ytrain_filtered.npy')\n",
+    "    zipObj.write('xtest_filtered.npy')\n",
+    "    zipObj.write('ytest_filtered.npy')\n",
+    "    \n",
+    "    zipObj.close()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Preprocess Data Pipeline Component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "preprocess_data_comp = func_to_container_op(\n",
+    "    func=preprocess_data,\n",
+    "    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Train Full Model Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def model_full(text_path: InputPath(), output_text_path: OutputPath()):\n",
+    "    # A simple model based off LeNet from https://keras.io/examples/mnist_cnn/\n",
+    "    import tensorflow as tf\n",
+    "    from zipfile import ZipFile\n",
+    "    import numpy as np\n",
+    "    \n",
+    "    model = tf.keras.Sequential()\n",
+    "    model.add(tf.keras.layers.Conv2D(32, [3, 3], activation='relu', input_shape=(28,28,1)))\n",
+    "    model.add(tf.keras.layers.Conv2D(64, [3, 3], activation='relu'))\n",
+    "    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n",
+    "    model.add(tf.keras.layers.Dropout(0.25))\n",
+    "    model.add(tf.keras.layers.Flatten())\n",
+    "    model.add(tf.keras.layers.Dense(128, activation='relu'))\n",
+    "    model.add(tf.keras.layers.Dropout(0.5))\n",
+    "    model.add(tf.keras.layers.Dense(1))\n",
+    "\n",
+    "    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n",
+    "                  optimizer=tf.keras.optimizers.Adam(),\n",
+    "                  metrics=['accuracy'])\n",
+    "\n",
+    "    model.summary()\n",
+    "    \n",
+    "    with ZipFile(text_path, 'r') as zipObj:\n",
+    "       zipObj.extractall()\n",
+    "    \n",
+    "    # Load data\n",
+    "    x_train = np.load('xtrain_filtered.npy')\n",
+    "    y_train = np.load('ytrain_filtered.npy')\n",
+    "\n",
+    "    x_test = np.load('xtest_filtered.npy')\n",
+    "    y_test = np.load('ytest_filtered.npy')\n",
+    "    \n",
+    "    model.fit(x_train, y_train, batch_size=128, epochs=1, verbose=1, validation_data=(x_test, y_test))\n",
+    "\n",
+    "    cnn_results = model.evaluate(x_test, y_test)\n",
+    "    \n",
+    "    with open(output_text_path, 'w') as writer:\n",
+    "        writer.write(str(cnn_results) + '\\n')      "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Train Full Model Pipeline Component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_full_comp = func_to_container_op(\n",
+    "    func=model_full,\n",
+    "    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Train Fair Model Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def model_fair(text_path: InputPath(), output_text_path: OutputPath()):\n",
+    "    import tensorflow as tf\n",
+    "    from zipfile import ZipFile\n",
+    "    import numpy as np\n",
+    "    \n",
+    "    model = tf.keras.Sequential()\n",
+    "    model.add(tf.keras.layers.Flatten(input_shape=(28,28,1)))\n",
+    "    model.add(tf.keras.layers.Dense(2, activation='relu'))\n",
+    "    model.add(tf.keras.layers.Dense(1))\n",
+    "\n",
+    "    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n",
+    "                  optimizer=tf.keras.optimizers.Adam(),\n",
+    "                  metrics=['accuracy'])\n",
+    "\n",
+    "    model.summary()\n",
+    "    \n",
+    "    with ZipFile(text_path, 'r') as zipObj:\n",
+    "       zipObj.extractall()\n",
+    "    \n",
+    "    # Load data\n",
+    "    x_train = np.load('xtrain_filtered.npy')\n",
+    "    y_train = np.load('ytrain_filtered.npy')\n",
+    "\n",
+    "    x_test = np.load('xtest_filtered.npy')\n",
+    "    y_test = np.load('ytest_filtered.npy')\n",
+    "    \n",
+    "    model.fit(x_train, y_train, batch_size=128, epochs=1, verbose=1, validation_data=(x_test, y_test))\n",
+    "\n",
+    "    cnn_results = model.evaluate(x_test, y_test)\n",
+    "    \n",
+    "    with open(output_text_path, 'w') as writer:\n",
+    "        writer.write(str(cnn_results) + '\\n')  "
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Train Fair Model Pipeline Component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "model_fair_comp = func_to_container_op(\n",
+    "    func=model_fair,\n",
+    "    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Evaluate Models Function"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def models_evaluate(text_path_0: InputPath(), text_path_1: InputPath()):\n",
+    "    print('model 0:')\n",
+    "    with open(text_path_0, 'r') as reader:\n",
+    "        for line in reader:\n",
+    "            print(line, end = '')\n",
+    "    print('model 1:')\n",
+    "    with open(text_path_1, 'r') as reader:\n",
+    "        for line in reader:\n",
+    "            print(line, end = '')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Evaluate Models Pipeline Component"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "models_evaluate_comp = func_to_container_op(\n",
+    "    func=models_evaluate,\n",
+    "    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Create Pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "krb_secret = k8s_client.V1SecretVolumeSource(secret_name='krb-secret')\n",
+    "krb_secret_volume = k8s_client.V1Volume(name='krb-secret-vol', secret=krb_secret)\n",
+    "krb_secret_volume_mount = k8s_client.V1VolumeMount(name=krb_secret_volume.name, mount_path='/secret/krb-secret-vol')\n",
+    "\n",
+    "eos_host_path = k8s_client.V1HostPathVolumeSource(path='/var/eos')\n",
+    "eos_volume = k8s_client.V1Volume(name='eos', host_path=eos_host_path)\n",
+    "eos_volume_mount = k8s_client.V1VolumeMount(name=eos_volume.name, mount_path='/eos')\n",
+    "\n",
+    "@dsl.pipeline(\n",
+    "    name='test-eos-kfp',\n",
+    "    description='TEST EOS).'\n",
+    ")\n",
+    "def ml_pipeline_first():\n",
+    "    data_dir = read_data_comp() \\\n",
+    "                .add_volume(krb_secret_volume) \\\n",
+    "                .add_volume_mount(krb_secret_volume_mount) \\\n",
+    "                .add_volume(eos_volume) \\\n",
+    "                .add_volume_mount(eos_volume_mount)\n",
+    "    \n",
+    "    new_dir = preprocess_data_comp(data_dir.output) \\\n",
+    "                .add_volume(krb_secret_volume) \\\n",
+    "                .add_volume_mount(krb_secret_volume_mount) \\\n",
+    "                .add_volume(eos_volume) \\\n",
+    "                .add_volume_mount(eos_volume_mount)\n",
+    "                        \n",
+    "    cnn_res = model_full_comp(new_dir.output) \\\n",
+    "                .add_volume(krb_secret_volume) \\\n",
+    "                .add_volume_mount(krb_secret_volume_mount) \\\n",
+    "                .add_volume(eos_volume) \\\n",
+    "                .add_volume_mount(eos_volume_mount)\n",
+    "    \n",
+    "    fairnn_res = model_fair_comp(new_dir.output) \\\n",
+    "                .add_volume(krb_secret_volume) \\\n",
+    "                .add_volume_mount(krb_secret_volume_mount) \\\n",
+    "                .add_volume(eos_volume) \\\n",
+    "                .add_volume_mount(eos_volume_mount)\n",
+    "    \n",
+    "    models_evaluate_comp(cnn_res.output, fairnn_res.output) \\\n",
+    "                .add_volume(krb_secret_volume) \\\n",
+    "                .add_volume_mount(krb_secret_volume_mount) \\\n",
+    "                .add_volume(eos_volume) \\\n",
+    "                .add_volume_mount(eos_volume_mount)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Compile Pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pipeline_name = 'example_kfp_pipeline_20'\n",
+    "pipeline_file = pipeline_name + '.yaml'\n",
+    "experiment_name = 'example_kfp_experiment'\n",
+    "client = kfp.Client()\n",
+    "\n",
+    "workflow = kfp.compiler.Compiler().compile(ml_pipeline_first, pipeline_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Function for Accessing EOS"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "def post_process(pipeline_file, outfile):\n",
+    "    with open(pipeline_file, \"r\") as stream:\n",
+    "        pip_dict = yaml.safe_load(stream)\n",
+    "        \n",
+    "    copy_command = 'cp /secret/krb-secret-vol/krb5cc_1000 /tmp/krb5cc_1000'\n",
+    "    chmod_command = 'chmod 600 /tmp/krb5cc_1000'\n",
+    "        \n",
+    "    for template in pip_dict['spec']['templates']:\n",
+    "        if 'container' in template.keys():\n",
+    "            component_command_list = template['container']['command'][2].split('\\n')\n",
+    "            component_command_list.insert(2, copy_command)\n",
+    "            component_command_list.insert(3, chmod_command)\n",
+    "            \n",
+    "            # Check EOS access with this command\n",
+    "            # component_command_list.insert(4, 'ls -l /eos/user/d/dgolubov')\n",
+    "            joined_string = '\\n'.join(component_command_list)\n",
+    "\n",
+    "            template['container']['command'][2] = joined_string\n",
+    "            \n",
+    "    with open(outfile, 'w') as outfile:\n",
+    "        yaml.dump(pip_dict, outfile, default_flow_style=False)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Apply Access to EOS"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "post_process(pipeline_file, pipeline_file)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "### Upload and Run Pipeline"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client.upload_pipeline(pipeline_file, pipeline_name)\n",
+    "exp = client.create_experiment(name=experiment_name)\n",
+    "run = client.run_pipeline(exp.id, pipeline_name, pipeline_file)"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "collapsed_sections": [],
+   "name": "mnist.ipynb",
+   "private_outputs": true,
+   "provenance": [],
+   "toc_visible": true
+  },
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "kubeflow_notebook": {
+   "autosnapshot": false,
+   "docker_image": "gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-30",
+   "experiment": {
+    "id": "",
+    "name": ""
+   },
+   "experiment_name": "",
+   "katib_metadata": {
+    "algorithm": {
+     "algorithmName": "grid"
+    },
+    "maxFailedTrialCount": 3,
+    "maxTrialCount": 12,
+    "objective": {
+     "objectiveMetricName": "",
+     "type": "minimize"
+    },
+    "parallelTrialCount": 3,
+    "parameters": []
+   },
+   "katib_run": false,
+   "pipeline_description": "",
+   "pipeline_name": "",
+   "snapshot_volumes": false,
+   "steps_defaults": [],
+   "volume_access_mode": "rwm",
+   "volumes": []
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.9"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
+%% Cell type:markdown id: tags:
+
+# MNIST classification
+
+%% Cell type:markdown id: tags:
+
+### Imports for Compilation
+
+%% Cell type:code id: tags:
+
+``` python
+import kfp
+from kfp.components import func_to_container_op, InputPath, OutputPath
+from kfp import dsl
+from kubernetes import client as k8s_client
+import yaml
+```
+
+%% Cell type:markdown id: tags:
+
+### Read Data Function
+
+%% Cell type:code id: tags:
+
+``` python
+def read_data(output_text_path: OutputPath(str), base_image='a'):
+    import tensorflow as tf
+    import numpy as np
+    import os
+    from zipfile import ZipFile
+
+    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
+
+    # Rescale the images from [0,255] to the [0.0,1.0] range.
+    x_train, x_test = x_train[..., np.newaxis]/255.0, x_test[..., np.newaxis]/255.0
+
+    os.system('pwd')
+    os.system('ls -la')
+    os.system('whoami')
+
+    np.save('xtrain.npy', x_train)
+    np.save('ytrain.npy', y_train)
+
+    np.save('xtest.npy', x_test)
+    np.save('ytest.npy', y_test)
+
+    zipObj = ZipFile(output_text_path, 'w')
+
+    zipObj.write('xtrain.npy')
+    zipObj.write('ytrain.npy')
+    zipObj.write('xtest.npy')
+    zipObj.write('ytest.npy')
+
+    zipObj.close()
+```
+
+%% Cell type:markdown id: tags:
+
+### Read Data Pipeline Component
+
+%% Cell type:code id: tags:
+
+``` python
+read_data_comp = func_to_container_op(
+    func=read_data,
+    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'
+)
+```
+
+%% Cell type:markdown id: tags:
+
+### Preprocess Data Function
+
+%% Cell type:code id: tags:
+
+``` python
+def preprocess_data(text_path: InputPath(), output_text_path: OutputPath()):
+    import numpy as np
+    import os
+    import tarfile
+    print('tarfile imported')
+    from zipfile import ZipFile
+
+    with ZipFile(text_path, 'r') as zipObj:
+       zipObj.extractall()
+
+    # Load data
+    x_train = np.load('xtrain.npy')
+    y_train = np.load('ytrain.npy')
+
+    x_test = np.load('xtest.npy')
+    y_test = np.load('ytest.npy')
+
+    # Filter 3 and 6
+    def filter_36(x, y):
+        keep = (y == 3) | (y == 6)
+        x, y = x[keep], y[keep]
+        y = y == 3
+        return x,y
+
+    print("Number of unfiltered training examples:", len(x_train))
+    print("Number of unfiltered test examples:", len(x_test))
+
+    x_train, y_train = filter_36(x_train, y_train)
+    x_test, y_test = filter_36(x_test, y_test)
+
+    print("Number of filtered training examples:", len(x_train))
+    print("Number of filtered test examples:", len(x_test))
+
+    # Save modified data
+    np.save('xtrain_filtered.npy', x_train)
+    np.save('ytrain_filtered.npy', y_train)
+
+    np.save('xtest_filtered.npy', x_test)
+    np.save('ytest_filtered.npy', y_test)
+
+    zipObj = ZipFile(output_text_path, 'w')
+
+    zipObj.write('xtrain_filtered.npy')
+    zipObj.write('ytrain_filtered.npy')
+    zipObj.write('xtest_filtered.npy')
+    zipObj.write('ytest_filtered.npy')
+
+    zipObj.close()
+```
+
+%% Cell type:markdown id: tags:
+
+### Preprocess Data Pipeline Component
+
+%% Cell type:code id: tags:
+
+``` python
+preprocess_data_comp = func_to_container_op(
+    func=preprocess_data,
+    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'
+)
+```
+
+%% Cell type:markdown id: tags:
+
+### Train Full Model Function
+
+%% Cell type:code id: tags:
+
+``` python
+def model_full(text_path: InputPath(), output_text_path: OutputPath()):
+    # A simple model based off LeNet from https://keras.io/examples/mnist_cnn/
+    import tensorflow as tf
+    from zipfile import ZipFile
+    import numpy as np
+
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Conv2D(32, [3, 3], activation='relu', input_shape=(28,28,1)))
+    model.add(tf.keras.layers.Conv2D(64, [3, 3], activation='relu'))
+    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
+    model.add(tf.keras.layers.Dropout(0.25))
+    model.add(tf.keras.layers.Flatten())
+    model.add(tf.keras.layers.Dense(128, activation='relu'))
+    model.add(tf.keras.layers.Dropout(0.5))
+    model.add(tf.keras.layers.Dense(1))
+
+    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+                  optimizer=tf.keras.optimizers.Adam(),
+                  metrics=['accuracy'])
+
+    model.summary()
+
+    with ZipFile(text_path, 'r') as zipObj:
+       zipObj.extractall()
+
+    # Load data
+    x_train = np.load('xtrain_filtered.npy')
+    y_train = np.load('ytrain_filtered.npy')
+
+    x_test = np.load('xtest_filtered.npy')
+    y_test = np.load('ytest_filtered.npy')
+
+    model.fit(x_train, y_train, batch_size=128, epochs=1, verbose=1, validation_data=(x_test, y_test))
+
+    cnn_results = model.evaluate(x_test, y_test)
+
+    with open(output_text_path, 'w') as writer:
+        writer.write(str(cnn_results) + '\n')
+```
+
+%% Cell type:markdown id: tags:
+
+### Train Full Model Pipeline Component
+
+%% Cell type:code id: tags:
+
+``` python
+model_full_comp = func_to_container_op(
+    func=model_full,
+    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'
+)
+```
+
+%% Cell type:markdown id: tags:
+
+### Train Fair Model Function
+
+%% Cell type:code id: tags:
+
+``` python
+def model_fair(text_path: InputPath(), output_text_path: OutputPath()):
+    import tensorflow as tf
+    from zipfile import ZipFile
+    import numpy as np
+
+    model = tf.keras.Sequential()
+    model.add(tf.keras.layers.Flatten(input_shape=(28,28,1)))
+    model.add(tf.keras.layers.Dense(2, activation='relu'))
+    model.add(tf.keras.layers.Dense(1))
+
+    model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
+                  optimizer=tf.keras.optimizers.Adam(),
+                  metrics=['accuracy'])
+
+    model.summary()
+
+    with ZipFile(text_path, 'r') as zipObj:
+       zipObj.extractall()
+
+    # Load data
+    x_train = np.load('xtrain_filtered.npy')
+    y_train = np.load('ytrain_filtered.npy')
+
+    x_test = np.load('xtest_filtered.npy')
+    y_test = np.load('ytest_filtered.npy')
+
+    model.fit(x_train, y_train, batch_size=128, epochs=1, verbose=1, validation_data=(x_test, y_test))
+
+    cnn_results = model.evaluate(x_test, y_test)
+
+    with open(output_text_path, 'w') as writer:
+        writer.write(str(cnn_results) + '\n')
+```
+
+%% Cell type:markdown id: tags:
+
+### Train Fair Model Pipeline Component
+
+%% Cell type:code id: tags:
+
+``` python
+model_fair_comp = func_to_container_op(
+    func=model_fair,
+    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'
+)
+```
+
+%% Cell type:markdown id: tags:
+
+### Evaluate Models Function
+
+%% Cell type:code id: tags:
+
+``` python
+def models_evaluate(text_path_0: InputPath(), text_path_1: InputPath()):
+    print('model 0:')
+    with open(text_path_0, 'r') as reader:
+        for line in reader:
+            print(line, end = '')
+    print('model 1:')
+    with open(text_path_1, 'r') as reader:
+        for line in reader:
+            print(line, end = '')
+```
+
+%% Cell type:markdown id: tags:
+
+### Evaluate Models Pipeline Component
+
+%% Cell type:code id: tags:
+
+``` python
+models_evaluate_comp = func_to_container_op(
+    func=models_evaluate,
+    base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'
+)
+```
+
+%% Cell type:markdown id: tags:
+
+### Create Pipeline
+
+%% Cell type:code id: tags:
+
+``` python
+krb_secret = k8s_client.V1SecretVolumeSource(secret_name='krb-secret')
+krb_secret_volume = k8s_client.V1Volume(name='krb-secret-vol', secret=krb_secret)
+krb_secret_volume_mount = k8s_client.V1VolumeMount(name=krb_secret_volume.name, mount_path='/secret/krb-secret-vol')
+
+eos_host_path = k8s_client.V1HostPathVolumeSource(path='/var/eos')
+eos_volume = k8s_client.V1Volume(name='eos', host_path=eos_host_path)
+eos_volume_mount = k8s_client.V1VolumeMount(name=eos_volume.name, mount_path='/eos')
+
+@dsl.pipeline(
+    name='test-eos-kfp',
+    description='TEST EOS).'
+)
+def ml_pipeline_first():
+    data_dir = read_data_comp() \
+                .add_volume(krb_secret_volume) \
+                .add_volume_mount(krb_secret_volume_mount) \
+                .add_volume(eos_volume) \
+                .add_volume_mount(eos_volume_mount)
+
+    new_dir = preprocess_data_comp(data_dir.output) \
+                .add_volume(krb_secret_volume) \
+                .add_volume_mount(krb_secret_volume_mount) \
+                .add_volume(eos_volume) \
+                .add_volume_mount(eos_volume_mount)
+
+    cnn_res = model_full_comp(new_dir.output) \
+                .add_volume(krb_secret_volume) \
+                .add_volume_mount(krb_secret_volume_mount) \
+                .add_volume(eos_volume) \
+                .add_volume_mount(eos_volume_mount)
+
+    fairnn_res = model_fair_comp(new_dir.output) \
+                .add_volume(krb_secret_volume) \
+                .add_volume_mount(krb_secret_volume_mount) \
+                .add_volume(eos_volume) \
+                .add_volume_mount(eos_volume_mount)
+
+    models_evaluate_comp(cnn_res.output, fairnn_res.output) \
+                .add_volume(krb_secret_volume) \
+                .add_volume_mount(krb_secret_volume_mount) \
+                .add_volume(eos_volume) \
+                .add_volume_mount(eos_volume_mount)
+```
+
+%% Cell type:markdown id: tags:
+
+### Compile Pipeline
+
+%% Cell type:code id: tags:
+
+``` python
+pipeline_name = 'example_kfp_pipeline_20'
+pipeline_file = pipeline_name + '.yaml'
+experiment_name = 'example_kfp_experiment'
+client = kfp.Client()
+
+workflow = kfp.compiler.Compiler().compile(ml_pipeline_first, pipeline_file)
+```
+
+%% Cell type:markdown id: tags:
+
+### Function for Accessing EOS
+
+%% Cell type:code id: tags:
+
+``` python
+def post_process(pipeline_file, outfile):
+    with open(pipeline_file, "r") as stream:
+        pip_dict = yaml.safe_load(stream)
+
+    copy_command = 'cp /secret/krb-secret-vol/krb5cc_1000 /tmp/krb5cc_1000'
+    chmod_command = 'chmod 600 /tmp/krb5cc_1000'
+
+    for template in pip_dict['spec']['templates']:
+        if 'container' in template.keys():
+            component_command_list = template['container']['command'][2].split('\n')
+            component_command_list.insert(2, copy_command)
+            component_command_list.insert(3, chmod_command)
+
+            # Check EOS access with this command
+            # component_command_list.insert(4, 'ls -l /eos/user/d/dgolubov')
+            joined_string = '\n'.join(component_command_list)
+
+            template['container']['command'][2] = joined_string
+
+    with open(outfile, 'w') as outfile:
+        yaml.dump(pip_dict, outfile, default_flow_style=False)
+```
+
+%% Cell type:markdown id: tags:
+
+### Apply Access to EOS
+
+%% Cell type:code id: tags:
+
+``` python
+post_process(pipeline_file, pipeline_file)
+```
+
+%% Cell type:markdown id: tags:
+
+### Upload and Run Pipeline
+
+%% Cell type:code id: tags:
+
+``` python
+client.upload_pipeline(pipeline_file, pipeline_name)
+exp = client.create_experiment(name=experiment_name)
+run = client.run_pipeline(exp.id, pipeline_name, pipeline_file)
+```