diff --git a/mnist-kfp/mnist-kfp-eos-gpu.ipynb b/mnist-kfp/mnist-kfp-eos-gpu.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..e91aa569548538013a4736221b36535a5c00dc37 --- /dev/null +++ b/mnist-kfp/mnist-kfp-eos-gpu.ipynb @@ -0,0 +1,634 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "colab_type": "text", + "id": "j6331ZSsQGY3" + }, + "source": [ + "# MNIST classification" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Imports for Compilation" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import kfp\n", + "from kfp.components import func_to_container_op, InputPath, OutputPath\n", + "from kfp import dsl\n", + "from kubernetes import client as k8s_client\n", + "import yaml" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Read Data Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def read_data(output_text_path: OutputPath(str), base_image='a'):\n", + " import tensorflow as tf\n", + " import numpy as np\n", + " import os\n", + " from zipfile import ZipFile\n", + " \n", + " (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()\n", + "\n", + " # Rescale the images from [0,255] to the [0.0,1.0] range.\n", + " x_train, x_test = x_train[..., np.newaxis]/255.0, x_test[..., np.newaxis]/255.0\n", + " \n", + " os.system('pwd')\n", + " os.system('ls -la')\n", + " os.system('whoami')\n", + "\n", + " np.save('xtrain.npy', x_train)\n", + " np.save('ytrain.npy', y_train)\n", + "\n", + " np.save('xtest.npy', x_test)\n", + " np.save('ytest.npy', y_test)\n", + " \n", + " zipObj = ZipFile(output_text_path, 'w')\n", + " \n", + " zipObj.write('xtrain.npy')\n", + " zipObj.write('ytrain.npy')\n", + " zipObj.write('xtest.npy')\n", + " zipObj.write('ytest.npy')\n", + " \n", + 
" zipObj.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Read Data Pipeline Component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "read_data_comp = func_to_container_op(\n", + " func=read_data,\n", + " base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Preprocess Data Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def preprocess_data(text_path: InputPath(), output_text_path: OutputPath()):\n", + " import numpy as np\n", + " import os\n", + " import tarfile\n", + " print('tarfile imported')\n", + " from zipfile import ZipFile\n", + " \n", + " with ZipFile(text_path, 'r') as zipObj:\n", + " zipObj.extractall()\n", + " \n", + " # Load data\n", + " x_train = np.load('xtrain.npy')\n", + " y_train = np.load('ytrain.npy')\n", + "\n", + " x_test = np.load('xtest.npy')\n", + " y_test = np.load('ytest.npy')\n", + " \n", + " # Filter 3 and 6\n", + " def filter_36(x, y):\n", + " keep = (y == 3) | (y == 6)\n", + " x, y = x[keep], y[keep]\n", + " y = y == 3\n", + " return x,y\n", + " \n", + " print(\"Number of unfiltered training examples:\", len(x_train))\n", + " print(\"Number of unfiltered test examples:\", len(x_test))\n", + " \n", + " x_train, y_train = filter_36(x_train, y_train)\n", + " x_test, y_test = filter_36(x_test, y_test)\n", + "\n", + " print(\"Number of filtered training examples:\", len(x_train))\n", + " print(\"Number of filtered test examples:\", len(x_test))\n", + " \n", + " # Save modified data\n", + " np.save('xtrain_filtered.npy', x_train)\n", + " np.save('ytrain_filtered.npy', y_train)\n", + "\n", + " np.save('xtest_filtered.npy', x_test)\n", + " np.save('ytest_filtered.npy', y_test)\n", 
+ " \n", + " zipObj = ZipFile(output_text_path, 'w')\n", + " \n", + " zipObj.write('xtrain_filtered.npy')\n", + " zipObj.write('ytrain_filtered.npy')\n", + " zipObj.write('xtest_filtered.npy')\n", + " zipObj.write('ytest_filtered.npy')\n", + " \n", + " zipObj.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Preprocess Data Pipeline Component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "preprocess_data_comp = func_to_container_op(\n", + " func=preprocess_data,\n", + " base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Train Full Model Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def model_full(text_path: InputPath(), output_text_path: OutputPath()):\n", + " # A simple model based off LeNet from https://keras.io/examples/mnist_cnn/\n", + " import tensorflow as tf\n", + " from zipfile import ZipFile\n", + " import numpy as np\n", + " import os\n", + " os.system('nvidia-smi')\n", + " os.system('ls /opt')\n", + " os.system('ls /opt/nvidia-driver')\n", + " os.system('ls -la /opt/nvidia-driver')\n", + " os.system('whoami')\n", + " \n", + " model = tf.keras.Sequential()\n", + " model.add(tf.keras.layers.Conv2D(32, [3, 3], activation='relu', input_shape=(28,28,1)))\n", + " model.add(tf.keras.layers.Conv2D(64, [3, 3], activation='relu'))\n", + " model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))\n", + " model.add(tf.keras.layers.Dropout(0.25))\n", + " model.add(tf.keras.layers.Flatten())\n", + " model.add(tf.keras.layers.Dense(128, activation='relu'))\n", + " model.add(tf.keras.layers.Dropout(0.5))\n", + " model.add(tf.keras.layers.Dense(1))\n", + "\n", + " 
model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n", + " optimizer=tf.keras.optimizers.Adam(),\n", + " metrics=['accuracy'])\n", + "\n", + " model.summary()\n", + " \n", + " with ZipFile(text_path, 'r') as zipObj:\n", + " zipObj.extractall()\n", + " \n", + " # Load data\n", + " x_train = np.load('xtrain_filtered.npy')\n", + " y_train = np.load('ytrain_filtered.npy')\n", + "\n", + " x_test = np.load('xtest_filtered.npy')\n", + " y_test = np.load('ytest_filtered.npy')\n", + " \n", + " model.fit(x_train, y_train, batch_size=128, epochs=10, verbose=1, validation_data=(x_test, y_test))\n", + "\n", + " cnn_results = model.evaluate(x_test, y_test)\n", + " \n", + " with open(output_text_path, 'w') as writer:\n", + " writer.write(str(cnn_results) + '\\n') " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Train Full Model Pipeline Component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_full_comp = func_to_container_op(\n", + " func=model_full,\n", + " base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Train Fair Model Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def model_fair(text_path: InputPath(), output_text_path: OutputPath()):\n", + " import tensorflow as tf\n", + " from zipfile import ZipFile\n", + " import numpy as np\n", + " \n", + " model = tf.keras.Sequential()\n", + " model.add(tf.keras.layers.Flatten(input_shape=(28,28,1)))\n", + " model.add(tf.keras.layers.Dense(2, activation='relu'))\n", + " model.add(tf.keras.layers.Dense(1))\n", + "\n", + " model.compile(loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),\n", + " optimizer=tf.keras.optimizers.Adam(),\n", + " 
metrics=['accuracy'])\n", + "\n", + " model.summary()\n", + " \n", + " with ZipFile(text_path, 'r') as zipObj:\n", + " zipObj.extractall()\n", + " \n", + " # Load data\n", + " x_train = np.load('xtrain_filtered.npy')\n", + " y_train = np.load('ytrain_filtered.npy')\n", + "\n", + " x_test = np.load('xtest_filtered.npy')\n", + " y_test = np.load('ytest_filtered.npy')\n", + " \n", + " model.fit(x_train, y_train, batch_size=128, epochs=1, verbose=1, validation_data=(x_test, y_test))\n", + "\n", + " cnn_results = model.evaluate(x_test, y_test)\n", + " \n", + " with open(output_text_path, 'w') as writer:\n", + " writer.write(str(cnn_results) + '\\n') " + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Train Fair Model Pipeline Component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "model_fair_comp = func_to_container_op(\n", + " func=model_fair,\n", + " base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Evaluate Models Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def models_evaluate(text_path_0: InputPath(), text_path_1: InputPath()):\n", + " print('model 0:')\n", + " with open(text_path_0, 'r') as reader:\n", + " for line in reader:\n", + " print(line, end = '')\n", + " print('model 1:')\n", + " with open(text_path_1, 'r') as reader:\n", + " for line in reader:\n", + " print(line, end = '')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Evaluate Models Pipeline Component" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "models_evaluate_comp = func_to_container_op(\n", + " func=models_evaluate,\n", + 
" base_image='gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-33'\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Create Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "krb_secret = k8s_client.V1SecretVolumeSource(secret_name='krb-secret')\n", + "krb_secret_volume = k8s_client.V1Volume(name='krb-secret-vol', secret=krb_secret)\n", + "krb_secret_volume_mount = k8s_client.V1VolumeMount(name=krb_secret_volume.name, mount_path='/secret/krb-secret-vol')\n", + "\n", + "eos_host_path = k8s_client.V1HostPathVolumeSource(path='/var/eos')\n", + "eos_volume = k8s_client.V1Volume(name='eos', host_path=eos_host_path)\n", + "eos_volume_mount = k8s_client.V1VolumeMount(name=eos_volume.name, mount_path='/eos')\n", + "\n", + "nvidia_driver_host_path = k8s_client.V1HostPathVolumeSource(path='/opt/nvidia-driver')\n", + "nvidia_driver_volume = k8s_client.V1Volume(name='nvidia-driver', host_path=nvidia_driver_host_path)\n", + "nvidia_driver_volume_mount = k8s_client.V1VolumeMount(name=nvidia_driver_volume.name, mount_path='/opt/nvidia-driver')\n", + "\n", + "env_path = k8s_client.V1EnvVar(name='PATH', value='/bin:/usr/bin:/usr/local/bin:/opt/nvidia-driver/bin')\n", + "env_ld_library_path = k8s_client.V1EnvVar(name='LD_LIBRARY_PATH', value='/opt/nvidia-driver/lib64')\n", + "\n", + "@dsl.pipeline(\n", + " name='eos-gpu-kfp',\n", + " description='TEST EOS GPU'\n", + ")\n", + "def ml_pipeline_first():\n", + " data_dir = read_data_comp() \\\n", + " .add_volume(krb_secret_volume) \\\n", + " .add_volume_mount(krb_secret_volume_mount) \\\n", + " .add_volume(eos_volume) \\\n", + " .add_volume_mount(eos_volume_mount)\n", + " \n", + " new_dir = preprocess_data_comp(data_dir.output) \\\n", + " .add_volume(krb_secret_volume) \\\n", + " .add_volume_mount(krb_secret_volume_mount) \\\n", + " .add_volume(eos_volume) \\\n", + " 
.add_volume_mount(eos_volume_mount)\n", + " \n", + " cnn_res = model_full_comp(new_dir.output) \\\n", + " .add_volume(krb_secret_volume) \\\n", + " .add_volume_mount(krb_secret_volume_mount) \\\n", + " .add_volume(eos_volume) \\\n", + " .add_volume_mount(eos_volume_mount) \\\n", + " .set_gpu_limit(1) \\\n", + " .add_volume(nvidia_driver_volume) \\\n", + " .add_volume_mount(nvidia_driver_volume_mount) \\\n", + " .add_env_variable(env_path) \\\n", + " .add_env_variable(env_ld_library_path)\n", + " \n", + " fairnn_res = model_fair_comp(new_dir.output) \\\n", + " .add_volume(krb_secret_volume) \\\n", + " .add_volume_mount(krb_secret_volume_mount) \\\n", + " .add_volume(eos_volume) \\\n", + " .add_volume_mount(eos_volume_mount)\n", + " \n", + " models_evaluate_comp(cnn_res.output, fairnn_res.output) \\\n", + " .add_volume(krb_secret_volume) \\\n", + " .add_volume_mount(krb_secret_volume_mount) \\\n", + " .add_volume(eos_volume) \\\n", + " .add_volume_mount(eos_volume_mount)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Compile Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipeline_name = 'example_kfp_pipeline_40'\n", + "pipeline_file = pipeline_name + '.yaml'\n", + "experiment_name = 'example_kfp_experiment_dg'\n", + "client = kfp.Client()\n", + "\n", + "workflow = kfp.compiler.Compiler().compile(ml_pipeline_first, pipeline_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Function for Accessing EOS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "def post_process(pipeline_file, outfile):\n", + " with open(pipeline_file, \"r\") as stream:\n", + " pip_dict = yaml.safe_load(stream)\n", + " \n", + " copy_command = 'cp /secret/krb-secret-vol/krb5cc_1000 /tmp/krb5cc_1000'\n", + " chmod_command = 'chmod 600 
/tmp/krb5cc_1000'\n", + " \n", + " for template in pip_dict['spec']['templates']:\n", + " if 'container' in template.keys():\n", + " component_command_list = template['container']['command'][2].split('\\n')\n", + " component_command_list.insert(2, copy_command)\n", + " component_command_list.insert(3, chmod_command)\n", + " \n", + " # Check EOS access with this command\n", + " # component_command_list.insert(4, 'ls -l /eos/user/d/dgolubov')\n", + " joined_string = '\\n'.join(component_command_list)\n", + "\n", + " template['container']['command'][2] = joined_string\n", + " \n", + " with open(outfile, 'w') as outfile:\n", + " yaml.dump(pip_dict, outfile, default_flow_style=False)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Apply Access to EOS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "post_process(pipeline_file, pipeline_file)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "tags": [] + }, + "source": [ + "### Upload and Run Pipeline" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.upload_pipeline(pipeline_file, pipeline_name)\n", + "exp = client.create_experiment(name=experiment_name)\n", + "run = client.run_pipeline(exp.id, pipeline_name, pipeline_file)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "colab": { + "collapsed_sections": [], + "name": "mnist.ipynb", + "private_outputs": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "kubeflow_notebook": { + "autosnapshot": false, + "docker_image": "gitlab-registry.cern.ch/ai-ml/kubeflow_images/tensorflow-notebook-gpu-2.1.0:v0.6.1-30", + "experiment": { + "id": "", + "name": "" + }, + "experiment_name": "", + 
"katib_metadata": { + "algorithm": { + "algorithmName": "grid" + }, + "maxFailedTrialCount": 3, + "maxTrialCount": 12, + "objective": { + "objectiveMetricName": "", + "type": "minimize" + }, + "parallelTrialCount": 3, + "parameters": [] + }, + "katib_run": false, + "pipeline_description": "", + "pipeline_name": "", + "snapshot_volumes": false, + "steps_defaults": [], + "volume_access_mode": "rwm", + "volumes": [] + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}