Add section on hyperparameter tuning using Vertex AI's blackbox optimization service
parent 96edbd5ef4
commit c345b60732
@@ -2409,13 +2409,264 @@
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Hyperparameter Tuning using Keras Tuner on Vertex AI"
|
||||
"# Hyperparameter Tuning on Vertex AI"
|
||||
]
|
||||
},
+{
+"cell_type": "code",
+"execution_count": 87,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Writing my_vertex_ai_trial.py\n"
+]
+}
+],
+"source": [
+"%%writefile my_vertex_ai_trial.py\n",
+"\n",
+"import argparse\n",
+"\n",
+"parser = argparse.ArgumentParser()\n",
+"parser.add_argument(\"--n_hidden\", type=int, default=2)\n",
+"parser.add_argument(\"--n_neurons\", type=int, default=256)\n",
+"parser.add_argument(\"--learning_rate\", type=float, default=1e-2)\n",
+"parser.add_argument(\"--optimizer\", default=\"adam\")\n",
+"args = parser.parse_args()\n",
+"\n",
+"import tensorflow as tf\n",
+"\n",
+"def build_model(args):\n",
+"    with tf.distribute.MirroredStrategy().scope():\n",
+"        model = tf.keras.Sequential()\n",
+"        model.add(tf.keras.layers.Flatten(input_shape=[28, 28], dtype=tf.uint8))\n",
+"        for _ in range(args.n_hidden):\n",
+"            model.add(tf.keras.layers.Dense(args.n_neurons, activation=\"relu\"))\n",
+"        model.add(tf.keras.layers.Dense(10, activation=\"softmax\"))\n",
+"        opt = tf.keras.optimizers.get(args.optimizer)\n",
+"        opt.learning_rate = args.learning_rate\n",
+"        model.compile(loss=\"sparse_categorical_crossentropy\", optimizer=opt,\n",
+"                      metrics=[\"accuracy\"])\n",
+"        return model\n",
+"\n",
+"# extra code – loads and splits the dataset\n",
+"mnist = tf.keras.datasets.mnist.load_data()\n",
+"(X_train_full, y_train_full), (X_test, y_test) = mnist\n",
+"X_valid, X_train = X_train_full[:5000], X_train_full[5000:]\n",
+"y_valid, y_train = y_train_full[:5000], y_train_full[5000:]\n",
+"\n",
+"# extra code – use the AIP_* environment variables and create the callbacks\n",
+"import os\n",
+"model_dir = os.getenv(\"AIP_MODEL_DIR\")\n",
+"tensorboard_log_dir = os.getenv(\"AIP_TENSORBOARD_LOG_DIR\")\n",
+"checkpoint_dir = os.getenv(\"AIP_CHECKPOINT_DIR\")\n",
+"trial_id = os.getenv(\"CLOUD_ML_TRIAL_ID\")\n",
+"tensorboard_cb = tf.keras.callbacks.TensorBoard(tensorboard_log_dir)\n",
+"early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5)\n",
+"callbacks = [tensorboard_cb, early_stopping_cb]\n",
+"\n",
+"model = build_model(args)\n",
+"history = model.fit(X_train, y_train, validation_data=(X_valid, y_valid),\n",
+"                    epochs=10, callbacks=callbacks)\n",
+"model.save(model_dir)  # extra code\n",
+"\n",
+"import hypertune\n",
+"\n",
+"hypertune = hypertune.HyperTune()\n",
+"hypertune.report_hyperparameter_tuning_metric(\n",
+"    hyperparameter_metric_tag=\"accuracy\",  # name of the reported metric\n",
+"    metric_value=max(history.history[\"val_accuracy\"]),  # max accuracy value\n",
+"    global_step=model.optimizer.iterations.numpy(),\n",
+")"
+]
+},
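Note: the tuning service simply executes this script with the sampled hyperparameters passed as command-line flags, so it can be smoke-tested locally before launching any cloud job. A minimal sketch, using local stand-in paths for the `AIP_*` variables that Vertex AI normally injects into the training container:

```python
# Local smoke test for my_vertex_ai_trial.py (the AIP_* values below are
# hypothetical stand-ins; Vertex AI sets the real ones itself).
import os
import subprocess

env = os.environ.copy()
env["AIP_MODEL_DIR"] = "/tmp/trial_model"           # assumed stand-in
env["AIP_TENSORBOARD_LOG_DIR"] = "/tmp/trial_logs"  # assumed stand-in
env["AIP_CHECKPOINT_DIR"] = "/tmp/trial_ckpt"       # assumed stand-in

# One trial's worth of hyperparameters, passed exactly as the service would.
subprocess.run(["python", "my_vertex_ai_trial.py",
                "--n_hidden", "3", "--n_neurons", "128",
                "--learning_rate", "0.001", "--optimizer", "sgd"],
               env=env, check=True)
```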
+{
+"cell_type": "code",
+"execution_count": 88,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Training script copied to:\n",
+"gs://homl3-mybucket5/staging/aiplatform-2022-04-18-18:14:02.860-aiplatform_custom_trainer_script-0.1.tar.gz.\n"
+]
+}
+],
+"source": [
+"trial_job = aiplatform.CustomJob.from_local_script(\n",
+"    display_name=\"my_search_trial_job\",\n",
+"    script_path=\"my_vertex_ai_trial.py\",  # path to your training script\n",
+"    container_uri=\"gcr.io/cloud-aiplatform/training/tf-gpu.2-4:latest\",\n",
+"    staging_bucket=f\"gs://{bucket_name}/staging\",\n",
+"    accelerator_type=\"NVIDIA_TESLA_K80\",\n",
+"    accelerator_count=2,  # in this example, each trial will have 2 GPUs\n",
+")"
+]
+},
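Each trial reuses this job spec: Vertex AI appends the sampled values as extra `--flag=value` arguments, which is why the `argparse` flag names in the script must match the `parameter_spec` keys defined in the next cell. If desired, the job can be run once on its own, with the script's default hyperparameters, as a sanity check before paying for a full search; a sketch:

```python
# Hypothetical sanity check: launch a single (real) Vertex AI job with the
# script's default hyperparameters before starting the full search.
trial_job.run(sync=True)        # blocks until the job finishes
print(trial_job.resource_name)  # fully qualified job name, for reattaching
```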
+{
+"cell_type": "code",
+"execution_count": 89,
+"metadata": {},
+"outputs": [
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"Creating HyperparameterTuningJob\n",
+"HyperparameterTuningJob created. Resource name: projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568\n",
+"To use this HyperparameterTuningJob in another session:\n",
+"hpt_job = aiplatform.HyperparameterTuningJob.get('projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568')\n",
+"View HyperparameterTuningJob:\n",
+"https://console.cloud.google.com/ai/platform/locations/us-central1/training/5825136187899117568?project=522977795627\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_RUNNING\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_RUNNING\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_RUNNING\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_RUNNING\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_RUNNING\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_RUNNING\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_RUNNING\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_RUNNING\n",
+"HyperparameterTuningJob projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568 current state:\n",
+"JobState.JOB_STATE_SUCCEEDED\n",
+"HyperparameterTuningJob run completed. Resource name: projects/522977795627/locations/us-central1/hyperparameterTuningJobs/5825136187899117568\n"
+]
+}
+],
+"source": [
+"from google.cloud.aiplatform import hyperparameter_tuning as hpt\n",
+"\n",
+"hp_job = aiplatform.HyperparameterTuningJob(\n",
+"    display_name=\"my_hp_search_job\",\n",
+"    custom_job=trial_job,\n",
+"    metric_spec={\"accuracy\": \"maximize\"},\n",
+"    parameter_spec={\n",
+"        \"learning_rate\": hpt.DoubleParameterSpec(min=1e-3, max=10, scale=\"log\"),\n",
+"        \"n_neurons\": hpt.IntegerParameterSpec(min=1, max=300, scale=\"linear\"),\n",
+"        \"n_hidden\": hpt.IntegerParameterSpec(min=1, max=10, scale=\"linear\"),\n",
+"        \"optimizer\": hpt.CategoricalParameterSpec([\"sgd\", \"adam\"]),\n",
+"    },\n",
+"    max_trial_count=100,\n",
+"    parallel_trial_count=20,\n",
+")\n",
+"hp_job.run()"
+]
+},
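As the log above shows, a long-running tuning job can be reattached to from another session via its resource name, which is useful if the notebook kernel dies during the search:

```python
# Reattach to the tuning job from a fresh session, using the resource name
# printed in the log above.
from google.cloud import aiplatform

job_name = ("projects/522977795627/locations/us-central1"
            "/hyperparameterTuningJobs/5825136187899117568")
hp_job = aiplatform.HyperparameterTuningJob.get(job_name)
print(hp_job.state)  # e.g., JobState.JOB_STATE_SUCCEEDED
```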
+{
+"cell_type": "code",
+"execution_count": 90,
+"metadata": {},
+"outputs": [],
+"source": [
+"def get_final_metric(trial, metric_id):\n",
+"    for metric in trial.final_measurement.metrics:\n",
+"        if metric.metric_id == metric_id:\n",
+"            return metric.value\n",
+"\n",
+"trials = hp_job.trials\n",
+"trial_accuracies = [get_final_metric(trial, \"accuracy\") for trial in trials]\n",
+"best_trial = trials[np.argmax(trial_accuracies)]"
+]
+},
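Each trial also carries its `id`, so the whole search is easy to tabulate. A short sketch, assuming pandas is available (note that the cell above relies on `np`, i.e. NumPy imported earlier in the notebook):

```python
# Summarize the search: one row per trial, sorted by final accuracy.
# Assumes pandas is installed; trials/trial_accuracies come from the cell above.
import pandas as pd

summary = pd.DataFrame({
    "trial_id": [trial.id for trial in trials],
    "accuracy": trial_accuracies,
}).sort_values("accuracy", ascending=False)
print(summary.head())
```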
+{
+"cell_type": "code",
+"execution_count": 91,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"0.977400004863739"
+]
+},
+"execution_count": 91,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"max(trial_accuracies)"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 92,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"'98'"
+]
+},
+"execution_count": 92,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"best_trial.id"
+]
+},
+{
+"cell_type": "code",
+"execution_count": 93,
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"[parameter_id: \"learning_rate\"\n",
+"value {\n",
+"  number_value: 0.001\n",
+"}\n",
+", parameter_id: \"n_hidden\"\n",
+"value {\n",
+"  number_value: 8.0\n",
+"}\n",
+", parameter_id: \"n_neurons\"\n",
+"value {\n",
+"  number_value: 216.0\n",
+"}\n",
+", parameter_id: \"optimizer\"\n",
+"value {\n",
+"  string_value: \"adam\"\n",
+"}\n",
+"]"
+]
+},
+"execution_count": 93,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"best_trial.parameters"
+]
+},
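To exploit the winning trial, its hyperparameters can be fed back into the same `build_model()` function from the training script. A sketch, with the values unpacked by hand from the protobuf output above; it assumes `build_model()` and the MNIST split have been copied into the notebook, since importing the script directly would trigger its `parse_args()` call:

```python
# Hypothetical: retrain locally with the best trial's hyperparameters,
# taken from the protobuf output shown above.
import argparse

best_args = argparse.Namespace(n_hidden=8, n_neurons=216,
                               learning_rate=0.001, optimizer="adam")
model = build_model(best_args)  # build_model() copied from the trial script
model.fit(X_train, y_train, validation_data=(X_valid, y_valid), epochs=10)
```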
+{
+"cell_type": "markdown",
+"metadata": {},
+"source": [
+"# Extra Material – Distributed Keras Tuner on Vertex AI"
+]
+},
 {
 "cell_type": "code",
 "execution_count": 94,
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
@@ -2506,12 +2757,12 @@
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Replace `gs://my_bucket` with your bucket's name:"
|
||||
"Replace `/gcs/my_bucket/` with <code>/gcs/<i>{bucket_name}</i>/</code>:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 88,
|
||||
"execution_count": 95,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
|
@@ -2524,7 +2775,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 89,
+"execution_count": 96,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2540,7 +2791,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 90,
+"execution_count": 97,
 "metadata": {},
 "outputs": [
 {
@@ -2623,7 +2874,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 91,
+"execution_count": 98,
 "metadata": {},
 "outputs": [],
 "source": [
@@ -2650,7 +2901,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 92,
+"execution_count": 99,
 "metadata": {},
 "outputs": [
 {
@@ -2689,14 +2940,14 @@
 },
 {
 "cell_type": "code",
-"execution_count": 93,
+"execution_count": 100,
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"Uploaded datasets/mnist\n"
+"Uploaded datasets/mnist \n"
 ]
 }
 ],
@@ -2713,7 +2964,7 @@
 },
 {
 "cell_type": "code",
-"execution_count": 94,
+"execution_count": 101,
 "metadata": {},
 "outputs": [
 {
@@ -2721,23 +2972,25 @@
"output_type": "stream",
|
||||
"text": [
|
||||
"Creating ImageDataset\n",
|
||||
"Create ImageDataset backing LRO: projects/522977795627/locations/us-central1/datasets/7569473452214583296/operations/3268657372830105600\n",
|
||||
"ImageDataset created. Resource name: projects/522977795627/locations/us-central1/datasets/7569473452214583296\n",
|
||||
"Create ImageDataset backing LRO: projects/522977795627/locations/us-central1/datasets/7532459492777132032/operations/3812233931370004480\n",
|
||||
"ImageDataset created. Resource name: projects/522977795627/locations/us-central1/datasets/7532459492777132032\n",
|
||||
"To use this ImageDataset in another session:\n",
|
||||
"ds = aiplatform.ImageDataset('projects/522977795627/locations/us-central1/datasets/7569473452214583296')\n",
|
||||
"Importing ImageDataset data: projects/522977795627/locations/us-central1/datasets/7569473452214583296\n",
|
||||
"Import ImageDataset data backing LRO: projects/522977795627/locations/us-central1/datasets/7569473452214583296/operations/5061090024523563008\n",
|
||||
"ImageDataset data imported. Resource name: projects/522977795627/locations/us-central1/datasets/7569473452214583296\n"
|
||||
"ds = aiplatform.ImageDataset('projects/522977795627/locations/us-central1/datasets/7532459492777132032')\n",
|
||||
"Importing ImageDataset data: projects/522977795627/locations/us-central1/datasets/7532459492777132032\n",
|
||||
"Import ImageDataset data backing LRO: projects/522977795627/locations/us-central1/datasets/7532459492777132032/operations/3010593197698056192\n",
|
||||
"ImageDataset data imported. Resource name: projects/522977795627/locations/us-central1/datasets/7532459492777132032\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from aiplatform.schema.dataset.ioformat.image import single_label_classification\n",
|
||||
"\n",
|
||||
"mnist_dataset = aiplatform.ImageDataset.create(\n",
|
||||
" display_name=\"mnist-dataset\",\n",
|
||||
" gcs_source=[f\"gs://{bucket_name}/mnist/import.csv\"],\n",
|
||||
" project=project_id,\n",
|
||||
" import_schema_uri=aiplatform.schema.dataset.ioformat.image.single_label_classification,\n",
|
||||
" sync=True\n",
|
||||
" import_schema_uri=single_label_classification,\n",
|
||||
" sync=True,\n",
|
||||
")"
|
||||
]
|
||||
},
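The `import.csv` file referenced here follows Vertex AI's single-label image import format: one `gcs_uri,label` row per image. A hedged way to double-check what the earlier upload cell wrote, assuming the `google-cloud-storage` client is installed:

```python
# Peek at the first rows of the import file; each row should look like
# "gs://<bucket>/mnist/<path>.png,<label>" (exact paths depend on the
# upload cell earlier in the notebook, not shown in this diff).
from google.cloud import storage

blob = storage.Client().bucket(bucket_name).blob("mnist/import.csv")
print(blob.download_as_text().splitlines()[:3])
```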
@@ -2745,7 +2998,14 @@
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**TODO**: create an AutoML training job on this dataset."
|
||||
"Create an AutoML training job on this dataset:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"**TODO**"
|
||||
]
|
||||
},
|
||||
{
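The new cell above is still a TODO in this commit. For reference, a hypothetical sketch of what such a job could look like with the SDK's `AutoMLImageTrainingJob`; the display name and budget are illustrative, not taken from the notebook:

```python
# Hypothetical AutoML image-classification job on the imported dataset.
automl_job = aiplatform.AutoMLImageTrainingJob(
    display_name="mnist-automl-job",
    prediction_type="classification",
)
automl_model = automl_job.run(
    dataset=mnist_dataset,
    budget_milli_node_hours=8000,  # 8 node hours; AutoML's minimum for images
    sync=True,
)
```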