diff --git a/notebooks/README.md b/notebooks/README.md
new file mode 100644
index 0000000..4f5c571
--- /dev/null
+++ b/notebooks/README.md
@@ -0,0 +1,3 @@
+# Notebooks
+
+Data science for MyHerodotus is performed using Jupyter notebooks running on Vertex AI Workbench. However, the file size of Jupyter notebooks makes it prohibitive to download them. Thus, all notebooks are stored in the `data-
\ No newline at end of file
diff --git a/notebooks/fine-tuning.ipynb b/notebooks/fine-tuning.ipynb
deleted file mode 100644
index 2a2e13b..0000000
--- a/notebooks/fine-tuning.ipynb
+++ /dev/null
@@ -1,915 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "e2b8a82f-9020-477d-abed-2e6f0e081c0f",
-   "metadata": {},
-   "source": [
-    "# Fine-tune a Gemini and a Gemma model\n",
-    "\n",
-    "+ Dataset: [Guanaco](https://huggingface.co/datasets/timdettmers/openassistant-guanaco)\n",
-    "+ Prepare the data: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-supervised-tuning-prepare\n",
-    "+ Gemma: https://huggingface.co/google/gemma-2-27b-it-pytorch\n",
-    "  - Also https://console.cloud.google.com/vertex-ai/publishers/google/model-garden/gemma2\n",
-    "  - Also https://www.kaggle.com/models/google/gemma-2\n",
-    "+ Gemini: https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-use-supervised-tuning"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d5e90114-d5b6-4382-a0ed-f50da90d022d",
-   "metadata": {},
-   "source": [
-    "## Step 0. Install and import libraries"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "id": "1bb3a321-1db0-404c-87eb-99038430bff8",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Writing requirements.txt\n"
-     ]
-    }
-   ],
-   "source": [
-    "%%writefile requirements.txt\n",
-    "datasets\n",
-    "pandas\n",
-    "torch\n",
-    "google-cloud-aiplatform\n",
-    "google-cloud-storage\n",
-    "jsonschema"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7f542d78-860a-44e7-a200-8bd353a1233b",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!pip install --upgrade -r requirements.txt"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3d916ee2-00d7-4719-afe3-d5e97215c34e",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "!pip install datasets"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 9,
-   "id": "bb50502e-9218-450a-ad0c-d08159df55c7",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Requirement already satisfied: jsonschema in /opt/conda/lib/python3.10/site-packages (4.23.0)\n",
-      "Requirement already satisfied: attrs>=22.2.0 in /opt/conda/lib/python3.10/site-packages (from jsonschema) (24.2.0)\n",
-      "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/conda/lib/python3.10/site-packages (from jsonschema) (2023.12.1)\n",
-      "Requirement already satisfied: referencing>=0.28.4 in /opt/conda/lib/python3.10/site-packages (from jsonschema) (0.35.1)\n",
-      "Requirement already satisfied: rpds-py>=0.7.1 in /opt/conda/lib/python3.10/site-packages (from jsonschema) (0.20.0)\n"
-     ]
-    }
-   ],
-   "source": [
-    "!pip install jsonschema"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 54,
-   "id": "bdba50cd-c52b-462d-8e88-1c2c923599bf",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "import pandas as pd\n",
-    "import time\n",
-    "import torch\n",
-    "from datasets import load_dataset\n",
-    "from jsonschema import validate\n",
-    "from jsonschema.protocols import Validator\n",
-    "\n",
-    "import vertexai\n",
-    "from vertexai.generative_models import GenerativeModel\n",
-    "from vertexai.tuning import sft\n",
-    "\n",
-    "from google.cloud import storage\n",
-    "from google.cloud import aiplatform"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 4,
-   "id": "ef947d19-fd2c-424e-8742-fa5fcace3a38",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "README.md: 100%|███████████████████████████| 7.62k/7.62k [00:00<00:00, 19.5MB/s]\n",
-      "train-00000-of-00001.parquet: 100%|█████████| 14.5M/14.5M [00:00<00:00, 149MB/s]\n",
-      "validation-00000-of-00001.parquet: 100%|████| 1.82M/1.82M [00:00<00:00, 295MB/s]\n",
-      "Generating train split: 100%|██| 87599/87599 [00:00<00:00, 103689.07 examples/s]\n",
-      "Generating validation split: 100%|█| 10570/10570 [00:00<00:00, 302876.11 example\n",
-      "{'id': '5733be284776f41900661182', 'title': 'University_of_Notre_Dame', 'context': 'Architecturally, the school has a Catholic character. Atop the Main Building\\'s gold dome is a golden statue of the Virgin Mary. Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised with the legend \"Venite Ad Me Omnes\". Next to the Main Building is the Basilica of the Sacred Heart. Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection. It is a replica of the grotto at Lourdes, France where the Virgin Mary reputedly appeared to Saint Bernadette Soubirous in 1858. At the end of the main drive (and in a direct line that connects through 3 statues and the Gold Dome), is a simple, modern stone statue of Mary.', 'question': 'To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?', 'answers': {'text': ['Saint Bernadette Soubirous'], 'answer_start': [515]}}\n"
-     ]
-    }
-   ],
-   "source": [
-    "!python -c \"from datasets import load_dataset; print(load_dataset('squad', split='train')[0])\""
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "9e5760f1-f238-4a50-a3f0-c0117d8bc7e3",
-   "metadata": {},
-   "source": [
-    "## Step 1. Transform dataset for Vertex\n",
-    "\n",
-    "GUANACO dataset shape:\n",
-    "\n",
-    "```json\n",
-    "{\n",
-    "    \"text\": \"### Human: blah blah .### Assistant: blah blah.### Human: blah blah blah\"\n",
-    "}\n",
-    "\n",
-    "```\n",
-    "\n",
-    "Vertex tuning dataset shape. Note that the `systemInstruction` field is optional -- and not necessary for this\n",
-    "exercise. [From here](https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini-supervised-tuning-prepare).\n",
-    "\n",
-    "```json\n",
-    "{\n",
-    "  \"systemInstruction\": {\n",
-    "    \"role\": string,\n",
-    "    \"parts\": [\n",
-    "      {\n",
-    "        \"text\": string\n",
-    "      }\n",
-    "    ]\n",
-    "  },\n",
-    "  \"contents\": [\n",
-    "    {\n",
-    "      \"role\": string, // must be \"user\" or \"model\"\n",
-    "      \"parts\": [\n",
-    "        {\n",
-    "          // Union field data can be only one of the following:\n",
-    "          \"text\": string,\n",
-    "          \"fileData\": {\n",
-    "            \"mimeType\": string,\n",
-    "            \"fileUri\": string\n",
-    "          }\n",
-    "        }\n",
-    "      ]\n",
-    "    }\n",
-    "  ]\n",
-    "}\n",
-    "```"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "cd5b2560-18d2-4691-a7ef-747e16ebabdd",
-   "metadata": {},
-   "source": [
-    "Here is a pseudocode transform for the dataset:\n",
-    "\n",
-    "1. Load a row from the Guanaco dataset.\n",
-    "1. Read the `text` field.\n",
-    "1. Split the `text` field on the `###` character string.\n",
-    "1. Read the first substring of each item in the list, reading up to the `:` character.\n",
-    "1. If the first substring is \"Human\", create a new dictionary like so:\n",
-    "\n",
-    "   ```json\n",
-    "   {\n",
-    "      \"role\": \"user\",\n",
-    "      \"parts\": [{\n",
-    "          \"text\": \"[REMAINDER OF SPLIT]\"\n",
-    "      }]\n",
-    "   }\n",
-    "   ```\n",
-    "1. If the first substring is \"Assistant\", create a new dictionary like so:\n",
-    "\n",
-    "   ```json\n",
-    "   {\n",
-    "      \"role\": \"model\",\n",
-    "      \"parts\": [{\n",
-    "          \"text\": \"[REMAINDER OF SPLIT]\"\n",
-    "      }]\n",
-    "   }\n",
-    "   ```\n",
-    "1. Append each dictionary to a list.\n",
-    "1. Create one last new dictionary and set the `contents` field like so:\n",
-    "\n",
-    "  ```json\n",
-    "  {\n",
-    "      \"contents\": [TEXT DICTIONARIES]\n",
-    "  }\n",
-    "  ```"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "id": "9c820ca1-bd8f-4872-8839-fb879822c341",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# Create the schema validation\n",
-    "# NOTE: This is only a partial schema for this data transform purpose.\n",
-    "from jsonschema import Draft202012Validator\n",
-    "\n",
-    "schema = {\n",
-    "    \"type\": \"object\",\n",
-    "    \"properties\": {\n",
-    "        \"contents\": {\n",
-    "            \"type\": \"array\",\n",
-    "            \"items\": {\n",
-    "                \"type\": \"object\",\n",
-    "                \"properties\": {\n",
-    "                    \"role\": { \"type\": \"string\" },\n",
-    "                    \"parts\": {\n",
-    "                        \"type\": \"array\",\n",
-    "                        \"items\": {\n",
-    "                            \"type\": \"object\",\n",
-    "                            \"properties\": {\n",
-    "                                \"text\": { \"type\": \"string\" }\n",
-    "                            }\n",
-    "                        }\n",
-    "                    }\n",
-    "                }\n",
-    "            }\n",
-    "        }\n",
-    "    }\n",
-    "}\n",
-    "# `jsonschema.protocols.Validator` is only a typing Protocol, so its `check_schema`\n",
-    "# has no implementation; a concrete validator class actually checks the schema.\n",
-    "Draft202012Validator.check_schema(schema)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1ba8036a-977b-4698-8009-49a642484dab",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Pandas\n",
-    "splits = {'train': 'openassistant_best_replies_train.jsonl', 'test': 'openassistant_best_replies_eval.jsonl'}\n",
-    "df = pd.read_json(\"hf://datasets/timdettmers/openassistant-guanaco/\" + splits[\"train\", lines=True])"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 6,
-   "id": "946307ee-1148-4b4c-936e-c29e544d1773",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "69fed47f25734ac984687c3b428ca44d",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "README.md:   0%|          | 0.00/395 [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "Repo card metadata block was not found. Setting CardData to empty.\n"
-     ]
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "5b45d57b8e0e4c0caa11caf9e6180793",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "openassistant_best_replies_train.jsonl:   0%|          | 0.00/20.9M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "e58b6d53059243f094f395263dab9dda",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "openassistant_best_replies_eval.jsonl:   0%|          | 0.00/1.11M [00:00<?, ?B/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "1bb9a1cc248b440a9262a86fc2240d14",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating train split:   0%|          | 0/9846 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "application/vnd.jupyter.widget-view+json": {
-       "model_id": "c404438165f947daa8cd0aab26410ea2",
-       "version_major": 2,
-       "version_minor": 0
-      },
-      "text/plain": [
-       "Generating test split:   0%|          | 0/518 [00:00<?, ? examples/s]"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    }
-   ],
-   "source": [
-    "# HuggingFace datasets\n",
-    "ds = load_dataset(\"timdettmers/openassistant-guanaco\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "id": "7e8fbfee-ec46-4fc8-810a-42901f27b3f5",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{'text': '### Human: Can you write a short introduction about the relevance of the term \"monopsony\" in economics? Please use examples related to potential monopsonies in the labour market and cite relevant research.### Assistant: \"Monopsony\" refers to a market structure where there is only one buyer for a particular good or service. In economics, this term is particularly relevant in the labor market, where a monopsony employer has significant power over the wages and working conditions of their employees. The presence of a monopsony can result in lower wages and reduced employment opportunities for workers, as the employer has little incentive to increase wages or provide better working conditions.\\n\\nRecent research has identified potential monopsonies in industries such as retail and fast food, where a few large companies control a significant portion of the market (Bivens & Mishel, 2013). In these industries, workers often face low wages, limited benefits, and reduced bargaining power, leading to a situation where they are dependent on the employer for their livelihood. This dependence can result in further suppression of wages and a decline in working conditions.\\n\\nOverall, the concept of monopsony is essential to understanding the dynamics of labor markets and the impact of market power on workers. Further research is needed to understand the extent and impact of monopsonies on the economy and to develop policies to address this issue.\\n\\nReferences:\\nBivens, J., & Mishel, L. (2013). The Pay of Corporate Executives and Financial Professionals as Evidence of Rents in Top 1 Percent Incomes. Journal of Economic Perspectives, 27(3), 57-78.### Human: Now explain it to a dog'}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(ds['train'][0])"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "63a383bd-5799-4e2f-9012-84a7f1a8b82d",
-   "metadata": {},
-   "source": [
-    "### Step 1a. Develop transform on single row"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 20,
-   "id": "f769f9a3-c17c-486e-b845-1579b0683d4b",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "test_row = ds['train'][0]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 24,
-   "id": "621d6e9b-3432-4656-9191-0393b005e204",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "['', 'Human: Can you write a short introduction about the relevance of the term \"monopsony\" in economics? Please use examples related to potential monopsonies in the labour market and cite relevant research.', 'Assistant: \"Monopsony\" refers to a market structure where there is only one buyer for a particular good or service. In economics, this term is particularly relevant in the labor market, where a monopsony employer has significant power over the wages and working conditions of their employees. The presence of a monopsony can result in lower wages and reduced employment opportunities for workers, as the employer has little incentive to increase wages or provide better working conditions.\\n\\nRecent research has identified potential monopsonies in industries such as retail and fast food, where a few large companies control a significant portion of the market (Bivens & Mishel, 2013). In these industries, workers often face low wages, limited benefits, and reduced bargaining power, leading to a situation where they are dependent on the employer for their livelihood. This dependence can result in further suppression of wages and a decline in working conditions.\\n\\nOverall, the concept of monopsony is essential to understanding the dynamics of labor markets and the impact of market power on workers. Further research is needed to understand the extent and impact of monopsonies on the economy and to develop policies to address this issue.\\n\\nReferences:\\nBivens, J., & Mishel, L. (2013). The Pay of Corporate Executives and Financial Professionals as Evidence of Rents in Top 1 Percent Incomes. Journal of Economic Perspectives, 27(3), 57-78.', 'Human: Now explain it to a dog']\n"
-     ]
-    }
-   ],
-   "source": [
-    "row_text = test_row['text']\n",
-    "parts_text = row_text.split('### ')\n",
-    "print(parts_text)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "id": "9e006bf9-fbb4-40c9-9e4d-fb977e6675c2",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[{'role': 'user', 'parts': [{'text': 'Can you write a short introduction about the relevance of the term \"monopsony\" in economics? Please use examples related to potential monopsonies in the labour market and cite relevant research.'}]}, {'role': 'model', 'parts': [{'text': '\"Monopsony\" refers to a market structure where there is only one buyer for a particular good or service. In economics, this term is particularly relevant in the labor market, where a monopsony employer has significant power over the wages and working conditions of their employees. The presence of a monopsony can result in lower wages and reduced employment opportunities for workers, as the employer has little incentive to increase wages or provide better working conditions.\\n\\nRecent research has identified potential monopsonies in industries such as retail and fast food, where a few large companies control a significant portion of the market (Bivens & Mishel, 2013). In these industries, workers often face low wages, limited benefits, and reduced bargaining power, leading to a situation where they are dependent on the employer for their livelihood. This dependence can result in further suppression of wages and a decline in working conditions.\\n\\nOverall, the concept of monopsony is essential to understanding the dynamics of labor markets and the impact of market power on workers. Further research is needed to understand the extent and impact of monopsonies on the economy and to develop policies to address this issue.\\n\\nReferences:\\nBivens, J., & Mishel, L. (2013). The Pay of Corporate Executives and Financial Professionals as Evidence of Rents in Top 1 Percent Incomes. Journal of Economic Perspectives, 27(3), 57-78.'}]}, {'role': 'user', 'parts': [{'text': 'Now explain it to a dog'}]}]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Convert each '### '-delimited turn of the test row into a Vertex `contents` entry.\n",
-    "parts = []\n",
-    "for p in parts_text:\n",
-    "    if p == '':\n",
-    "        continue\n",
-    "    \n",
-    "    # Split on the first \": \" only: message bodies may contain \": \" themselves,\n",
-    "    # which would otherwise raise \"too many values to unpack\".\n",
-    "    role, content = p.split(\": \", 1)\n",
-    "    if role == \"Human\":\n",
-    "        parts.append({\n",
-    "            \"role\": \"user\",\n",
-    "            \"parts\": [{\n",
-    "                \"text\": content\n",
-    "            }],\n",
-    "        })\n",
-    "        continue\n",
-    "    parts.append({\n",
-    "        \"role\": \"model\",\n",
-    "        \"parts\": [{\n",
-    "            \"text\": content\n",
-    "        }],\n",
-    "    })\n",
-    "print(parts)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "id": "214381b2-2b60-4c4e-86af-e67629a744ec",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "# The conversation must live under `contents` (plural): it is the only key the\n",
-    "# schema describes, so any other key would pass validation without being checked.\n",
-    "clean_row = {\n",
-    "    \"contents\": parts\n",
-    "}\n",
-    "validate(clean_row, schema)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "id": "fccecbf0-702d-4ea9-936e-2eb1f9affa74",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "{\"content\": [{\"role\": \"user\", \"parts\": [{\"text\": \"Can you write a short introduction about the relevance of the term \\\"monopsony\\\" in economics? Please use examples related to potential monopsonies in the labour market and cite relevant research.\"}]}, {\"role\": \"model\", \"parts\": [{\"text\": \"\\\"Monopsony\\\" refers to a market structure where there is only one buyer for a particular good or service. In economics, this term is particularly relevant in the labor market, where a monopsony employer has significant power over the wages and working conditions of their employees. The presence of a monopsony can result in lower wages and reduced employment opportunities for workers, as the employer has little incentive to increase wages or provide better working conditions.\\n\\nRecent research has identified potential monopsonies in industries such as retail and fast food, where a few large companies control a significant portion of the market (Bivens & Mishel, 2013). In these industries, workers often face low wages, limited benefits, and reduced bargaining power, leading to a situation where they are dependent on the employer for their livelihood. This dependence can result in further suppression of wages and a decline in working conditions.\\n\\nOverall, the concept of monopsony is essential to understanding the dynamics of labor markets and the impact of market power on workers. Further research is needed to understand the extent and impact of monopsonies on the economy and to develop policies to address this issue.\\n\\nReferences:\\nBivens, J., & Mishel, L. (2013). The Pay of Corporate Executives and Financial Professionals as Evidence of Rents in Top 1 Percent Incomes. Journal of Economic Perspectives, 27(3), 57-78.\"}]}, {\"role\": \"user\", \"parts\": [{\"text\": \"Now explain it to a dog\"}]}]}\n"
-     ]
-    }
-   ],
-   "source": [
-    "print(json.dumps(clean_row))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "23e0235a-bc33-4df8-8edf-4f34da2c3b7d",
-   "metadata": {},
-   "source": [
-    "### Step 1b. Apply transform to all rows"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "id": "4508ac67-8c4a-4875-8e7f-d807009c3724",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "jsonl_str = ''\n",
-    "OUTPUT_FILE = 'guanaco_vertex_tune.jsonl'\n",
-    "REJECTS_FILE = 'guanaco_rejects.jsonl'\n",
-    "# Guanaco speaker labels -> the only roles Vertex accepts (\"user\" or \"model\").\n",
-    "ROLE_BY_SPEAKER = {\"Human\": \"user\", \"Assistant\": \"model\"}\n",
-    "\n",
-    "# Open both files once, in write mode, so re-running this cell replaces the\n",
-    "# output instead of appending a second copy of every row.\n",
-    "with open(OUTPUT_FILE, 'w') as f, open(REJECTS_FILE, 'w') as rf:\n",
-    "    for r in ds['train']:\n",
-    "        try:\n",
-    "            row_text = r['text']\n",
-    "            parts_text = row_text.split('### ')\n",
-    "            parts = []\n",
-    "            for p in parts_text:\n",
-    "                if p == '':\n",
-    "                    continue\n",
-    "\n",
-    "                # Split on the first \": \" only; message bodies often contain\n",
-    "                # \": \" themselves and must not be rejected for it.\n",
-    "                role, content = p.split(\": \", 1)\n",
-    "                if role not in ROLE_BY_SPEAKER:\n",
-    "                    # e.g. a markdown heading inside a reply that was split on '### '\n",
-    "                    raise ValueError(f\"unexpected speaker label: {role!r}\")\n",
-    "                parts.append({\n",
-    "                    \"role\": ROLE_BY_SPEAKER[role],\n",
-    "                    \"parts\": [{\n",
-    "                        \"text\": content\n",
-    "                    }],\n",
-    "                })\n",
-    "\n",
-    "            clean_row = {\n",
-    "                \"contents\": parts\n",
-    "            }\n",
-    "            validate(clean_row, schema)\n",
-    "\n",
-    "            jsonl_str = f\"{json.dumps(clean_row)}\\n\"\n",
-    "            f.write(jsonl_str)\n",
-    "        except ValueError:\n",
-    "            # Rows that do not parse into role/content turns are kept for inspection.\n",
-    "            rf.write(f\"{json.dumps(r)}\\n\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "25ca906e-47c0-4dae-99f4-55c5fdd3eee6",
-   "metadata": {},
-   "source": [
-    "## Step 2. Upload JSONL file to GCS\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "303c9e59-6011-4bc8-9cb1-7322ccd0204d",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "PROJECT_ID = !gcloud config get-value project\n",
-    "PROJECT_ID = PROJECT_ID[0]\n",
-    "print(PROJECT_ID)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 50,
-   "id": "db579ab1-f542-41e9-abb9-e4deaea96196",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "gcs_bucket = f\"{PROJECT_ID}-bucket\"\n",
-    "storage_client = storage.Client(project=PROJECT_ID)\n",
-    "bucket = storage_client.bucket(gcs_bucket)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "id": "3d3d668a-8db7-4e40-b79b-73c3ecc22746",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "blob = bucket.blob(OUTPUT_FILE)\n",
-    "blob.upload_from_filename(OUTPUT_FILE)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "e9d84aeb-43eb-4be2-ab6c-c2d6e59a38ef",
-   "metadata": {},
-   "source": [
-    "## Step 3. Create a tuning job from API\n",
-    "\n",
-    "Can't get the tuning job to work from the console. Hopefully the API will return more\n",
-    "helpful error messages."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "83ed5047-163b-4900-8554-7a3709762935",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "vertexai.init(project=PROJECT_ID, location=\"us-west1\")\n",
-    "\n",
-    "sft_tuning_job = sft.train(\n",
-    "    source_model=\"gemini-1.5-flash-002\",\n",
-    "    train_dataset=f\"gs://{gcs_bucket}/{OUTPUT_FILE}\",\n",
-    "    epochs=4,\n",
-    "    adapter_size=4,\n",
-    "    learning_rate_multiplier=1.0,\n",
-    "    tuned_model_display_name=\"tuned_gemini_1_5_flash_guanaco\",\n",
-    ")\n",
-    "\n",
-    "# Polling for job completion\n",
-    "while not sft_tuning_job.has_ended:\n",
-    "    time.sleep(60)\n",
-    "    sft_tuning_job.refresh()\n",
-    "\n",
-    "print(sft_tuning_job.tuned_model_name)\n",
-    "print(sft_tuning_job.tuned_model_endpoint_name)\n",
-    "print(sft_tuning_job.experiment)\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "08dcc44e-4ee6-4092-a040-51801af6d877",
-   "metadata": {},
-   "source": [
-    "## Step 4. Get model resource name :/\n",
-    "\n",
-    "The Go libraries don't make getting this value easy."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 60,
-   "id": "521bb222-ed4f-4bbc-aaa5-9f2362b8892f",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [],
-   "source": [
-    "tuned_model = GenerativeModel(f'projects/{PROJECT_ID}/locations/us-west1/endpoints/1926929312049528832')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 61,
-   "id": "835b0c02-7d0a-4bd2-a985-81e06fde689f",
-   "metadata": {
-    "tags": []
-   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Help on GenerativeModel in module vertexai.generative_models object:\n",
-      "\n",
-      "class GenerativeModel(vertexai.generative_models._generative_models._GenerativeModel)\n",
-      " |  GenerativeModel(model_name: str, *, generation_config: Union[ForwardRef('GenerationConfig'), Dict[str, Any], NoneType] = None, safety_settings: Union[List[ForwardRef('SafetySetting')], Dict[google.cloud.aiplatform_v1beta1.types.content.HarmCategory, google.cloud.aiplatform_v1beta1.types.content.SafetySetting.HarmBlockThreshold], NoneType] = None, tools: Optional[List[ForwardRef('Tool')]] = None, tool_config: Optional[ForwardRef('ToolConfig')] = None, system_instruction: Union[str, ForwardRef('Image'), ForwardRef('Part'), List[Union[str, ForwardRef('Image'), ForwardRef('Part')]], NoneType] = None)\n",
-      " |  \n",
-      " |  Method resolution order:\n",
-      " |      GenerativeModel\n",
-      " |      vertexai.generative_models._generative_models._GenerativeModel\n",
-      " |      builtins.object\n",
-      " |  \n",
-      " |  Methods inherited from vertexai.generative_models._generative_models._GenerativeModel:\n",
-      " |  \n",
-      " |  __init__(self, model_name: str, *, generation_config: Union[ForwardRef('GenerationConfig'), Dict[str, Any], NoneType] = None, safety_settings: Union[List[ForwardRef('SafetySetting')], Dict[google.cloud.aiplatform_v1beta1.types.content.HarmCategory, google.cloud.aiplatform_v1beta1.types.content.SafetySetting.HarmBlockThreshold], NoneType] = None, tools: Optional[List[ForwardRef('Tool')]] = None, tool_config: Optional[ForwardRef('ToolConfig')] = None, system_instruction: Union[str, ForwardRef('Image'), ForwardRef('Part'), List[Union[str, ForwardRef('Image'), ForwardRef('Part')]], NoneType] = None)\n",
-      " |      Initializes GenerativeModel.\n",
-      " |      \n",
-      " |      Usage:\n",
-      " |          ```\n",
-      " |          model = GenerativeModel(\"gemini-pro\")\n",
-      " |          print(model.generate_content(\"Hello\"))\n",
-      " |          ```\n",
-      " |      \n",
-      " |      Args:\n",
-      " |          model_name: Model Garden model resource name.\n",
-      " |              Alternatively, a tuned model endpoint resource name can be provided.\n",
-      " |          generation_config: Default generation config to use in generate_content.\n",
-      " |          safety_settings: Default safety settings to use in generate_content.\n",
-      " |          tools: Default tools to use in generate_content.\n",
-      " |          tool_config: Default tool config to use in generate_content.\n",
-      " |          system_instruction: Default system instruction to use in generate_content.\n",
-      " |              Note: Only text should be used in parts.\n",
-      " |              Content of each part will become a separate paragraph.\n",
-      " |  \n",
-      " |  compute_tokens(self, contents: Union[List[ForwardRef('Content')], List[Dict[str, Any]], str, ForwardRef('Image'), ForwardRef('Part'), List[Union[str, ForwardRef('Image'), ForwardRef('Part')]]]) -> google.cloud.aiplatform_v1beta1.types.llm_utility_service.ComputeTokensResponse\n",
-      " |      Computes tokens.\n",
-      " |      \n",
-      " |      Args:\n",
-      " |          contents: Contents to send to the model.\n",
-      " |              Supports either a list of Content objects (passing a multi-turn conversation)\n",
-      " |              or a value that can be converted to a single Content object (passing a single message).\n",
-      " |              Supports\n",
-      " |              * str, Image, Part,\n",
-      " |              * List[Union[str, Image, Part]],\n",
-      " |              * List[Content]\n",
-      " |      \n",
-      " |      Returns:\n",
-      " |          A ComputeTokensResponse object that has the following attributes:\n",
-      " |              tokens_info: Lists of tokens_info from the input.\n",
-      " |                           The input `contents: ContentsType` could have\n",
-      " |                           multiple string instances and each tokens_info\n",
-      " |                           item represents each string instance. Each token\n",
-      " |                           info consists tokens list, token_ids list and\n",
-      " |                           a role.\n",
-      " |  \n",
-      " |  async compute_tokens_async(self, contents: Union[List[ForwardRef('Content')], List[Dict[str, Any]], str, ForwardRef('Image'), ForwardRef('Part'), List[Union[str, ForwardRef('Image'), ForwardRef('Part')]]]) -> google.cloud.aiplatform_v1beta1.types.llm_utility_service.ComputeTokensResponse\n",
-      " |      Computes tokens asynchronously.\n",
-      " |      \n",
-      " |      Args:\n",
-      " |          contents: Contents to send to the model.\n",
-      " |              Supports either a list of Content objects (passing a multi-turn conversation)\n",
-      " |              or a value that can be converted to a single Content object (passing a single message).\n",
-      " |              Supports\n",
-      " |              * str, Image, Part,\n",
-      " |              * List[Union[str, Image, Part]],\n",
-      " |              * List[Content]\n",
-      " |      \n",
-      " |      Returns:\n",
-      " |          An awaitable for a ComputeTokensResponse object that has the following attributes:\n",
-      " |              tokens_info: Lists of tokens_info from the input.\n",
-      " |                           The input `contents: ContentsType` could have\n",
-      " |                           multiple string instances and each tokens_info\n",
-      " |                           item represents each string instance. Each token\n",
-      " |                           info consists of a tokens list, a token_ids list and\n",
-      " |                           a role.\n",
-      " |  \n",
-      " |  count_tokens(self, contents: Union[List[ForwardRef('Content')], List[Dict[str, Any]], str, ForwardRef('Image'), ForwardRef('Part'), List[Union[str, ForwardRef('Image'), ForwardRef('Part')]]], *, tools: Optional[List[ForwardRef('Tool')]] = None) -> google.cloud.aiplatform_v1beta1.types.prediction_service.CountTokensResponse\n",
-      " |      Counts tokens.\n",
-      " |      \n",
-      " |      Args:\n",
-      " |          contents: Contents to send to the model.\n",
-      " |              Supports either a list of Content objects (passing a multi-turn conversation)\n",
-      " |              or a value that can be converted to a single Content object (passing a single message).\n",
-      " |              Supports\n",
-      " |              * str, Image, Part,\n",
-      " |              * List[Union[str, Image, Part]],\n",
-      " |              * List[Content]\n",
-      " |          tools: A list of tools (functions) that the model can try calling.\n",
-      " |      \n",
-      " |      Returns:\n",
-      " |          A CountTokensResponse object that has the following attributes:\n",
-      " |              total_tokens: The total number of tokens counted across all instances from the request.\n",
-      " |              total_billable_characters: The total number of billable characters counted across all instances from the request.\n",
-      " |  \n",
-      " |  async count_tokens_async(self, contents: Union[List[ForwardRef('Content')], List[Dict[str, Any]], str, ForwardRef('Image'), ForwardRef('Part'), List[Union[str, ForwardRef('Image'), ForwardRef('Part')]]], *, tools: Optional[List[ForwardRef('Tool')]] = None) -> google.cloud.aiplatform_v1beta1.types.prediction_service.CountTokensResponse\n",
-      " |      Counts tokens asynchronously.\n",
-      " |      \n",
-      " |      Args:\n",
-      " |          contents: Contents to send to the model.\n",
-      " |              Supports either a list of Content objects (passing a multi-turn conversation)\n",
-      " |              or a value that can be converted to a single Content object (passing a single message).\n",
-      " |              Supports\n",
-      " |              * str, Image, Part,\n",
-      " |              * List[Union[str, Image, Part]],\n",
-      " |              * List[Content]\n",
-      " |          tools: A list of tools (functions) that the model can try calling.\n",
-      " |      \n",
-      " |      Returns:\n",
-      " |          An awaitable for a CountTokensResponse object that has the following attributes:\n",
-      " |              total_tokens: The total number of tokens counted across all instances from the request.\n",
-      " |              total_billable_characters: The total number of billable characters counted across all instances from the request.\n",
-      " |  \n",
-      " |  generate_content(self, contents: Union[List[ForwardRef('Content')], List[Dict[str, Any]], str, ForwardRef('Image'), ForwardRef('Part'), List[Union[str, ForwardRef('Image'), ForwardRef('Part')]]], *, generation_config: Union[ForwardRef('GenerationConfig'), Dict[str, Any], NoneType] = None, safety_settings: Union[List[ForwardRef('SafetySetting')], Dict[google.cloud.aiplatform_v1beta1.types.content.HarmCategory, google.cloud.aiplatform_v1beta1.types.content.SafetySetting.HarmBlockThreshold], NoneType] = None, tools: Optional[List[ForwardRef('Tool')]] = None, tool_config: Optional[ForwardRef('ToolConfig')] = None, stream: bool = False) -> Union[ForwardRef('GenerationResponse'), Iterable[ForwardRef('GenerationResponse')]]\n",
-      " |      Generates content.\n",
-      " |      \n",
-      " |      Args:\n",
-      " |          contents: Contents to send to the model.\n",
-      " |              Supports either a list of Content objects (passing a multi-turn conversation)\n",
-      " |              or a value that can be converted to a single Content object (passing a single message).\n",
-      " |              Supports\n",
-      " |              * str, Image, Part,\n",
-      " |              * List[Union[str, Image, Part]],\n",
-      " |              * List[Content]\n",
-      " |          generation_config: Parameters for the generation.\n",
-      " |          safety_settings: Safety settings as a mapping from HarmCategory to HarmBlockThreshold.\n",
-      " |          tools: A list of tools (functions) that the model can try calling.\n",
-      " |          tool_config: Config shared for all tools provided in the request.\n",
-      " |          stream: Whether to stream the response.\n",
-      " |      \n",
-      " |      Returns:\n",
-      " |          A single GenerationResponse object if stream == False\n",
-      " |          A stream of GenerationResponse objects if stream == True\n",
-      " |  \n",
-      " |  async generate_content_async(self, contents: Union[List[ForwardRef('Content')], List[Dict[str, Any]], str, ForwardRef('Image'), ForwardRef('Part'), List[Union[str, ForwardRef('Image'), ForwardRef('Part')]]], *, generation_config: Union[ForwardRef('GenerationConfig'), Dict[str, Any], NoneType] = None, safety_settings: Union[List[ForwardRef('SafetySetting')], Dict[google.cloud.aiplatform_v1beta1.types.content.HarmCategory, google.cloud.aiplatform_v1beta1.types.content.SafetySetting.HarmBlockThreshold], NoneType] = None, tools: Optional[List[ForwardRef('Tool')]] = None, tool_config: Optional[ForwardRef('ToolConfig')] = None, stream: bool = False) -> Union[ForwardRef('GenerationResponse'), AsyncIterable[ForwardRef('GenerationResponse')]]\n",
-      " |      Generates content asynchronously.\n",
-      " |      \n",
-      " |      Args:\n",
-      " |          contents: Contents to send to the model.\n",
-      " |              Supports either a list of Content objects (passing a multi-turn conversation)\n",
-      " |              or a value that can be converted to a single Content object (passing a single message).\n",
-      " |              Supports\n",
-      " |              * str, Image, Part,\n",
-      " |              * List[Union[str, Image, Part]],\n",
-      " |              * List[Content]\n",
-      " |          generation_config: Parameters for the generation.\n",
-      " |          safety_settings: Safety settings as a mapping from HarmCategory to HarmBlockThreshold.\n",
-      " |          tools: A list of tools (functions) that the model can try calling.\n",
-      " |          tool_config: Config shared for all tools provided in the request.\n",
-      " |          stream: Whether to stream the response.\n",
-      " |      \n",
-      " |      Returns:\n",
-      " |          An awaitable for a single GenerationResponse object if stream == False\n",
-      " |          An awaitable for a stream of GenerationResponse objects if stream == True\n",
-      " |  \n",
-      " |  start_chat(self, *, history: Optional[List[ForwardRef('Content')]] = None, response_validation: bool = True) -> 'ChatSession'\n",
-      " |      Creates a stateful chat session.\n",
-      " |      \n",
-      " |      Args:\n",
-      " |          history: Previous history to initialize the chat session.\n",
-      " |          response_validation: Whether to validate responses before adding\n",
-      " |              them to chat history. By default, `send_message` will raise\n",
-      " |              error if the request or response is blocked or if the response\n",
-      " |              is incomplete due to going over the max token limit.\n",
-      " |              If set to `False`, the chat session history will always\n",
-      " |              accumulate the request and response messages even if the\n",
-      " |              response is blocked or incomplete. This can result in an unusable\n",
-      " |              chat session state.\n",
-      " |      \n",
-      " |      Returns:\n",
-      " |          A ChatSession object.\n",
-      " |  \n",
-      " |  ----------------------------------------------------------------------\n",
-      " |  Data descriptors inherited from vertexai.generative_models._generative_models._GenerativeModel:\n",
-      " |  \n",
-      " |  __dict__\n",
-      " |      dictionary for instance variables (if defined)\n",
-      " |  \n",
-      " |  __weakref__\n",
-      " |      list of weak references to the object (if defined)\n",
-      "\n"
-     ]
-    }
-   ],
-   "source": [
-    "help(tuned_model)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "10c60cce-a703-438c-b00a-a21414c0ad64",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "PyTorch 2.0 (Local)",
-   "language": "python",
-   "name": "pytorch-2-0"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.10.15"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}