Chapter 10 notebooks (#531)

* Chapter 10 notebooks
* Update course/en/chapter10/section3.ipynb
huggingface · Nov 22, 2024 · 0454d7d · 0454d7d
1 parent 80c5df0
commit 0454d7d
Show file tree

Hide file tree

Showing 3 changed files with 267 additions and 0 deletions.
diff --git a/course/en/chapter10/section2.ipynb b/course/en/chapter10/section2.ipynb
@@ -0,0 +1,57 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Set up your Argilla instance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install argilla"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import argilla as rg\n",
+    "\n",
+    "HF_TOKEN = \"...\"  # only for private spaces\n",
+    "\n",
+    "client = rg.Argilla(\n",
+    "    api_url=\"...\",\n",
+    "    api_key=\"...\",\n",
+    "    headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"},  # only for private spaces\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "client.me"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "name": "Set up your Argilla instance",
+   "provenance": []
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/course/en/chapter10/section3.ipynb b/course/en/chapter10/section3.ipynb
@@ -0,0 +1,115 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Load your dataset to Argilla"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install argilla datasets"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import argilla as rg\n",
+    "\n",
+    "HF_TOKEN = \"...\"  # only for private spaces\n",
+    "\n",
+    "client = rg.Argilla(\n",
+    "    api_url=\"...\",\n",
+    "    api_key=\"...\",\n",
+    "    headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"},  # only for private spaces\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'text': Value(dtype='string', id=None),\n",
+       " 'label': Value(dtype='int64', id=None),\n",
+       " 'label_text': Value(dtype='string', id=None)}"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from datasets import load_dataset\n",
+    "\n",
+    "data = load_dataset(\"SetFit/ag_news\", split=\"train\")\n",
+    "data.features"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "settings = rg.Settings(\n",
+    "    fields=[rg.TextField(name=\"text\")],\n",
+    "    questions=[\n",
+    "        rg.LabelQuestion(\n",
+    "            name=\"label\", title=\"Classify the text:\", labels=data.unique(\"label_text\")\n",
+    "        ),\n",
+    "        rg.SpanQuestion(\n",
+    "            name=\"entities\",\n",
+    "            title=\"Highlight all the entities in the text:\",\n",
+    "            labels=[\"PERSON\", \"ORG\", \"LOC\", \"EVENT\"],\n",
+    "            field=\"text\",\n",
+    "        ),\n",
+    "    ],\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = rg.Dataset(name=\"ag_news\", settings=settings)\n",
+    "\n",
+    "dataset.create()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.records.log(data, mapping={\"label_text\": \"label\"})"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "name": "Load your dataset to Argilla",
+   "provenance": []
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/course/en/chapter10/section5.ipynb b/course/en/chapter10/section5.ipynb
@@ -0,0 +1,95 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Use your annotated dataset"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%pip install argilla"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import argilla as rg\n",
+    "\n",
+    "HF_TOKEN = \"...\"  # only for private spaces\n",
+    "\n",
+    "client = rg.Argilla(\n",
+    "    api_url=\"...\",\n",
+    "    api_key=\"...\",\n",
+    "    headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"},  # only for private spaces\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = client.datasets(name=\"ag_news\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "status_filter = rg.Query(filter=rg.Filter([(\"status\", \"==\", \"completed\")]))\n",
+    "\n",
+    "filtered_records = dataset.records(status_filter)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "filtered_records.to_datasets().push_to_hub(\"argilla/ag_news_annotated\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset.to_hub(repo_id=\"argilla/ag_news_annotated\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = rg.Dataset.from_hub(repo_id=\"argilla/ag_news_annotated\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "colab": {
+   "name": "Use your annotated dataset",
+   "provenance": []
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}