diff --git a/course/en/chapter10/section2.ipynb b/course/en/chapter10/section2.ipynb new file mode 100644 index 00000000..35dadb0c --- /dev/null +++ b/course/en/chapter10/section2.ipynb @@ -0,0 +1,57 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Set up your Argilla instance" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install argilla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import argilla as rg\n", + "\n", + "HF_TOKEN = \"...\" # only for private spaces\n", + "\n", + "client = rg.Argilla(\n", + " api_url=\"...\",\n", + " api_key=\"...\",\n", + " headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # only for private spaces\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client.me" + ] + } + ], + "metadata": { + "colab": { + "name": "Set up your Argilla instance", + "provenance": [] + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/course/en/chapter10/section3.ipynb b/course/en/chapter10/section3.ipynb new file mode 100644 index 00000000..b027a5e4 --- /dev/null +++ b/course/en/chapter10/section3.ipynb @@ -0,0 +1,115 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Load your dataset to Argilla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install argilla datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import argilla as rg\n", + "\n", + "HF_TOKEN = \"...\" # only for private spaces\n", + "\n", + "client = rg.Argilla(\n", + " api_url=\"...\",\n", + " api_key=\"...\",\n", + " headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # only for private spaces\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'text': Value(dtype='string', id=None),\n", + " 'label': Value(dtype='int64', id=None),\n", + " 'label_text': Value(dtype='string', id=None)}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from datasets import load_dataset\n", + "\n", + "data = load_dataset(\"SetFit/ag_news\", split=\"train\")\n", + "data.features" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "settings = rg.Settings(\n", + " fields=[rg.TextField(name=\"text\")],\n", + " questions=[\n", + " rg.LabelQuestion(\n", + " name=\"label\", title=\"Classify the text:\", labels=data.unique(\"label_text\")\n", + " ),\n", + " rg.SpanQuestion(\n", + " name=\"entities\",\n", + " title=\"Highlight all the entities in the text:\",\n", + " labels=[\"PERSON\", \"ORG\", \"LOC\", \"EVENT\"],\n", + " field=\"text\",\n", + " ),\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = rg.Dataset(name=\"ag_news\", settings=settings)\n", + "\n", + "dataset.create()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.records.log(data, mapping={\"label_text\": \"label\"})" + ] + } + ], + "metadata": { + "colab": { + "name": "Load your dataset to Argilla", + "provenance": [] + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/course/en/chapter10/section5.ipynb b/course/en/chapter10/section5.ipynb new file mode 100644 index 00000000..1d596498 --- /dev/null +++ b/course/en/chapter10/section5.ipynb @@ -0,0 +1,95 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Use your annotated dataset" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install argilla" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import argilla as rg\n", + "\n", + "HF_TOKEN = \"...\" # only for private spaces\n", + "\n", + "client = rg.Argilla(\n", + " api_url=\"...\",\n", + " api_key=\"...\",\n", + " headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # only for private spaces\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = client.datasets(name=\"ag_news\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "status_filter = rg.Query(filter=rg.Filter([(\"status\", \"==\", \"completed\")]))\n", + "\n", + "filtered_records = dataset.records(status_filter)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "filtered_records.to_datasets().push_to_hub(\"argilla/ag_news_annotated\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset.to_hub(repo_id=\"argilla/ag_news_annotated\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = rg.Dataset.from_hub(repo_id=\"argilla/ag_news_annotated\")" + ] + } + ], + "metadata": { + "colab": { + "name": "Use your annotated dataset", + "provenance": [] + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}