Skip to content

Commit

Permalink
Chapter 10 notebooks (#531)
Browse files Browse the repository at this point in the history
* Chapter 10 notebooks

* Update course/en/chapter10/section3.ipynb
  • Loading branch information
nataliaElv authored Nov 22, 2024
1 parent 80c5df0 commit 0454d7d
Show file tree
Hide file tree
Showing 3 changed files with 267 additions and 0 deletions.
57 changes: 57 additions & 0 deletions course/en/chapter10/section2.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Set up your Argilla instance"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install argilla"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import argilla as rg\n",
"\n",
"HF_TOKEN = \"...\" # only for private spaces\n",
"\n",
"client = rg.Argilla(\n",
" api_url=\"...\",\n",
" api_key=\"...\",\n",
" headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # only for private spaces\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"client.me"
]
}
],
"metadata": {
"colab": {
"name": "Set up your Argilla instance",
"provenance": []
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
115 changes: 115 additions & 0 deletions course/en/chapter10/section3.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load your dataset to Argilla"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install argilla datasets"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import argilla as rg\n",
"\n",
"HF_TOKEN = \"...\" # only for private spaces\n",
"\n",
"client = rg.Argilla(\n",
" api_url=\"...\",\n",
" api_key=\"...\",\n",
" headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # only for private spaces\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'text': Value(dtype='string', id=None),\n",
" 'label': Value(dtype='int64', id=None),\n",
" 'label_text': Value(dtype='string', id=None)}"
]
},
"execution_count": null,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from datasets import load_dataset\n",
"\n",
"data = load_dataset(\"SetFit/ag_news\", split=\"train\")\n",
"data.features"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"settings = rg.Settings(\n",
" fields=[rg.TextField(name=\"text\")],\n",
" questions=[\n",
" rg.LabelQuestion(\n",
" name=\"label\", title=\"Classify the text:\", labels=data.unique(\"label_text\")\n",
" ),\n",
" rg.SpanQuestion(\n",
" name=\"entities\",\n",
" title=\"Highlight all the entities in the text:\",\n",
" labels=[\"PERSON\", \"ORG\", \"LOC\", \"EVENT\"],\n",
" field=\"text\",\n",
" ),\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset = rg.Dataset(name=\"ag_news\", settings=settings)\n",
"\n",
"dataset.create()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset.records.log(data, mapping={\"label_text\": \"label\"})"
]
}
],
"metadata": {
"colab": {
"name": "Load your dataset to Argilla",
"provenance": []
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
95 changes: 95 additions & 0 deletions course/en/chapter10/section5.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Use your annotated dataset"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install argilla"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import argilla as rg\n",
"\n",
"HF_TOKEN = \"...\" # only for private spaces\n",
"\n",
"client = rg.Argilla(\n",
" api_url=\"...\",\n",
" api_key=\"...\",\n",
" headers={\"Authorization\": f\"Bearer {HF_TOKEN}\"}, # only for private spaces\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset = client.datasets(name=\"ag_news\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"status_filter = rg.Query(filter=rg.Filter([(\"status\", \"==\", \"completed\")]))\n",
"\n",
"filtered_records = dataset.records(status_filter)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Export only the records matched by `status_filter` (status == \"completed\")\n",
"filtered_records.to_datasets().push_to_hub(\"argilla/ag_news_annotated\")"
]
},
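{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Alternative export: push the whole Argilla dataset rather than a filtered subset of records.\n",
"# Note: this uses the same repo_id as the filtered-records export, so pick one or change the repo_id.\n",
"dataset.to_hub(repo_id=\"argilla/ag_news_annotated\")"
]
},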
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"dataset = rg.Dataset.from_hub(repo_id=\"argilla/ag_news_annotated\")"
]
}
],
"metadata": {
"colab": {
"name": "Use your annotated dataset",
"provenance": []
},
"language_info": {
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 4
}

0 comments on commit 0454d7d

Please sign in to comment.