Skip to content

Commit

Permalink
fix: separate folder for pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
ForYourEyesOnlyyy committed Oct 15, 2024
1 parent af5aa8f commit 55b6572
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 11 deletions.
11 changes: 6 additions & 5 deletions notebooks/model_experiments.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,29 +2,30 @@
"cells": [
{
"cell_type": "code",
"execution_count": 97,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"import sys \n",
"sys.path.append(\"/Users/maxmartyshov/Desktop/IU/year3/PMDL/Sentiment_Analysis_for_Financial_News/src\")"
"sys.path.append(\"/Users/maxmartyshov/Desktop/IU/year3/PMDL/Sentiment_Analysis_for_Financial_News/src\")\n",
"sys.path.append(\"/Users/maxmartyshov/Desktop/IU/year3/PMDL/Sentiment_Analysis_for_Financial_News/pipelines\")"
]
},
{
"cell_type": "code",
"execution_count": 98,
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pipeline artifact [: ae9e60fe-78f1-4f14-becc-d3b5837abed6] loaded successfully\n"
"Pipeline artifact [: bbc7f2bf-160c-4305-a301-946f07c365a1] loaded successfully\n"
]
}
],
"source": [
"from pipline_extract import extract_latest_loaders\n",
"from extract_training_data import extract_latest_loaders\n",
"\n",
"dataloaders = extract_latest_loaders()\n",
"train_loader = dataloaders['train']\n",
Expand Down
6 changes: 3 additions & 3 deletions src/pipline_extract.py → pipelines/extract_training_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,12 @@
def extract_latest_loaders() -> dict:
    """Load the dataloaders produced by the latest ``training_data_pipeline`` run.

    Looks the pipeline up by name through a ``Client``, takes its last run and
    loads the output artifact of that run's ``prepare_dataloaders`` step. The
    id of the run is printed once the artifact has been loaded.

    Returns:
        dict: ``{"train": ..., "validation": ...}`` holding the entries of the
        loaded artifact stored under those same two keys.
    """
    client = Client()
    pipeline_name = "training_data_pipeline"
    pipeline = client.get_pipeline(pipeline_name)
    latest_run = pipeline.last_run
    loaders_step = latest_run.steps["prepare_dataloaders"]
    # NOTE(review): the artifact is indexed by "train" / "validation" below, so
    # it is presumably a mapping of split name -> dataloader; confirm against
    # the prepare_dataloaders step.
    artifact = loaders_step.output.load()
    train_loader = artifact["train"]
    val_loader = artifact["validation"]
    print(f"Pipeline artifact: {latest_run.id} loaded successfully")
    return {"train": train_loader, "validation": val_loader}
9 changes: 6 additions & 3 deletions src/data_pipeline.py → pipelines/training_data_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@

import pandas as pd

import sys
from pathlib import Path

# Make the sibling ``src`` directory importable. The location is derived from
# this file (which lives in ``<repo>/pipelines``) instead of a hard-coded
# absolute path, so the module works from any checkout of the repository.
sys.path.append(str(Path(__file__).resolve().parent.parent / "src"))

import data


Expand Down Expand Up @@ -41,17 +44,17 @@ def prepare_dataloaders(train_test: dict,


@pipeline
def training_data_pipeline(load, preprocess, split, prepare_dataloaders):
    """Chain the four steps: load -> preprocess -> split -> prepare_dataloaders.

    Each parameter is a step callable supplied when the pipeline is
    instantiated; inside this function the parameter names shadow the
    module-level objects of the same name. The output of every step is passed
    as the only argument to the next one, and the result of the final
    ``prepare_dataloaders`` call is not returned.
    """
    tweets = load()
    preprocessed = preprocess(tweets)
    split_tweets = split(preprocessed)
    prepare_dataloaders(split_tweets)


# Bind the concrete step instances to the pipeline and run it. This happens at
# module level, so the pipeline runs whenever this file is executed or imported.
training_data_pipeline_instance = training_data_pipeline(
    load=load(),
    preprocess=preprocess(),
    split=split(),
    prepare_dataloaders=prepare_dataloaders())

training_data_pipeline_instance.run()

0 comments on commit 55b6572

Please sign in to comment.