# Kalimati Data Pipeline #82
# (Header text above the workflow was left over from the GitHub web page,
# e.g. "Skip to content"; it is not part of the workflow definition.)

# Scheduled pipeline: installs dependencies, runs scrape.py, commits any
# changes under data/ back to the main branch, then runs kaggleUpdate.py
# (the "Upload data to Kaggle" step).
name: Kalimati Data Pipeline

on:
  schedule:
    # Runs at 9:55 AM UTC, which is 3:40 PM Nepal Time (UTC+05:45), daily.
    - cron: '55 9 * * *'
  # Allows the workflow to be started manually as well.
  workflow_dispatch:

jobs:
  run-scraper:
    runs-on: ubuntu-latest
    # This job pushes a commit to the repository, so the workflow token
    # needs write access to repository contents.
    permissions:
      contents: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          # Quoted so the value is always read as a string, never a number.
          python-version: '3.x'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install kaggle # Install Kaggle API

      - name: Set up Kaggle credentials
        env:
          KAGGLE_USERNAME: ${{ secrets.KAGGLEUSERNAME }}
          KAGGLE_KEY: ${{ secrets.KAGGLEKEY }}
        run: |
          # Write the credentials file and restrict it to the current user.
          mkdir -p ~/.kaggle
          echo "{\"username\":\"$KAGGLE_USERNAME\",\"key\":\"$KAGGLE_KEY\"}" > ~/.kaggle/kaggle.json
          chmod 600 ~/.kaggle/kaggle.json

      - name: Run the scraper
        run: python scrape.py

      - name: Commit and Push Updated Dataset to GitHub
        env:
          # NOTE(review): nothing in this step's script references this
          # variable; the push presumably authenticates with the credentials
          # persisted by the checkout step — confirm before removing.
          GITHUB_TOKEN: ${{ secrets.GITHUBTOKEN }}
        run: |
          git config --local user.name "github-actions[bot]"
          git config --local user.email "github-actions[bot]@users.noreply.github.com"
          git add data/
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the job and skip the Kaggle step; only commit real changes.
          if git diff --cached --quiet; then
            echo "No changes in data/; skipping commit and push."
          else
            git commit -m "Update dataset in data/ folder"
            git push origin main # Specify the branch to push to
          fi

      - name: Upload data to Kaggle
        run: python kaggleUpdate.py