From 4187f03983b93ba8b7f8fbfb6f38f516917dcf55 Mon Sep 17 00:00:00 2001 From: Tania Allard Date: Thu, 1 Dec 2022 15:56:22 +0000 Subject: [PATCH] MAINT - Miscellaneous maintenance tasks (#1580) --- .github/ISSUE_TEMPLATE/RFD.md | 52 --- .github/workflows/release.yaml | 18 +- .github/workflows/run-pre-commit.yaml | 6 +- .github/workflows/test-provider.yaml | 9 +- .github/workflows/test.yaml | 6 +- .pre-commit-config.yaml | 136 ++++--- docs/index.md | 8 +- docs/source/admin_guide/argo-workflows.md | 16 +- docs/source/admin_guide/awss3curl.md | 19 +- docs/source/admin_guide/backup.md | 81 ++-- docs/source/admin_guide/breaking-upgrade.md | 83 ++-- docs/source/admin_guide/clearml.md | 19 +- docs/source/admin_guide/cost.md | 52 ++- docs/source/admin_guide/custom-helm-charts.md | 5 +- docs/source/admin_guide/faq.md | 30 +- docs/source/admin_guide/gpu.md | 45 ++- docs/source/admin_guide/jupyterhub.md | 22 +- docs/source/admin_guide/keycloak.md | 18 +- docs/source/admin_guide/monitoring.md | 18 +- .../admin_guide/preemptible-spot-instances.md | 8 +- docs/source/admin_guide/prefect.md | 57 +-- docs/source/admin_guide/system_maintenance.md | 34 +- docs/source/admin_guide/traefik.md | 8 +- docs/source/admin_guide/troubleshooting.md | 78 ++-- docs/source/admin_guide/upgrade.md | 47 ++- docs/source/dev_guide/architecture.md | 128 ++++--- docs/source/dev_guide/contribution.md | 10 +- docs/source/dev_guide/keycloak.md | 104 +++--- docs/source/dev_guide/minikube.md | 173 +++++---- docs/source/dev_guide/release.md | 12 +- docs/source/dev_guide/testing.md | 93 +++-- docs/source/installation/configuration.md | 353 +++++++++++------- docs/source/installation/existing.md | 23 +- docs/source/installation/login.md | 83 ++-- docs/source/installation/management.md | 27 +- docs/source/installation/setup.md | 188 ++++++---- docs/source/installation/usage.md | 108 +++--- docs/source/introduction/index.md | 34 +- docs/source/introduction/qhub-101.md | 67 ++-- docs/source/user_guide/code_server.md | 11 +- docs/source/user_guide/dashboard.md | 36 +- docs/source/user_guide/dask_gateway.md | 65 ++-- docs/source/user_guide/environments.md | 33 +- docs/source/user_guide/faq.md | 64 ++-- docs/source/user_guide/getting_started.md | 43 ++- docs/source/user_guide/idle_culler.md | 26 +- docs/source/user_guide/ssh.md | 25 +- docs/source/user_guide/training.md | 16 +- docs/source/user_guide/troubleshooting.md | 38 +- nebari/provider/cicd/github.py | 4 +- 50 files changed, 1546 insertions(+), 1093 deletions(-) delete mode 100644 .github/ISSUE_TEMPLATE/RFD.md diff --git a/.github/ISSUE_TEMPLATE/RFD.md b/.github/ISSUE_TEMPLATE/RFD.md deleted file mode 100644 index b1af18c825..0000000000 --- a/.github/ISSUE_TEMPLATE/RFD.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -name: "Request for Discussion (RFD) 💬" -about: Open discussion about a feature or design project in Nebari. 
-labels: - - "type: RFD" -title: "RFD - Title" ---- - - - - - - -| Status | Draft 🚧 / Open for comments 💬/ Accepted ✅ /Implemented 🚀/ Obsolete 🗃 | -| ----------------- | ------------------------------------------------------------------------ | -| Author(s) | GitHub handle | -| Date Created | dd-MM-YYY | -| Date Last updated | dd-MM-YYY | -| Decision deadline | dd-MM-YYY | - -# Title - -## Summary - - - -## User benefit - - - -## Design Proposal - - - -### Alternatives or approaches considered (if any) - - - -### Best practices - - - -### User impact - - - -## Unresolved questions - - diff --git a/.github/workflows/release.yaml b/.github/workflows/release.yaml index a1902ff239..c27bc82d41 100644 --- a/.github/workflows/release.yaml +++ b/.github/workflows/release.yaml @@ -11,7 +11,7 @@ jobs: steps: - name: Set up python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: "3.10" @@ -19,7 +19,7 @@ jobs: run: python -m pip install --upgrade pip build - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 @@ -41,10 +41,10 @@ jobs: - name: Test install from Test PyPI run: | - pip install \ - --index-url https://test.pypi.org/simple/ \ - --extra-index-url https://pypi.org/simple \ - nebari==${{ env.NEBARI_TAG }} + pip install \ + --index-url https://test.pypi.org/simple/ \ + --extra-index-url https://pypi.org/simple \ + nebari==${{ env.NEBARI_TAG }} release-pypi: name: Publish Nebari on PyPi @@ -53,7 +53,7 @@ jobs: steps: - name: Set up python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: "3.10" @@ -61,7 +61,7 @@ jobs: run: python -m pip install --upgrade pip build - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 with: fetch-depth: 0 @@ -69,6 +69,6 @@ jobs: run: python -m build --sdist --wheel . 
- name: Publish package - uses: pypa/gh-action-pypi-publish@release/v1 + uses: pypa/gh-action-pypi-publish@v1.5.1 with: password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/run-pre-commit.yaml b/.github/workflows/run-pre-commit.yaml index 160bf53501..c6b4c0bd4b 100644 --- a/.github/workflows/run-pre-commit.yaml +++ b/.github/workflows/run-pre-commit.yaml @@ -3,8 +3,8 @@ name: Run pre-commit on: push: branches: - - main - - release/\d{4}.\d{1,2}.\d{1,2} + - main + - release/\d{4}.\d{1,2}.\d{1,2} pull_request: jobs: @@ -16,7 +16,7 @@ jobs: shell: bash -l {0} steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v3 - name: Install pre-commit run: pip install pre-commit diff --git a/.github/workflows/test-provider.yaml b/.github/workflows/test-provider.yaml index c8668d9f57..56184ed4f8 100644 --- a/.github/workflows/test-provider.yaml +++ b/.github/workflows/test-provider.yaml @@ -39,11 +39,10 @@ env: ARM_SUBSCRIPTION_ID: ${{ secrets.ARM_SUBSCRIPTION_ID }} ARM_TENANT_ID: ${{ secrets.ARM_TENANT_ID }} - jobs: test-render-providers: if: github.event.pull_request.head.repo.full_name == github.repository || github.event_name != 'pull_request' - name: 'Test Nebari Provider' + name: "Test Nebari Provider" runs-on: ubuntu-latest strategy: matrix: @@ -59,8 +58,8 @@ jobs: - github-actions - gitlab-ci steps: - - name: 'Checkout Infrastructure' - uses: actions/checkout@main + - name: "Checkout Infrastructure" + uses: actions/checkout@v3 - name: Checkout the branch from the PR that triggered the job if: ${{ github.event_name == 'issue_comment' }} @@ -69,7 +68,7 @@ jobs: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - name: Set up Python - uses: actions/setup-python@v1 + uses: actions/setup-python@v4 with: python-version: 3.8 - name: Set up Cloud SDK diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3cc55fe5af..cfc1faf603 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -27,7 +27,7 @@ on: jobs: test-general: - name: 'Pytest' + name: "Pytest" runs-on: ubuntu-latest strategy: matrix: @@ -37,8 +37,8 @@ jobs: - "3.9" - "3.10" steps: - - name: 'Checkout Infrastructure' - uses: actions/checkout@main + - name: "Checkout Infrastructure" + uses: actions/checkout@v3 with: fetch-depth: 0 - name: Set up Python diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a3f765d43c..ee8f15c525 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,69 +1,87 @@ +# pre-commit is a tool to perform a predefined set of tasks manually and/or +# automatically before git commits are made. +# +# Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level +# +# Common tasks +# +# - Register git hooks: pre-commit install --install-hooks +# - Run on all files: pre-commit run --all-files +# # These pre-commit hooks are run as CI. - +# # NOTE: if it can be avoided, add configs/args in pyproject.toml, setup.cfg or below instead of creating a new `.config.file`. 
- repos: -# general -- repo: /~https://github.com/pre-commit/pre-commit-hooks - rev: v2.3.0 - hooks: - - id: end-of-file-fixer - - id: trailing-whitespace - - id: check-json - - id: check-yaml - # jinja2 templates for helm charts - exclude: 'nebari/template/stages/07-kubernetes-services/modules/kubernetes/services/(clearml/chart/templates/.*|prefect/chart/templates/.*)' - args: [--allow-multiple-documents] + # general + - repo: /~https://github.com/pre-commit/pre-commit-hooks + rev: v4.3.0 + hooks: + - id: end-of-file-fixer + - id: trailing-whitespace + - id: check-json + - id: check-yaml + # jinja2 templates for helm charts + exclude: "nebari/template/stages/07-kubernetes-services/modules/kubernetes/services/(clearml/chart/templates/.*|prefect/chart/templates/.*)" + args: [--allow-multiple-documents] + - id: check-toml + # Lint: Checks that non-binary executables have a proper shebang. + - id: check-executables-have-shebangs + exclude: "^nebari/template/" -- repo: /~https://github.com/codespell-project/codespell - rev: v2.1.0 - hooks: - - id: codespell - # --write (-w) enabled here - # see setup.cfg for more config options - entry: codespell -w - language: python + - repo: /~https://github.com/codespell-project/codespell + rev: v2.2.2 + hooks: + - id: codespell + # --write (-w) enabled here + # see setup.cfg for more config options + entry: codespell -w + language: python -# python -- repo: /~https://github.com/psf/black - rev: 22.3.0 - hooks: - - id: black + # python + - repo: /~https://github.com/psf/black + rev: 22.10.0 + hooks: + - id: black -- repo: /~https://github.com/pycqa/flake8 - rev: 3.8.4 - hooks: - - id: flake8 - args: [ - "--builtins=c" - ] + - repo: /~https://github.com/pycqa/flake8 + rev: 5.0.4 + hooks: + - id: flake8 + args: + [ + "--builtins=c", + "--ignore=E203,E266,E501,W503", + "--exclude=[.git,__pycache__,docs/source/conf.py,nebari/template,build,dist,docs,home]", + ] -- repo: /~https://github.com/pycqa/isort - rev: 5.10.1 - hooks: - - id: isort - name: isort - args: ["--profile", "black"] + - repo: /~https://github.com/pycqa/isort + rev: 5.10.1 + hooks: + - id: isort + name: isort + additional_dependencies: [toml] + files: \.py$ + args: ["--profile", "black"] -# terraform -- repo: /~https://github.com/antonbabenko/pre-commit-terraform - rev: v1.71.0 - hooks: - - id: terraform_fmt - args: - - --args=-write=true + # terraform + - repo: /~https://github.com/antonbabenko/pre-commit-terraform + rev: v1.76.0 + hooks: + - id: terraform_fmt + args: + - --args=-write=true -# markdown -- repo: /~https://github.com/executablebooks/mdformat - rev: 0.7.14 - hooks: - - id: mdformat - files: ^docs/ - name: mdformat - entry: mdformat --wrap=180 --number --end-of-line=lf - language: python - types: [markdown] - minimum_pre_commit_version: '2.0.0' - additional_dependencies: - - mdformat-tables + # markdown + - repo: /~https://github.com/executablebooks/mdformat + rev: 0.7.16 + hooks: + - id: mdformat + files: ^docs/ + name: mdformat + entry: mdformat --wrap=120 --number --end-of-line=lf + language: python + types: [markdown] + minimum_pre_commit_version: "2.0.0" + additional_dependencies: + - mdformat-tables diff --git a/docs/index.md b/docs/index.md index a0e1942213..1a221a2ce2 100644 --- a/docs/index.md +++ b/docs/index.md @@ -9,9 +9,11 @@ Open source tooling for data science research, development, and deployment. # What is QHub? -QHUb is an integrated data science environment designed and developed by scientists at [**Quansight**](https://www.quansight.com/). 
It enables teams to build and maintain a cost -effective and scalable compute/data science platform in the Cloud, providing an [**Infrastructure as Code**](https://en.wikipedia.org/wiki/Infrastructure_as_code) platform that -streamlines the deployment of data science infrastructure. +QHUb is an integrated data science environment designed and developed by scientists at +[**Quansight**](https://www.quansight.com/). It enables teams to build and maintain a cost effective and scalable +compute/data science platform in the Cloud, providing an +[**Infrastructure as Code**](https://en.wikipedia.org/wiki/Infrastructure_as_code) platform that streamlines the +deployment of data science infrastructure. ```{toctree} :maxdepth: 1 diff --git a/docs/source/admin_guide/argo-workflows.md b/docs/source/admin_guide/argo-workflows.md index 82fe7a895f..f5baf5835b 100644 --- a/docs/source/admin_guide/argo-workflows.md +++ b/docs/source/admin_guide/argo-workflows.md @@ -1,10 +1,12 @@ # Argo Workflows -Argo Workflows is an open source container-native workflow engine for orchestrating parallel jobs on Kubernetes. Argo workflows comes enabled by default with Qhub deployments. +Argo Workflows is an open source container-native workflow engine for orchestrating parallel jobs on Kubernetes. Argo +workflows comes enabled by default with Qhub deployments. ## Accessing Argo Server -If Argo Workflows is enabled, users can access argo workflows server at: `your-qhub-domain.com/argo`. Log in via Keycloak with your usual credentials. +If Argo Workflows is enabled, users can access argo workflows server at: `your-qhub-domain.com/argo`. Log in via +Keycloak with your usual credentials. Refer to the [Argo documentation](https://argoproj.github.io/argo-workflows/) for further details on Argo Workflows. @@ -16,9 +18,10 @@ You can submit a workflow by clicking "SUBMIT NEW WORKFLOW" on the landing page. ## Overrides of Argo Workflows Helm Chart values -Argo Workflows is deployed using Argo Workflows Helm Chart version 0.13.1. The values.yaml for the helm chart can be overridden as needed via the overrides flag. The default values -file can be found [here](/~https://github.com/argoproj/argo-helm/blob/argo-workflows-0.13.1/charts/argo-workflows/values.yaml). For example, the following could be done to add -additional environment variables to the controller container. +Argo Workflows is deployed using Argo Workflows Helm Chart version 0.13.1. The values.yaml for the helm chart can be +overridden as needed via the overrides flag. The default values file can be found +[here](/~https://github.com/argoproj/argo-helm/blob/argo-workflows-0.13.1/charts/argo-workflows/values.yaml). For example, +the following could be done to add additional environment variables to the controller container. ```yaml argo_workflows: @@ -32,7 +35,8 @@ argo_workflows: ## Disabling Argo Workflows -To turn off the cluster monitoring on QHub deployments, simply turn off the feature flag within your `qhub-config.yaml` file. For example: +To turn off the cluster monitoring on QHub deployments, simply turn off the feature flag within your `qhub-config.yaml` +file. For example: ```yaml argo_workflows: diff --git a/docs/source/admin_guide/awss3curl.md b/docs/source/admin_guide/awss3curl.md index 02feee2b8e..c247b9a3b6 100644 --- a/docs/source/admin_guide/awss3curl.md +++ b/docs/source/admin_guide/awss3curl.md @@ -1,13 +1,17 @@ # Using Curl to access AWS S3 -In some situations, users may wish to upload content to S3 or download content from an S3 bucket. 
For example, when attempting [manual backups of QHub's data](./backup.md). +In some situations, users may wish to upload content to S3 or download content from an S3 bucket. For example, when +attempting [manual backups of QHub's data](./backup.md). -In many cases, the most straightforward way to access AWS S3 buckets is by installing and using AWS's command-line tool. But in some situations - for example, to back up the -JupyterHub SQLite database - it may be difficult to install AWS' CLI tools due to being in a restricted container environment. In that situation, it is possible to fall back on -AWS' basic REST API and use HTTPS requests directly instead. (Ultimately, the AWS CLI is simply a wrapper around those REST APIs.) +In many cases, the most straightforward way to access AWS S3 buckets is by installing and using AWS's command-line tool. +But in some situations - for example, to back up the JupyterHub SQLite database - it may be difficult to install AWS' +CLI tools due to being in a restricted container environment. In that situation, it is possible to fall back on AWS' +basic REST API and use HTTPS requests directly instead. (Ultimately, the AWS CLI is simply a wrapper around those REST +APIs.) -This document describes how to use `curl` commands to interface with S3 directly, specifically in the case of [uploading a backup of JupyterHub's SQLite database](./backup.md) from -a restricted pod to S3 (or restoring it from a backup from S3). +This document describes how to use `curl` commands to interface with S3 directly, specifically in the case of +[uploading a backup of JupyterHub's SQLite database](./backup.md) from a restricted pod to S3 (or restoring it from a +backup from S3). ## Common settings @@ -71,5 +75,6 @@ curl -H "Host: s3-${region}.amazonaws.com" \ ______________________________________________________________________ -Inspired by [this article on how to use curl to upload files to was s3](https://www.gyanblog.com/aws/how-upload-aws-s3-curl/) and +Inspired by +[this article on how to use curl to upload files to was s3](https://www.gyanblog.com/aws/how-upload-aws-s3-curl/) and [this StackOverflow answer on how to access was s3 buckets](https://stackoverflow.com/a/57516606/2792760). diff --git a/docs/source/admin_guide/backup.md b/docs/source/admin_guide/backup.md index c60a2f8fdf..8b2b5feacd 100644 --- a/docs/source/admin_guide/backup.md +++ b/docs/source/admin_guide/backup.md @@ -2,8 +2,8 @@ Your cloud provider may have native ways to backup your Kubernetes cluster and volumes. -This guide describes how you would manually obtain the data you need to repopulate your QHub if your cluster is lost and you wish to start it up again from the `qhub-config.yaml` -file. +This guide describes how you would manually obtain the data you need to repopulate your QHub if your cluster is lost and +you wish to start it up again from the `qhub-config.yaml` file. There are three main locations that you need to backup: @@ -28,7 +28,8 @@ This specific guide shows how to do this on an AWS cluster and upload to AWS S3. ### Kubectl configuration -To setup kubectl, obtain the name of the cluster. If you know the deployment region of the current cluster, this is straightforward: +To setup kubectl, obtain the name of the cluster. 
If you know the deployment region of the current cluster, this is +straightforward: ```shell aws eks list-clusters --region=us-west-2 @@ -42,8 +43,8 @@ aws eks update-kubeconfig --region us-west-2 --name ### Pod deployment -With `kubectl` configured, the next step will be to deploy the pod that allows you to access the cluster files. First, save the following pod specification to a file named -`pod.yaml`: +With `kubectl` configured, the next step will be to deploy the pod that allows you to access the cluster files. First, +save the following pod specification to a file named `pod.yaml`: ```yaml kind: Pod @@ -65,7 +66,8 @@ spec: name: volume-to-debug-ubuntu ``` -> Note in QHub versions before v0.4 replace `claimName: "jupyterhub-dev-share"` with `claimName: "nfs-mount-dev-share"` above. +> Note in QHub versions before v0.4 replace `claimName: "jupyterhub-dev-share"` with `claimName: "nfs-mount-dev-share"` +> above. Once the file `pod.yml` has been created, run the following command: @@ -73,7 +75,8 @@ Once the file `pod.yml` has been created, run the following command: kubectl apply -f pod.yaml -n dev ``` -If you have a namespace other than the default dev, replace `dev` with your namespace when running `kubectl`. To get a shell to this running pod, run: +If you have a namespace other than the default dev, replace `dev` with your namespace when running `kubectl`. To get a +shell to this running pod, run: ```shell kubectl exec -n dev --stdin --tty volume-debugger-ubuntu -- /bin/bash @@ -100,7 +103,8 @@ unzip awscliv2.zip aws configure ``` -The last line in the command above prompts for your AWS public/private key and default region. Paste each of these and press enter. To ignore and skip the output, press enter. +The last line in the command above prompts for your AWS public/private key and default region. Paste each of these and +press enter. To ignore and skip the output, press enter. ### Backups @@ -111,8 +115,8 @@ cd /data tar -cvf .tar . ``` -The preferred naming scheme includes a year-month-day, example `2021-04-23_home_backup.tar`. You can utilize multi-backups through this step. This step takes several minutes -depending on the size of the home directories. +The preferred naming scheme includes a year-month-day, example `2021-04-23_home_backup.tar`. You can utilize +multi-backups through this step. This step takes several minutes depending on the size of the home directories. ### Upload to block storage @@ -122,8 +126,8 @@ Once this is complete, upload the tar file to S3 using the AWS command-line tool aws s3 cp 2021-04-23.tar s3:///backups/2021-04-23.tar ``` -Replacing `your_bucket_name` with a bucket you have created. If you don't have an existing bucket, instructions are here: - +Replacing `your_bucket_name` with a bucket you have created. If you don't have an existing bucket, instructions are +here: ### Download from block storage and decompress @@ -152,9 +156,11 @@ The file permissions for the default tar is same as the original files. > **Important: If upgrading from 0.3.14 or earlier to 0.4 or later** > -> QHub v0.4: If restoring your NFS as part of the upgrade you must also run some extra commands, immediately after extracting from the tar file. +> QHub v0.4: If restoring your NFS as part of the upgrade you must also run some extra commands, immediately after +> extracting from the tar file. > -> Previous versions contained the `shared` folder within `home`. 
From `0.4.0` both `shared` and `home` directories are at the same level with respect to the QHub filesystem: +> Previous versions contained the `shared` folder within `home`. From `0.4.0` both `shared` and `home` directories are +> at the same level with respect to the QHub filesystem: > > ```shell > cd /data @@ -173,8 +179,9 @@ The file permissions for the default tar is same as the original files. ### Google cloud provider -To use the Google Cloud provider, install the [gsutil](https://cloud.google.com/storage/docs/gsutil_install) CLI instead of the AWS CLI. Otherwise, the instructions are the same as -for AWS above, other than when working with S3. Here are the commands to access Google Spaces instead of S3 for copy/download of the backup: +To use the Google Cloud provider, install the [gsutil](https://cloud.google.com/storage/docs/gsutil_install) CLI instead +of the AWS CLI. Otherwise, the instructions are the same as for AWS above, other than when working with S3. Here are the +commands to access Google Spaces instead of S3 for copy/download of the backup: ```shell cd /data @@ -186,17 +193,21 @@ gsutil cp gs:///backups/2021-04-23.tar . ### Digital Ocean -Instructions will be similar to those for AWS above, but use Digital Ocean spaces instead of S3. This guide explains installation of the command-line tool: +Instructions will be similar to those for AWS above, but use Digital Ocean spaces instead of S3. This guide explains +installation of the command-line tool: ## JupyterHub Database -The JupyterHub database will mostly be recreated whenever you start a new cluster, but should be backed up to save Dashboard configurations. +The JupyterHub database will mostly be recreated whenever you start a new cluster, but should be backed up to save +Dashboard configurations. -You want to do something very similar to the NFS backup, above - this time you need to back up just one file located in the PersistentVolume `hub-db-dir`. +You want to do something very similar to the NFS backup, above - this time you need to back up just one file located in +the PersistentVolume `hub-db-dir`. -First, you might think you can just make a new `pod.yaml` file, this time specifying `claimName: "hub-db-dir"` instead of `claimName: "jupyterhub-dev-share"`. However, `hub-db-dir` -is 'Read Write Once' - the 'Once' meaning it can only be mounted to one pod at a time, but the JupyterHub pod will already have this mounted! So the same approach will not work +First, you might think you can just make a new `pod.yaml` file, this time specifying `claimName: "hub-db-dir"` instead +of `claimName: "jupyterhub-dev-share"`. However, `hub-db-dir` is 'Read Write Once' - the 'Once' meaning it can only be +mounted to one pod at a time, but the JupyterHub pod will already have this mounted! So the same approach will not work here. Instead of mounting to a new 'debugger pod' you have to access the JupyterHub pod directly using the `kubectl` CLI. @@ -219,8 +230,9 @@ There is no need to TAR anything up since the only file required to be backed up ### Backing up JupyterHub DB -Now we just need to upload the file to S3. You might want to [install the AWS CLI tool](#installations) as we did before, however, as the Hub container is a rather restricted -environment the recommended approach is to upload files to AWS S3 buckets using curl. +Now we just need to upload the file to S3. 
You might want to [install the AWS CLI tool](#installations) as we did +before, however, as the Hub container is a rather restricted environment the recommended approach is to upload files to +AWS S3 buckets using curl. For more details please refer to the [using curl to access AWS S3 buckets](./awss3curl.md) documentation. @@ -238,18 +250,22 @@ As for uploads, [you may need to use curl to download items from an AWS S3 bucke ## Keycloak user/group database -QHub provides a simple script to export the important user/group database. Your new QHub cluster will recreate a lot of Keycloak config (including new Keycloak clients which will -have new secrets), so only the high-level Group and User info is exported. +QHub provides a simple script to export the important user/group database. Your new QHub cluster will recreate a lot of +Keycloak config (including new Keycloak clients which will have new secrets), so only the high-level Group and User info +is exported. If you have a heavily customized Keycloak configuration, some details may be omitted in this export. ### Export Keycloak -The export script is at [`qhub/scripts/keycloak-export.py`](/~https://github.com/Quansight/qhub/blob/main/scripts/keycloak-export.py). +The export script is at +[`qhub/scripts/keycloak-export.py`](/~https://github.com/Quansight/qhub/blob/main/scripts/keycloak-export.py). -Locate your `qhub-config.yaml` file, for example by checking out of your Git repo for you QHub. Activate a virtual environment with the `qhub` Python package installed. +Locate your `qhub-config.yaml` file, for example by checking out of your Git repo for you QHub. Activate a virtual +environment with the `qhub` Python package installed. -This assumes that the password visible in the `qhub-config.yaml` file under the `security.keycloak.initial_root_password` field is still valid for the root user. +This assumes that the password visible in the `qhub-config.yaml` file under the +`security.keycloak.initial_root_password` field is still valid for the root user. If not, first set the `KEYCLOAK_ADMIN_PASSWORD` environment variable to the new value. @@ -263,8 +279,11 @@ You may wish to upload the Keycloak export to the same S3 location where you upl ### Import Keycloak -To re-import your users and groups, [login to the /auth/ URL](../installation/login.md) using the root username and password. +To re-import your users and groups, [login to the /auth/ URL](../installation/login.md) using the root username and +password. -Under 'Manage' on the left-hand side, click 'Import'. Locate the `exported-keycloak.json` file and select it. Then click the 'Import' button. +Under 'Manage' on the left-hand side, click 'Import'. Locate the `exported-keycloak.json` file and select it. Then click +the 'Import' button. -All users and groups should now be present in Keycloak. Note that the passwords will not have been restored so you may need to be reset them after this step. +All users and groups should now be present in Keycloak. Note that the passwords will not have been restored so you may +need to be reset them after this step. 
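As a rough sketch of moving the `exported-keycloak.json` file described above to and from S3, assuming the AWS CLI is
configured as in the earlier backup steps and reusing the `<your_bucket_name>` placeholder from the NFS backup section:

```shell
# After exporting: store the Keycloak export alongside the other backups.
aws s3 cp exported-keycloak.json s3://<your_bucket_name>/backups/exported-keycloak.json

# Before importing: retrieve the export so it can be selected in the Keycloak admin console.
aws s3 cp s3://<your_bucket_name>/backups/exported-keycloak.json .
```

Any equivalent tool (or the curl approach from the AWS S3 guide referenced earlier) works just as well; the only
requirement is that the JSON file ends up somewhere it can be retrieved before the import step.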
diff --git a/docs/source/admin_guide/breaking-upgrade.md b/docs/source/admin_guide/breaking-upgrade.md index 59676ed010..5f8102e00e 100644 --- a/docs/source/admin_guide/breaking-upgrade.md +++ b/docs/source/admin_guide/breaking-upgrade.md @@ -1,34 +1,41 @@ # Upgrade - Redeployment for Breaking Changes -For versions that are known to require redeployment when upgrading, this is an outline of the steps required to upgrade QHub: +For versions that are known to require redeployment when upgrading, this is an outline of the steps required to upgrade +QHub: - Back up user data by following [this backup guide](./backup.md). - Change the existing cluster to a different URL (for example `qhub-old.mycompany.com`) so it is hidden. - Run `qhub upgrade` to make recommended modifications to the `qhub-config.yaml` file. -- Deploy a new QHub with the desired version (to your original preferred URL for example `qhub.mycompany.com` but a new project_name to avoid resource name clashes). +- Deploy a new QHub with the desired version (to your original preferred URL for example `qhub.mycompany.com` but a new + project_name to avoid resource name clashes). - Restore user data to the new cluster. - Once you have confirmed the backup and upgrade have been successfully completed, you can destroy the old cluster. Full details of the steps appear below. -Please always check the [release notes](../dev_guide/changelog.md) for more details - and in all cases, backup your data before upgrading. +Please always check the [release notes](../dev_guide/changelog.md) for more details - and in all cases, backup your data +before upgrading. > The rest of this guide assumes you are upgrading from version v0.3.14 (or earlier) to v0.4. -You may be deploying QHub based on a local configuration file, or you may be using CI/CD workflows in GitHub or GitLab. Either way, you will need to locate a copy of your -`qhub-config.yaml` configuration file to upgrade it (and commit back to your git repo in the CI/CD case). +You may be deploying QHub based on a local configuration file, or you may be using CI/CD workflows in GitHub or GitLab. +Either way, you will need to locate a copy of your `qhub-config.yaml` configuration file to upgrade it (and commit back +to your git repo in the CI/CD case). -For CI/CD deployments, you will need to `git clone ` into a folder on your local machine if you haven't done so already. +For CI/CD deployments, you will need to `git clone ` into a folder on your local machine if you haven't done +so already. ## 1. Backup existing data -Perform manual backups of the NFS data and JupyterHub database (ignore the section about Keycloak data since that will not exist in your v0.3.14 cluster). +Perform manual backups of the NFS data and JupyterHub database (ignore the section about Keycloak data since that will +not exist in your v0.3.14 cluster). For more details please refer to the [Manual Backup guide](./backup.md). ## 2. Rename existing QHub URL -This will allow the existing cluster to remain alive in case it is needed, but the idea would be not to have it in use from now on. +This will allow the existing cluster to remain alive in case it is needed, but the idea would be not to have it in use +from now on. In the `qhub-config.yaml` for example: @@ -64,15 +71,16 @@ In the folder containing your QHub configuration file, run: qhub upgrade -c qhub-config.yaml ``` -This will output a newer version of `qhub-config.yaml` that's compatible with the new version of `qhub`. The process outputs a list of changes it has made. 
+This will output a newer version of `qhub-config.yaml` that's compatible with the new version of `qhub`. The process +outputs a list of changes it has made. -The `upgrade` command creates a copy of the original unmodified config file (`qhub-config.yaml.old.backup`) as well as a JSON file (`qhub-users-import.json`) used to import -existing users into Keycloak. +The `upgrade` command creates a copy of the original unmodified config file (`qhub-config.yaml.old.backup`) as well as a +JSON file (`qhub-users-import.json`) used to import existing users into Keycloak. ## 5. Rename the Project and Increase Kubernetes version -You need to rename the project to avoid clashes with the existing (old) cluster which would otherwise already own resources based on the names that the new cluster will attempt to -use. +You need to rename the project to avoid clashes with the existing (old) cluster which would otherwise already own +resources based on the names that the new cluster will attempt to use. The domain should remain as the preferred main one that was always in use previously. @@ -90,8 +98,8 @@ project_name: myqhubnew domain: qhub.myproj.com ``` -> It is also a good time to upgrade your version of Kubernetes. Look for the `kubernetes_version` field within the cloud provider section of the `qhub-config.yaml` file and -> increase it to the latest. +> It is also a good time to upgrade your version of Kubernetes. Look for the `kubernetes_version` field within the cloud +> provider section of the `qhub-config.yaml` file and increase it to the latest. ## 6. Redeploy QHub @@ -101,21 +109,23 @@ You will now have a `qhub-config.yaml` file that you can deploy. qhub deploy -c qhub-config.yaml ``` -At this point you will see an error message saying that deployment is prevented due to the `prevent_deploy` setting in your YAML file. This is a safeguard to ensure that you only -proceed if you are aware of possible breaking changes in the current upgrade. +At this point you will see an error message saying that deployment is prevented due to the `prevent_deploy` setting in +your YAML file. This is a safeguard to ensure that you only proceed if you are aware of possible breaking changes in the +current upgrade. Make sure to **backup your data** as described in the [backup section of the documentation](./backup.md). -Only after backing up your data proceed to remove the `prevent_deploy: true` line in the `qhub-config.yaml` file. This `prevent_deploy` functionality is there as a safeguard. -Please only remove it if you understand why it was there in the first place - as a way to stop users blindly upgrading without realising they absolutely needed to backup their data +Only after backing up your data proceed to remove the `prevent_deploy: true` line in the `qhub-config.yaml` file. This +`prevent_deploy` functionality is there as a safeguard. Please only remove it if you understand why it was there in the +first place - as a way to stop users blindly upgrading without realising they absolutely needed to backup their data first so that it can be restored into a completely new cluster. Run the `qhub deploy -c qhub-config.yaml` command again and it should get further this time. ## 7. CI/CD: render and commit to git -For CI/CD (GitHub/GitLab) workflows, as well as generating the updated `qhub-config.yaml` files as above, you will also need to regenerate the workflow files based on the latest -`qhub` version's templates. 
+For CI/CD (GitHub/GitLab) workflows, as well as generating the updated `qhub-config.yaml` files as above, you will also +need to regenerate the workflow files based on the latest `qhub` version's templates. With the newly upgraded `qhub-config.yaml` file, run: @@ -123,10 +133,11 @@ With the newly upgraded `qhub-config.yaml` file, run: qhub render -c qhub-config.yaml ``` -(Note that `qhub deploy` would have performed this render step too, but will also immediately redeploy your QHub instance. Run the render command alone if you are now working -separately in your repo and don't want to redeploy.) +(Note that `qhub deploy` would have performed this render step too, but will also immediately redeploy your QHub +instance. Run the render command alone if you are now working separately in your repo and don't want to redeploy.) -Commit all the files (`qhub-config.yaml` and GitHub/GitLab workflow files) back to the remote repo. All files need to be committed together in the same commit. For example: +Commit all the files (`qhub-config.yaml` and GitHub/GitLab workflow files) back to the remote repo. All files need to be +committed together in the same commit. For example: ```shell git commit -am"First render of 0.4" @@ -142,8 +153,9 @@ If your QHub deployment relies on Auth0 or GitHub for authentication, please upd 2. Select the "Regular Web Application" with the name of your deployment. -3. Under the "Application URIs" section, paste the new OAuth callback URL in the "Allowed Callback URLs" text block. The URL should be - `https://{your-qhub-domain}/auth/realms/qhub/broker/auth0/endpoint`, replacing `{your-qhub-domain}` with your literal domain of course. +3. Under the "Application URIs" section, paste the new OAuth callback URL in the "Allowed Callback URLs" text block. The + URL should be `https://{your-qhub-domain}/auth/realms/qhub/broker/auth0/endpoint`, replacing `{your-qhub-domain}` + with your literal domain of course. @@ -153,31 +165,36 @@ If your QHub deployment relies on Auth0 or GitHub for authentication, please upd 2. Click "OAuth Apps" and then click the app representing your QHub instance. -3. Under "Authorization callback URL", paste the new GitHub callback URL. The URL should be `https://{your-qhub-domain}/auth/realms/qhub/broker/github/endpoint`, replacing - `{your-qhub-domain}` with your literal domain of course. +3. Under "Authorization callback URL", paste the new GitHub callback URL. The URL should be + `https://{your-qhub-domain}/auth/realms/qhub/broker/github/endpoint`, replacing `{your-qhub-domain}` with your + literal domain of course. ## 9. Restore from Backups -Next, you will need to perform the following steps to restore from a previously generated backup, as described in the [Manual Backups documentation](./backup.md): +Next, you will need to perform the following steps to restore from a previously generated backup, as described in the +[Manual Backups documentation](./backup.md): 1. Restore the NFS data from your S3 (or similar) backup -2. Immediately after restoring NFS data, you must run some extra commands as explained in the backup/restore docs for v0.4 upgrades specifically. +2. Immediately after restoring NFS data, you must run some extra commands as explained in the backup/restore docs for + v0.4 upgrades specifically. 3. Restore the JupyterHub SQLite database. ## 10. Import users into Keycloak The last two steps are to: -1. Change the Keycloak `root` user password, documented [here](../installation/login.md#change-keycloak-root-password) and +1. 
Change the Keycloak `root` user password, documented [here](../installation/login.md#change-keycloak-root-password) + and 2. Import existing users, documented [here](../admin_guide/backup.md#import-keycloak). For more details on this process, visit the [Keycloak docs section](../installation/login.md). ## Known versions that require re-deployment -Version `v0.3.11` on AWS has an error with the Kubernetes config map. See [this GitHub discussion related to AWS K8s config maps](/~https://github.com/Quansight/qhub/discussions/841) -for more details. +Version `v0.3.11` on AWS has an error with the Kubernetes config map. See +[this GitHub discussion related to AWS K8s config maps](/~https://github.com/Quansight/qhub/discussions/841) for more +details. Version `v0.4`. diff --git a/docs/source/admin_guide/clearml.md b/docs/source/admin_guide/clearml.md index e6218fddf6..fdc977d1ee 100644 --- a/docs/source/admin_guide/clearml.md +++ b/docs/source/admin_guide/clearml.md @@ -1,20 +1,24 @@ # ClearML -ClearML integration comes built in with QHub, here is how you would enable this integration. Currently ClearML integration is only supported on Google Cloud Platform. +ClearML integration comes built in with QHub, here is how you would enable this integration. Currently ClearML +integration is only supported on Google Cloud Platform. ## Setting subdomain DNS Record for ClearML -ClearML components requires subdomains, you would need to set a `CNAME` or A record for the following subdomains on your QHub. +ClearML components requires subdomains, you would need to set a `CNAME` or A record for the following subdomains on your +QHub. - app.clearml.your-qhub-domain.com - files.clearml.your-qhub-domain.com - api.clearml.your-qhub-domain.com -These domains are automatically setup for you, if you're using Cloudflare and the args `--dns-provider cloudflare --dns-auto-provision` passed to `qhub deploy`. +These domains are automatically setup for you, if you're using Cloudflare and the args +`--dns-provider cloudflare --dns-auto-provision` passed to `qhub deploy`. ## Create a node group -1. To enable ClearML integration on Google Cloud QHub deployments, simply enable the feature flag within your `qhub-config.yaml` file. For example: +1. To enable ClearML integration on Google Cloud QHub deployments, simply enable the feature flag within your + `qhub-config.yaml` file. For example: ```yaml clearml: @@ -42,7 +46,8 @@ Users can access the ClearML server at: `app.clearml.your-qhub-domain.com` ## Authentication -QHub secures ClearML dashboard by default with JupyterHub OAuth via Traefik ForwardAuth. You can turn it off via a flag in the QHub config YAML: +QHub secures ClearML dashboard by default with JupyterHub OAuth via Traefik ForwardAuth. You can turn it off via a flag +in the QHub config YAML: ```yaml clearml: @@ -54,8 +59,8 @@ This is especially useful for accessing ClearML programmatically. ## Overrides -Addition helm chart variables may want to be overridden. For this an override hook is provided where you can specify anything with the -[values.yaml](/~https://github.com/allegroai/clearml-helm-charts/tree/main/charts/clearml). +Addition helm chart variables may want to be overridden. For this an override hook is provided where you can specify +anything with the [values.yaml](/~https://github.com/allegroai/clearml-helm-charts/tree/main/charts/clearml). 
```yaml clearml: diff --git a/docs/source/admin_guide/cost.md b/docs/source/admin_guide/cost.md index 75539a3491..1124897c59 100644 --- a/docs/source/admin_guide/cost.md +++ b/docs/source/admin_guide/cost.md @@ -1,13 +1,16 @@ # Cloud cost and capabilities -Qhub doesn't charge a fee for infrastructure but cloud providers themselves have pricing for all their services. A digital ocean cluster's minimum fixed cost is around $60/month. -While other cloud providers fixed cost is around $200/month. Each cloud vendor has different pricing and capabilities of their kubernetes offerings which can significantly affect -the pricing of QHub. Cost alone doesn't determine which cloud is best for your use case. Often times you can't choose the cloud that QHub runs on. In this case this document can -help determine a reasonable cost for running QHub. Keep in mind these numbers are a simplified view of pricing and won't reflect your actual bill. +Qhub doesn't charge a fee for infrastructure but cloud providers themselves have pricing for all their services. A +digital ocean cluster's minimum fixed cost is around $60/month. While other cloud providers fixed cost is around +$200/month. Each cloud vendor has different pricing and capabilities of their kubernetes offerings which can +significantly affect the pricing of QHub. Cost alone doesn't determine which cloud is best for your use case. Often +times you can't choose the cloud that QHub runs on. In this case this document can help determine a reasonable cost for +running QHub. Keep in mind these numbers are a simplified view of pricing and won't reflect your actual bill. ## Kubernetes -Often cloud providers have a fixed cost for using kubernetes. Here is a table of capabilities of each cloud kubernetes offering along with costs: +Often cloud providers have a fixed cost for using kubernetes. Here is a table of capabilities of each cloud kubernetes +offering along with costs: | Cloud | Pricing | Scale to 0? | Spot/Preemptible? | GPUs | | :---------------------------------------------------------------------------- | :-------- | :---------- | ----------------- | :--- | @@ -18,7 +21,8 @@ Often cloud providers have a fixed cost for using kubernetes. Here is a table of ## Network costs -All cloud providers charge for egress. Egress is the traffic leaving their cloud service. Additionally QHub sets up a single load balancer that all traffic goes through. +All cloud providers charge for egress. Egress is the traffic leaving their cloud service. Additionally QHub sets up a +single load balancer that all traffic goes through. | Cloud | Egress | Load Balancer | | :-------------------- | :------------ | :------------ | @@ -29,16 +33,21 @@ All cloud providers charge for egress. Egress is the traffic leaving their cloud ## Storage costs -Cloud providers provide many different types of storage. The include S3 like [object storage](https://en.wikipedia.org/wiki/Object_storage), -[block storage](), and traditional [filesystem storage](https://en.wikipedia.org/wiki/File_system). Note that each type of -storage has well known advantages and limitations. - -- Object storage is optimized for cost, bandwidth, and the cost of latency for file access. It directly affects the number of IOPs S3 is capable of. Object storage always provides - the highest bandwidth. It does provide parallel partial access to files. -- Block storage is equivalent to a physical disk attached to your machine. 
Block storage offers high [IOPs](https://en.wikipedia.org/wiki/IOPS) for latency sensitive filesystem - operations. They offer high bandwidth similar to object storage but at around 2-4 times the cost. -- Filesystem storage enables shared filesystems between multiple compute notes but at significant cost. NFS filesystem have significantly lower IOPS than block storage and - significantly lower bandwidth than object storage. Usually the users choose this option due to needing to share files between multiple machines. This offering should be a last +Cloud providers provide many different types of storage. The include S3 like +[object storage](https://en.wikipedia.org/wiki/Object_storage), +[block storage](), and traditional +[filesystem storage](https://en.wikipedia.org/wiki/File_system). Note that each type of storage has well known +advantages and limitations. + +- Object storage is optimized for cost, bandwidth, and the cost of latency for file access. It directly affects the + number of IOPs S3 is capable of. Object storage always provides the highest bandwidth. It does provide parallel + partial access to files. +- Block storage is equivalent to a physical disk attached to your machine. Block storage offers high + [IOPs](https://en.wikipedia.org/wiki/IOPS) for latency sensitive filesystem operations. They offer high bandwidth + similar to object storage but at around 2-4 times the cost. +- Filesystem storage enables shared filesystems between multiple compute notes but at significant cost. NFS filesystem + have significantly lower IOPS than block storage and significantly lower bandwidth than object storage. Usually the + users choose this option due to needing to share files between multiple machines. This offering should be a last choice due to costing around $0.20/GB. | Cloud | Object | Block | Filesystem | @@ -48,12 +57,14 @@ storage has well known advantages and limitations. | Amazon Web Services | $0.02/GB | $0.05-0.12/GB | $0.30/GB | | Azure | $0.02/GB | $0.6-0.12/GB | $0.16/GB | -Note that these prices can be deceptive to compare. Each cloud providers offering have wildly different guaranteed IOPs, burst IOPS, guaranteed bandwidth, and burst bandwidth. +Note that these prices can be deceptive to compare. Each cloud providers offering have wildly different guaranteed IOPs, +burst IOPS, guaranteed bandwidth, and burst bandwidth. ## Compute costs -Cloud providers have huge offerings of compute instances. And this guide couldn't do it all justice. A standard 4 CPU/16 GB RAM is used to compare the cloud offerings. This should -give a ballpark of the cost of running a compute instance. Note that all compute instances need an attached block storage usually at lest 10 GB. Comparing CPUs isn't a fair +Cloud providers have huge offerings of compute instances. And this guide couldn't do it all justice. A standard 4 CPU/16 +GB RAM is used to compare the cloud offerings. This should give a ballpark of the cost of running a compute instance. +Note that all compute instances need an attached block storage usually at lest 10 GB. Comparing CPUs isn't a fair comparison due to computer architecture and clock rate. | Cloud | 4 GB/16 RAM | GPUs? | ARM? | Max CPUs | Max RAM | @@ -63,4 +74,5 @@ comparison due to computer architecture and clock rate. | Amazon Web Services | $100/month | Yes | Yes | 448 | 6144 | | Azure | $120/month | Yes | No | 120 | 2400 | -The cloud prices are pretty much the same between cloud providers. 
For smaller instances, that aren't shown in the table, Digital Ocean can save some money. +The cloud prices are pretty much the same between cloud providers. For smaller instances, that aren't shown in the +table, Digital Ocean can save some money. diff --git a/docs/source/admin_guide/custom-helm-charts.md b/docs/source/admin_guide/custom-helm-charts.md index d3586b2d39..f36b7f2f4b 100644 --- a/docs/source/admin_guide/custom-helm-charts.md +++ b/docs/source/admin_guide/custom-helm-charts.md @@ -15,5 +15,6 @@ helm_extensions: enabled: true ``` -The `overrides` section is optional, but corresponds to the helm chart's [values.yaml](https://helm.sh/docs/chart_template_guide/values_files/) file, and allows you to override the -default helm chart settings. +The `overrides` section is optional, but corresponds to the helm chart's +[values.yaml](https://helm.sh/docs/chart_template_guide/values_files/) file, and allows you to override the default helm +chart settings. diff --git a/docs/source/admin_guide/faq.md b/docs/source/admin_guide/faq.md index fe2883a194..5b3c3ae481 100644 --- a/docs/source/admin_guide/faq.md +++ b/docs/source/admin_guide/faq.md @@ -3,18 +3,23 @@ ## On AWS, why do user instances occasionally die ~30 minutes after spinning up a large dask cluster? AWS uses Amazon's Elastic Kubernetes Service for hosting the Kubernetes cluster. -[Elastic Kubernetes Service requires the use of at least two availability zones](https://docs.aws.amazon.com/eks/latest/userguide/infrastructure-security.html). The QHub cluster -has an [autoscaler](https://docs.aws.amazon.com/eks/latest/userguide/cluster-autoscaler.html) that has a default service that automatically balances the number of EC2 instances -between the two availability zones. When large Dask clusters get initialized and destroyed, the autoscaler attempts to reschedule a user pod. This reschedule operation occurs in -the other availability zone. When this happens, Kubernetes doesn't successfully transfer the active pod to the other zone and the pod dies. - -To stop this occurring, the autoscaler service "AZRebalance" needs to be manually suspended. Currently this autoscaler service isn't managed by terraform. Disabling it via the -console is permanent for the life of the cluster. [There is an open issue to permanently fix this via Terraform](/~https://github.com/Quansight/qhub/issues/786) - -To turn off the AZRebalance service, follow the steps in this [AWS documentation](https://docs.aws.amazon.com/autoscaling/ec2/userguide/as-suspend-resume-processes.html) to suspend +[Elastic Kubernetes Service requires the use of at least two availability zones](https://docs.aws.amazon.com/eks/latest/userguide/infrastructure-security.html). +The QHub cluster has an [autoscaler](https://docs.aws.amazon.com/eks/latest/userguide/cluster-autoscaler.html) that has +a default service that automatically balances the number of EC2 instances between the two availability zones. When large +Dask clusters get initialized and destroyed, the autoscaler attempts to reschedule a user pod. This reschedule operation +occurs in the other availability zone. When this happens, Kubernetes doesn't successfully transfer the active pod to the +other zone and the pod dies. + +To stop this occurring, the autoscaler service "AZRebalance" needs to be manually suspended. Currently this autoscaler +service isn't managed by terraform. Disabling it via the console is permanent for the life of the cluster. 
+[There is an open issue to permanently fix this via Terraform](/~https://github.com/Quansight/qhub/issues/786) + +To turn off the AZRebalance service, follow the steps in this +[AWS documentation](https://docs.aws.amazon.com/autoscaling/ec2/userguide/as-suspend-resume-processes.html) to suspend the AZRebalance service. -To turn off the AZRebalance service, follow the steps in this [AWS documentation](https://docs.aws.amazon.com/autoscaling/ec2/userguide/as-suspend-resume-processes.html) to suspend +To turn off the AZRebalance service, follow the steps in this +[AWS documentation](https://docs.aws.amazon.com/autoscaling/ec2/userguide/as-suspend-resume-processes.html) to suspend the AZRebalance service. ## Can a user deploy an arbitrary pod? @@ -29,5 +34,6 @@ extensions: private: true ``` -This deploys a simple service based on the image provided. name must be a simple terraform-friendly string. It's available on your QHub site at the `/echo` URL, or whatever URL -slug you provide. Users need log-in credentials in if the private is true. +This deploys a simple service based on the image provided. name must be a simple terraform-friendly string. It's +available on your QHub site at the `/echo` URL, or whatever URL slug you provide. Users need log-in credentials in if +the private is true. diff --git a/docs/source/admin_guide/gpu.md b/docs/source/admin_guide/gpu.md index 5d92a1f6b0..c39c518443 100644 --- a/docs/source/admin_guide/gpu.md +++ b/docs/source/admin_guide/gpu.md @@ -1,14 +1,14 @@ # GPUs on QHub -Having access to GPUs is of prime importance for speeding up many computations by several orders of magnitude. QHub provides a way to achieve that, we will go through achieving -that for each Cloud provider. +Having access to GPUs is of prime importance for speeding up many computations by several orders of magnitude. QHub +provides a way to achieve that, we will go through achieving that for each Cloud provider. ## Clouds ### Google Cloud Platform -By default the quota to spin up GPUs on GCP is 0. Make sure you have requested GCP Support to increase quota of allowed GPUs for your billing account to be the number of GPUs you -need access to. +By default the quota to spin up GPUs on GCP is 0. Make sure you have requested GCP Support to increase quota of allowed +GPUs for your billing account to be the number of GPUs you need access to. See [GCP Pre-requisites here](https://cloud.google.com/kubernetes-engine/docs/how-to/gpus#requirements) @@ -16,8 +16,9 @@ Here are the changes needed in your `qhub-config.yml` file to get GPUs working w #### 1. Add GPU node group -Add a node group for GPU instance in the `node_groups` section of `google_cloud_platform` section, and under the `guest_accelerators` section add the name of the GPU. A -comprehensive list of GPU types can be found in at the Official GCP docs here: https://cloud.google.com/compute/docs/gpus +Add a node group for GPU instance in the `node_groups` section of `google_cloud_platform` section, and under the +`guest_accelerators` section add the name of the GPU. 
A comprehensive list of GPU types can be found in at the Official +GCP docs here: https://cloud.google.com/compute/docs/gpus An example of getting GPUs on GCP: @@ -44,13 +45,16 @@ google_cloud_platform: Notes: - One of the restrictions regarding GPUs on GCP is they can only be used with general-purpose - *[N1 machine types](https://cloud.google.com/compute/docs/machine-types#n1_machine_types)*, except A100 GPUs, which are only supported on + *[N1 machine types](https://cloud.google.com/compute/docs/machine-types#n1_machine_types)*, except A100 GPUs, which + are only supported on *[a2 machine types](https://cloud.google.com/blog/products/compute/announcing-google-cloud-a2-vm-family-based-on-nvidia-a100-gpu)* -- If you are not using the gcp provider in QHub but are using gcp (let's say deploying on an existing gcp cluster). You will need to manually install NVIDIA drivers to the cluster - \- see [documentation here](https://cloud.google.com/kubernetes-engine/docs/how-to/gpus#installing_drivers). +- If you are not using the gcp provider in QHub but are using gcp (let's say deploying on an existing gcp cluster). You + will need to manually install NVIDIA drivers to the cluster - see + [documentation here](https://cloud.google.com/kubernetes-engine/docs/how-to/gpus#installing_drivers). -- See [general limitations of GPUs on Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/how-to/gpus#limitations). +- See + [general limitations of GPUs on Google Kubernetes Engine](https://cloud.google.com/kubernetes-engine/docs/how-to/gpus#limitations). #### 2. Add GPU instance in the JupyterLab profiles @@ -74,8 +78,8 @@ profiles: ### Amazon Web Services -Here are the changes needed in your `qhub-config.yml` file to get GPUs working with AWS. Unlike GCP gpus are specified in the instance type this is due to how AWS exposes gpus to -the user. +Here are the changes needed in your `qhub-config.yml` file to get GPUs working with AWS. Unlike GCP gpus are specified +in the instance type this is due to how AWS exposes gpus to the user. #### 1. Add GPU node group @@ -112,8 +116,9 @@ profiles: Notes: -- If you are not using the AWS provider in QHub but are using the AWS cloud (let's say deploying on an existing AWS cluster), you will need to manually install NVIDIA drivers to - the cluster. See [documentation here](/~https://github.com/NVIDIA/k8s-device-plugin). +- If you are not using the AWS provider in QHub but are using the AWS cloud (let's say deploying on an existing AWS + cluster), you will need to manually install NVIDIA drivers to the cluster. See + [documentation here](/~https://github.com/NVIDIA/k8s-device-plugin). ### DigitalOcean @@ -125,7 +130,8 @@ Azure does support GPUs in Kubernetes, but QHub doesn't currently have official ## Create conda environment to take advantage of GPUs -First you need to consult the driver version of nvidia being used. This can easily be checked via the command `nvidia-smi`. +First you need to consult the driver version of nvidia being used. This can easily be checked via the command +`nvidia-smi`. ```shell $ nvidia-smi @@ -149,8 +155,9 @@ Thu May 20 18:05:14 2021 +-----------------------------------------------------------------------------+ ``` -The important section is `CUDA Version`. In general you should install a version of cudatoolkit that's less than or equal to the cuda version (but not too old). If you install -`cudatoolkit-dev` and `cudatoolkit` make sure that they are the same version exactly including minor version. 
Also in the near future cuda should have better +The important section is `CUDA Version`. In general you should install a version of cudatoolkit that's less than or +equal to the cuda version (but not too old). If you install `cudatoolkit-dev` and `cudatoolkit` make sure that they are +the same version exactly including minor version. Also in the near future cuda should have better [ABI compatibility](https://docs.nvidia.com/deploy/cuda-compatibility/index.html). Below is an example gpu environment: @@ -167,5 +174,5 @@ dependencies: - numba ``` -We are working hard to make the GPU experience on Qhub as streamlined as possible. There are many small gotchas when working with GPUs and getting all the drivers installed -properly. +We are working hard to make the GPU experience on Qhub as streamlined as possible. There are many small gotchas when +working with GPUs and getting all the drivers installed properly. diff --git a/docs/source/admin_guide/jupyterhub.md b/docs/source/admin_guide/jupyterhub.md index df5c8e0f09..63640a7816 100644 --- a/docs/source/admin_guide/jupyterhub.md +++ b/docs/source/admin_guide/jupyterhub.md @@ -2,10 +2,12 @@ QHub has the JupyterHub project at its core. -Within the `qhub deploy` step, JupyterHub is installed using the [Zero2JupyterHub Helm package](https://zero-to-jupyterhub.readthedocs.io/). +Within the `qhub deploy` step, JupyterHub is installed using the +[Zero2JupyterHub Helm package](https://zero-to-jupyterhub.readthedocs.io/). -It's possible to specify Helm overrides (i.e. your own values for selected fields in the JupyterHub deployment's `values.yaml` file) from the `qhub-config.yaml` file. However, be -aware that this may conflict with values that are needed to be set in a certain way in order for QHub to operate correctly. +It's possible to specify Helm overrides (i.e. your own values for selected fields in the JupyterHub deployment's +`values.yaml` file) from the `qhub-config.yaml` file. However, be aware that this may conflict with values that are +needed to be set in a certain way in order for QHub to operate correctly. To set a Helm override, for example enabling auth state: @@ -18,11 +20,13 @@ jupyterhub: enable_auth_state: true ``` -Where it's possible to influence a value using 'native' QHub configuration, you should use that as a preference. For example, you would not set -`jupyterhub.overrides.hub.image.name` to use a custom JupyterHub Docker image. Instead you would set `default_images.jupyterhub`. +Where it's possible to influence a value using 'native' QHub configuration, you should use that as a preference. For +example, you would not set `jupyterhub.overrides.hub.image.name` to use a custom JupyterHub Docker image. Instead you +would set `default_images.jupyterhub`. -There is special behavior for the values `jupyterhub.overrides.hub.extraEnv` and `jupyterhub.overrides.hub.extraConfig`. Setting these would have naturally seen them be overridden -in their entirety by QHub's own values, but there is special treatment whereby QHub's values are merged into the list of any values that you might have set as overrides. +There is special behavior for the values `jupyterhub.overrides.hub.extraEnv` and `jupyterhub.overrides.hub.extraConfig`. +Setting these would have naturally seen them be overridden in their entirety by QHub's own values, but there is special +treatment whereby QHub's values are merged into the list of any values that you might have set as overrides. 
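+
+For example, here is a minimal sketch of adding your own environment variable through `extraEnv` (the variable name
+below is purely illustrative, and the exact value format follows the Zero2JupyterHub chart):
+
+```yaml
+jupyterhub:
+  overrides:
+    hub:
+      extraEnv:
+        - name: MY_EXTRA_ENV_VAR # hypothetical variable, shown only to illustrate the merge behavior
+          value: "some-value"
+```
+
+Because of the merge behavior described above, this entry is kept alongside the variables QHub itself sets.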
-In general, it is possible that other overrides will always be lost where QHub sets its own values, so caution must be taken, and in debugging ensure that you are prepared for -unexpected results when using overrides. +In general, it is possible that other overrides will always be lost where QHub sets its own values, so caution must be +taken, and in debugging ensure that you are prepared for unexpected results when using overrides. diff --git a/docs/source/admin_guide/keycloak.md b/docs/source/admin_guide/keycloak.md index 31e260af12..82d40082fc 100644 --- a/docs/source/admin_guide/keycloak.md +++ b/docs/source/admin_guide/keycloak.md @@ -2,10 +2,12 @@ QHub includes a deployment of [Keycloak](https://www.keycloak.org/documentation.html) to centralise user management. -Within the `qhub deploy` step, Keycloak is installed using the [Helm chart](/~https://github.com/codecentric/helm-charts/tree/master/charts/keycloak). +Within the `qhub deploy` step, Keycloak is installed using the +[Helm chart](/~https://github.com/codecentric/helm-charts/tree/master/charts/keycloak). -It's possible to specify Helm overrides (i.e. your own values for selected fields in the Keycloak deployment's `values.yaml` file) from the `qhub-config.yaml` file. However, be -aware that this may conflict with values that are needed to be set in a certain way in order for QHub to operate correctly. +It's possible to specify Helm overrides (i.e. your own values for selected fields in the Keycloak deployment's +`values.yaml` file) from the `qhub-config.yaml` file. However, be aware that this may conflict with values that are +needed to be set in a certain way in order for QHub to operate correctly. To set a Helm override, for example: @@ -26,10 +28,12 @@ security: repository: dockerusername/my-qhub-keycloak ``` -If you do set `overrides.extraEnv` as above, you must remember to include `PROXY_ADDRESS_FORWARDING=true`. Otherwise, the Keycloak deployment will not work as you will have -overridden an important default Helm value that's required by QHub. +If you do set `overrides.extraEnv` as above, you must remember to include `PROXY_ADDRESS_FORWARDING=true`. Otherwise, +the Keycloak deployment will not work as you will have overridden an important default Helm value that's required by +QHub. To find out more about using Keycloak in QHub, see [Installation - Login](../installation/login.md) -The `security.keycloak.realm_display_name` setting is the text to display on the Keycloak login page for your QHub (and in some other locations). This is optional, and if omitted -will default to "QHub \" where `project_name` is a field in the `qhub-config.yaml` file. +The `security.keycloak.realm_display_name` setting is the text to display on the Keycloak login page for your QHub (and +in some other locations). This is optional, and if omitted will default to "QHub \" where `project_name` +is a field in the `qhub-config.yaml` file. diff --git a/docs/source/admin_guide/monitoring.md b/docs/source/admin_guide/monitoring.md index ab31f195f0..e37d82f6d6 100644 --- a/docs/source/admin_guide/monitoring.md +++ b/docs/source/admin_guide/monitoring.md @@ -4,19 +4,22 @@ Cluster monitoring via Grafana/Prometheus comes built in with QHub. It's enabled ## Accessing the Grafana dashboards -Users can access the monitoring dashboards via Grafana at: `your-qhub-domain.com/monitoring`. The initial login credentials are username: `admin` and password: `prom-operator`, but -users should change the administrator password immediately after the first log in. 
+Users can access the monitoring dashboards via Grafana at: `your-qhub-domain.com/monitoring`. The initial login +credentials are username: `admin` and password: `prom-operator`, but users should change the administrator password +immediately after the first log in. -More than 25 prebuilt dashboards come with Qhub. To access them, hover over the dashboards icon, then click "Manage" as indicated in the image below. +More than 25 prebuilt dashboards come with Qhub. To access them, hover over the dashboards icon, then click "Manage" as +indicated in the image below. ![See Existing Dashboards](../images/grafana_manage_dashboards.png) -For example, the General/Kubernetes/Compute Resources/Node/Pods dashboard allows you to easily see which pods are using the most compute and memory on a particular node of your -kubernetes cluster. +For example, the General/Kubernetes/Compute Resources/Node/Pods dashboard allows you to easily see which pods are using +the most compute and memory on a particular node of your kubernetes cluster. ![Grafana Node Cpu Usage Dashboard](../images/grafana_node_cpu_usage.png) -Additionally, the General/Kubernetes/Networking/Cluster/Pods dashboard gives a nice overview of network traffic in your kubernetes cluster. +Additionally, the General/Kubernetes/Networking/Cluster/Pods dashboard gives a nice overview of network traffic in your +kubernetes cluster. ![Grafana Cluster Networking Dashboard](../images/grafana_networking_dashboard.png) @@ -24,7 +27,8 @@ Select the [Grafana documentation](https://grafana.com/docs/grafana/latest/) for ## Disabling Cluster monitoring -1. To turn off the cluster monitoring on QHub deployments, simply turn off the feature flag within your `qhub-config.yaml` file. For example: +1. To turn off the cluster monitoring on QHub deployments, simply turn off the feature flag within your + `qhub-config.yaml` file. For example: ```yaml monitoring: diff --git a/docs/source/admin_guide/preemptible-spot-instances.md b/docs/source/admin_guide/preemptible-spot-instances.md index 0462edaad6..358a65bd25 100644 --- a/docs/source/admin_guide/preemptible-spot-instances.md +++ b/docs/source/admin_guide/preemptible-spot-instances.md @@ -1,8 +1,10 @@ # Preemptible and Spot instances on QHub -A preemptible or spot VM is an instance that you can create and run at a much lower price than normal instances. Azure and Google Cloud platform use the term preemptible, while AWS -uses the term spot, and Digital Ocean doesn't support these types of instances. However, the cloud provider might stop these instances if it requires access to those resources for -other tasks. Preemptible instances are excess Cloud Provider's capacity, so their availability varies with usage. +A preemptible or spot VM is an instance that you can create and run at a much lower price than normal instances. Azure +and Google Cloud platform use the term preemptible, while AWS uses the term spot, and Digital Ocean doesn't support +these types of instances. However, the cloud provider might stop these instances if it requires access to those +resources for other tasks. Preemptible instances are excess Cloud Provider's capacity, so their availability varies with +usage. ## Usage diff --git a/docs/source/admin_guide/prefect.md b/docs/source/admin_guide/prefect.md index 4000ba9404..ac114c0101 100644 --- a/docs/source/admin_guide/prefect.md +++ b/docs/source/admin_guide/prefect.md @@ -19,7 +19,8 @@ There are a bunch of components in getting Prefect working for you, here is a br 1. 
Create a free Prefect cloud account here: https://cloud.prefect.io/ 2. Create a Service Account and an API key for the same and add this to the CI secrets as `QHUB_SECRET_PREFECT_TOKEN`: - - In GitHub: Set it in [Secrets](https://docs.github.com/en/actions/reference/encrypted-secrets#creating-encrypted-secrets-for-a-repository) + - In GitHub: Set it in + [Secrets](https://docs.github.com/en/actions/reference/encrypted-secrets#creating-encrypted-secrets-for-a-repository) - In GitLab: Set it as [Variables](https://docs.gitlab.com/ee/ci/variables/#gitlab-cicd-variables) 3. Create a project in the Prefect Cloud Dashboard. Alternatively from CLI: @@ -27,27 +28,32 @@ There are a bunch of components in getting Prefect working for you, here is a br prefect create project 'your-prefect-project-name' ``` -The `TF_VAR_prefect_token` API key is set as `PREFECT__CLOUD__AGENT__AUTH_TOKEN` environment variable in the agent. It's used while deploying Prefect Agent so that it can connect -to Prefect Cloud and query flows. +The `TF_VAR_prefect_token` API key is set as `PREFECT__CLOUD__AGENT__AUTH_TOKEN` environment variable in the agent. It's +used while deploying Prefect Agent so that it can connect to Prefect Cloud and query flows. ## Prefect Cloud -Prefect Cloud is a fully hosted, production-ready backend for Prefect Core. Checkout [prefect documentation](https://docs.prefect.io/orchestration/#prefect-cloud) to know more. +Prefect Cloud is a fully hosted, production-ready backend for Prefect Core. Checkout +[prefect documentation](https://docs.prefect.io/orchestration/#prefect-cloud) to know more. ## Prefect Agent -Prefect Agents is a lightweight processes for orchestrating flow runs. Agents run inside a user's architecture, and are responsible for starting and monitoring flow runs. During -operation the agent process queries the Prefect API for any scheduled flow runs, and allocates resources for them on their respective deployment platforms. +Prefect Agents is a lightweight processes for orchestrating flow runs. Agents run inside a user's architecture, and are +responsible for starting and monitoring flow runs. During operation the agent process queries the Prefect API for any +scheduled flow runs, and allocates resources for them on their respective deployment platforms. -When you enable prefect via `qhub-config.yml` prefect agent is deployed on the QHub's kubernetes cluster, which queries the Prefect Cloud for flow runs. +When you enable prefect via `qhub-config.yml` prefect agent is deployed on the QHub's kubernetes cluster, which queries +the Prefect Cloud for flow runs. ## Agent configuration overrides -You can override your agent configuration without having to modify the helm files directly. The extra variable `overrides` makes this possible by changing the default values for -the Agent chart according to the settings presented on your qhub-config.yaml file. +You can override your agent configuration without having to modify the helm files directly. The extra variable +`overrides` makes this possible by changing the default values for the Agent chart according to the settings presented +on your qhub-config.yaml file. 
-The current variables, originally available in the [Agent helm chart](/~https://github.com/PrefectHQ/server/blob/master/helm/prefect-server/templates/agent/deployment.yaml) that can -be overridden include: +The current variables, originally available in the +[Agent helm chart](/~https://github.com/PrefectHQ/server/blob/master/helm/prefect-server/templates/agent/deployment.yaml) +that can be overridden include: ``` - IMAGE_PULL_SECRETS @@ -59,7 +65,8 @@ be overridden include: - IMAGE_PULL_POLICY ``` -For example, if you just want to override the amount of CPU limits for each job, you would need to craft a declarative configuration, in you qhub-config.yaml file, as follows: +For example, if you just want to override the amount of CPU limits for each job, you would need to craft a declarative +configuration, in you qhub-config.yaml file, as follows: ```yaml prefect: @@ -72,8 +79,9 @@ prefect: cpu: 4 ``` -Also, if you would like to include an extra variable to the agent environment configuration, that was not previously in the helm chart, you can do it by including it under the -`envVars` field in the overrides block. For example, if you would like to add `MY_VAR: ""` to you agent environment, you can do so by adding the following to your +Also, if you would like to include an extra variable to the agent environment configuration, that was not previously in +the helm chart, you can do it by including it under the `envVars` field in the overrides block. For example, if you +would like to add `MY_VAR: ""` to you agent environment, you can do so by adding the following to your qhub-config ```yaml @@ -87,8 +95,9 @@ prefect: ### Adding secrets to your Agent configuration Overrides also allow you to define extra secrets to pass through your agent configuration, for example, when using -[default secrets](https://docs.prefect.io/core/concepts/secrets.html#default-secrets) to automatically authenticate your flow with the listed service. In the Google cloud case, for -`GCP_CREDENTIALS` context secret, you can do it by adding that specific key value pair into your configuration: +[default secrets](https://docs.prefect.io/core/concepts/secrets.html#default-secrets) to automatically authenticate your +flow with the listed service. In the Google cloud case, for `GCP_CREDENTIALS` context secret, you can do it by adding +that specific key value pair into your configuration: ```yaml prefect: @@ -98,12 +107,14 @@ prefect: PREFECT__CONTEXT__SECRETS__GCP_CREDENTIALS: '' ``` -This secret will then be stored as a [kubernetes secret](https://kubernetes.io/docs/concepts/configuration/secret/) variable into you QHub secrets volume. +This secret will then be stored as a [kubernetes secret](https://kubernetes.io/docs/concepts/configuration/secret/) +variable into you QHub secrets volume. ## Flows -Prefect agent can only orchestrate your flows, you need an actual flow to run via prefect agent. The API for the same can be found in the -[prefect documentation](https://docs.prefect.io/core/concepts/flows.html) Here is a simple example from their official doc: +Prefect agent can only orchestrate your flows, you need an actual flow to run via prefect agent. The API for the same +can be found in the [prefect documentation](https://docs.prefect.io/core/concepts/flows.html) Here is a simple example +from their official doc: ```python from prefect import task, Task, Flow @@ -124,8 +135,9 @@ with Flow('My Functional Flow') as flow: ## Storage -The Prefect Storage interface encapsulates logic for storing flows. 
Each storage unIt's able to store multiple flows (with the constraint of name uniqueness within a given unit). -The API documentation for the same can be found in the [prefect documentation](https://docs.prefect.io/api/latest/storage.html#docker) +The Prefect Storage interface encapsulates logic for storing flows. Each storage unIt's able to store multiple flows +(with the constraint of name uniqueness within a given unit). The API documentation for the same can be found in the +[prefect documentation](https://docs.prefect.io/api/latest/storage.html#docker) ## Example: Creating, Building and Register Flow @@ -221,7 +233,8 @@ if __name__ == "__main__": ## Running your flows -Now that you have Prefect Agent running in QHub Kubernetes cluster, you can now run your flows from either of the two ways: +Now that you have Prefect Agent running in QHub Kubernetes cluster, you can now run your flows from either of the two +ways: - Triggering manually from the Prefect Cloud dashboard. - Running them on a schedule by adding a parameter to you flow. You can read more about it in the diff --git a/docs/source/admin_guide/system_maintenance.md b/docs/source/admin_guide/system_maintenance.md index 63e66afec2..518e390181 100644 --- a/docs/source/admin_guide/system_maintenance.md +++ b/docs/source/admin_guide/system_maintenance.md @@ -6,24 +6,26 @@ All modifications to the infrastructure should be done with GitHub Pull-Requests ### Modifying docker images: jupyterlab, jupyterhub, dask-workers -The docker images used for dask-worker and jupyterlab user environments are pulled from a docker container registry. The images are built based on the images specified in the -`image` folder. There are three images that are currently built +The docker images used for dask-worker and jupyterlab user environments are pulled from a docker container registry. The +images are built based on the images specified in the `image` folder. There are three images that are currently built - jupyterlab :: modification of jupyterlab instances for each user - dask-worker :: modification of dask workers and dask scheduler - jupyterhub :: the jupyterhub server (allows for customization of hub UI) - conda-store :: Environment management tool for QHub -Each docker image is customized with its respective directory (for example `image/Dockerfile.jupyterlab` -> `image/jupyterlab/*`. For jupyterlab the environment is located at -`image/jupyterlab/environment.yaml`. Thus to add a package to the environment simply submit a pull request with the new package. +Each docker image is customized with its respective directory (for example `image/Dockerfile.jupyterlab` -> +`image/jupyterlab/*`. For jupyterlab the environment is located at `image/jupyterlab/environment.yaml`. Thus to add a +package to the environment simply submit a pull request with the new package. -At this current point in time once a user submits a pull request to create the given docker image and the PR is accepted with images built, a PR must follow that adds the image to -the qhub deployment. This can be done by modifying `infrastructure/variables.tf` and the configuration file. +At this current point in time once a user submits a pull request to create the given docker image and the PR is accepted +with images built, a PR must follow that adds the image to the qhub deployment. This can be done by modifying +`infrastructure/variables.tf` and the configuration file. ### Adding additional worker nodegroups -Adding additional nodegroups can be done by editing the configuration file. 
While a `general`, `user`, and `worker` nodegroup are required you may create any additional node group. -Take for example the Digital Ocean configuration. +Adding additional nodegroups can be done by editing the configuration file. While a `general`, `user`, and `worker` +nodegroup are required you may create any additional node group. Take for example the Digital Ocean configuration. ```yaml digital_ocean: @@ -44,7 +46,8 @@ digital_ocean: max_nodes: 5 ``` -To add a node group for a node group called `worker-high-mem` simply add to the configuration. The same applies for AWS, GCP, and DO. +To add a node group for a node group called `worker-high-mem` simply add to the configuration. The same applies for AWS, +GCP, and DO. ```yaml digital_ocean: @@ -71,8 +74,9 @@ digital_ocean: ### Setting specific JupyterLab profile to run on a nodegroup -Sometimes we would like a profile to execute on nodes that are not in the normal nodegroup. In the example above we created a high memory node group. To make the jupyterlab profile -`small worker` use the high memory nodegroup do the following. +Sometimes we would like a profile to execute on nodes that are not in the normal nodegroup. In the example above we +created a high memory node group. To make the jupyterlab profile `small worker` use the high memory nodegroup do the +following. ```yaml profiles: @@ -118,7 +122,8 @@ profiles: ### Setting specific dask workers to run on a nodegroup -Suppose we want a specific dask worker profile to run on a specific node group. Here we demonstrate annotating the DO example configuration. +Suppose we want a specific dask worker profile to run on a specific node group. Here we demonstrate annotating the DO +example configuration. ```yaml profiles: @@ -131,8 +136,9 @@ profiles: image: "quansight/qhub-dask-worker:v||QHUB_VERSION||" ``` -[Dask-gateway](https://gateway.dask.org/api-server.html#kube-cluster-config) takes additional configuration for the scheduler pods and workers. Remember similar to assigning node -groups to specific jupyterlab instances we must get the key for the node pool. +[Dask-gateway](https://gateway.dask.org/api-server.html#kube-cluster-config) takes additional configuration for the +scheduler pods and workers. Remember similar to assigning node groups to specific jupyterlab instances we must get the +key for the node pool. - AWS :: `eks.amazonaws.com/nodegroup` - GCP :: `cloud.google.com/gke-nodepool` diff --git a/docs/source/admin_guide/traefik.md b/docs/source/admin_guide/traefik.md index 54dcd5922c..e00637e966 100644 --- a/docs/source/admin_guide/traefik.md +++ b/docs/source/admin_guide/traefik.md @@ -4,9 +4,11 @@ ### Creating the certificate -[Lego](https://go-acme.github.io/lego/installation/) is a command line tool for provisioning certificates for a domain. If you are trying to install QHub within an enterprise you -may need to contact someone in IT to create the certificate and key-pair for you. Ensure that this certificate has all of the domains that QHub is running on. Lego supports -[multiple DNS providers](https://go-acme.github.io/lego/dns/). For this example we will assume Cloudflare as your DNS provider. +[Lego](https://go-acme.github.io/lego/installation/) is a command line tool for provisioning certificates for a domain. +If you are trying to install QHub within an enterprise you may need to contact someone in IT to create the certificate +and key-pair for you. Ensure that this certificate has all of the domains that QHub is running on. 
Lego supports +[multiple DNS providers](https://go-acme.github.io/lego/dns/). For this example we will assume Cloudflare as your DNS +provider. ```shell export CLOUDFLARE_DNS_API_TOKEN=1234567890abcdefghijklmnopqrstuvwxyz diff --git a/docs/source/admin_guide/troubleshooting.md b/docs/source/admin_guide/troubleshooting.md index f9f29346b4..a1df3786f1 100644 --- a/docs/source/admin_guide/troubleshooting.md +++ b/docs/source/admin_guide/troubleshooting.md @@ -4,8 +4,9 @@ This guide aims to provide useful information to developers in the detection and ## General Troubleshooting -To minimize the occurrence of errors on your QHub application, please follow the best practices described on the [Installation](../installation/installation.md), -[Setup](../installation/setup.md) and [Usage](../installation/usage.md) sections. +To minimize the occurrence of errors on your QHub application, please follow the best practices described on the +[Installation](../installation/installation.md), [Setup](../installation/setup.md) and [Usage](../installation/usage.md) +sections. ### Solutions for common problems @@ -13,10 +14,12 @@ To minimize the occurrence of errors on your QHub application, please follow the ##### Digital Ocean -To get the kubernetes context to interact with a Digital Ocean cluster use the [following instructions](https://www.digitalocean.com/docs/kubernetes/how-to/connect-to-cluster/). +To get the kubernetes context to interact with a Digital Ocean cluster use the +[following instructions](https://www.digitalocean.com/docs/kubernetes/how-to/connect-to-cluster/). 1. [Download Digital Ocean command line utility](https://www.digitalocean.com/docs/apis-clis/doctl/how-to/install/) -2. [Create Digital Ocean API Token](https://www.digitalocean.com/docs/apis-clis/doctl/how-to/install/) likely already done +2. [Create Digital Ocean API Token](https://www.digitalocean.com/docs/apis-clis/doctl/how-to/install/) likely already + done 3. [doctl access via api token](https://www.digitalocean.com/docs/apis-clis/doctl/how-to/install/) `doctl auth init` 4. `doctl kubernetes cluster kubeconfig save "-"` @@ -24,7 +27,8 @@ After completing these steps. `kubectl` should be able to access the cluster. ##### Google Cloud Platform -To get the kubernetes context to interact with a GCP use the [following instructions](https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl). +To get the kubernetes context to interact with a GCP use the +[following instructions](https://cloud.google.com/kubernetes-engine/docs/how-to/cluster-access-for-kubectl). 1. [Download the GCP SDK](https://cloud.google.com/sdk/downloads) 2. Login to GCP via `gcloud init` @@ -34,32 +38,39 @@ After completing these steps. `kubectl` should be able to access the cluster. ##### Amazon Web Services -To get the kubernetes context to interact with a AWS use the [following instructions](https://docs.aws.amazon.com/eks/latest/userguide/create-kubeconfig.html). +To get the kubernetes context to interact with a AWS use the +[following instructions](https://docs.aws.amazon.com/eks/latest/userguide/create-kubeconfig.html). 1. Download the [aws command line](https://aws.amazon.com/cli/) -2. [Create AWS Access Key and Secret Key](https://aws.amazon.com/premiumsupport/knowledge-center/create-access-key/) likely already done +2. [Create AWS Access Key and Secret Key](https://aws.amazon.com/premiumsupport/knowledge-center/create-access-key/) + likely already done 3. 
`aws eks --region update-kubeconfig --name -` After completing these steps. `kubectl` should be able to access the cluster. #### Debug your Kubernetes cluster -[`k9s`](https://k9scli.io/) is a terminal-based UI to manage Kubernetes clusters that aims to simplify navigating, observing, and managing your applications in `k8s`. `k9s` -continuously monitors Kubernetes clusters for changes and provides shortcut commands to interact with the observed resources becoming a fast way to review and resolve day-to-day -issues in Kubernetes. It's definitely a huge improvement to the general workflow, and a best-to-have tool for debugging your Kubernetes cluster sessions. +[`k9s`](https://k9scli.io/) is a terminal-based UI to manage Kubernetes clusters that aims to simplify navigating, +observing, and managing your applications in `k8s`. `k9s` continuously monitors Kubernetes clusters for changes and +provides shortcut commands to interact with the observed resources becoming a fast way to review and resolve day-to-day +issues in Kubernetes. It's definitely a huge improvement to the general workflow, and a best-to-have tool for debugging +your Kubernetes cluster sessions. -Installation can be done on macOS, Windows, and Linux. Instructions for each operating system can be found [here](/~https://github.com/derailed/k9s). Complete the installation to -follow along. +Installation can be done on macOS, Windows, and Linux. Instructions for each operating system can be found +[here](/~https://github.com/derailed/k9s). Complete the installation to follow along. -By default, `k9s` starts with the standard directory that's set as the context (in this case Minikube). To view all the current process press `0`: +By default, `k9s` starts with the standard directory that's set as the context (in this case Minikube). To view all the +current process press `0`: ![Image of the terminal UI](../images/k9s_UI.png) -> **NOTE**: In some circumstances you will be confronted with the need to inspect any services launched by your cluster at your ‘localhost’. For instance, if your cluster has -> problem with the network traffic tunnel configuration, it may limit or block the user's access to destination resources over the connection. +> **NOTE**: In some circumstances you will be confronted with the need to inspect any services launched by your cluster +> at your ‘localhost’. For instance, if your cluster has problem with the network traffic tunnel configuration, it may +> limit or block the user's access to destination resources over the connection. -`k9s` port-forward option shift + f allows you to access and interact with internal Kubernetes cluster processes from your localhost you can then use this -method to investigate issues and adjust your services locally without the need to expose them beforehand. +`k9s` port-forward option shift + f allows you to access and interact with internal Kubernetes +cluster processes from your localhost you can then use this method to investigate issues and adjust your services +locally without the need to expose them beforehand. ______________________________________________________________________ @@ -69,9 +80,11 @@ ______________________________________________________________________ #### JupyterHub Theme -The QHub theme was originally based off the [work of the pangeo team](/~https://github.com/pangeo-data/pangeo-custom-jupyterhub-templates) and is now located in -[github.com/Quansight/qhub-jupyterhub-theme](/~https://github.com/Quansight/qhub-jupyterhub-theme/). 
For simple modifications to the jupyterhub theme we suggest only editing -`infrastructure/jupyterhub.yaml` and the value `c.JupyterHub.template_vars`. For most use cases this should provide enough flexibility. +The QHub theme was originally based off the +[work of the pangeo team](/~https://github.com/pangeo-data/pangeo-custom-jupyterhub-templates) and is now located in +[github.com/Quansight/qhub-jupyterhub-theme](/~https://github.com/Quansight/qhub-jupyterhub-theme/). For simple +modifications to the jupyterhub theme we suggest only editing `infrastructure/jupyterhub.yaml` and the value +`c.JupyterHub.template_vars`. For most use cases this should provide enough flexibility. ```yaml hub: @@ -85,20 +98,23 @@ hub: } ``` -For more serious modifications to the jupyterhub theme you will need to fork [Quansight/qhub-jupyterhub-theme](/~https://github.com/Quansight/qhub-jupyterhub-theme) and edit the -jupyterhub Dockerfile located at `image/Dockerfile.jupyterhub`. Modify the `THEME_OWNER`, `THEME_REPO`, and `THEME_REV`. This should change the Dockerfile to use your new theme. -The [Quansight/qhub-jupyterhub-theme](/~https://github.com/Quansight/qhub-jupyterhub-theme) has detailed documentation. +For more serious modifications to the jupyterhub theme you will need to fork +[Quansight/qhub-jupyterhub-theme](/~https://github.com/Quansight/qhub-jupyterhub-theme) and edit the jupyterhub Dockerfile +located at `image/Dockerfile.jupyterhub`. Modify the `THEME_OWNER`, `THEME_REPO`, and `THEME_REV`. This should change +the Dockerfile to use your new theme. The +[Quansight/qhub-jupyterhub-theme](/~https://github.com/Quansight/qhub-jupyterhub-theme) has detailed documentation. #### JupyterLab Theme -Setting the JupyterLab theme is done via extensions. Edit the `image/postBuild` script to include the jupyterlab extension in the build. Within the `image` directory run the -following to build JupyterLab. +Setting the JupyterLab theme is done via extensions. Edit the `image/postBuild` script to include the jupyterlab +extension in the build. Within the `image` directory run the following to build JupyterLab. ```shell docker build -f Docker.jupyterlab -t Quansight/qhub-jupyterlab:latest . ``` -Finally, you can test the resulting image via the following docker command and open your web browser to `localhost:8000`. +Finally, you can test the resulting image via the following docker command and open your web browser to +`localhost:8000`. ```shell docker run -p 8000:8000 -it Quansight/qhub-jupyterlab:latest jupyter lab --port 8000 --ip 0.0.0.0 @@ -106,10 +122,11 @@ docker run -p 8000:8000 -it Quansight/qhub-jupyterlab:latest jupyter lab --port ### Using a Private AWS ECR Container Registry -By default, images such as the default JupyterLab image specified as `quansight/qhub-jupyterhub:v||QHUB_VERSION||` will be pulled from Docker Hub. +By default, images such as the default JupyterLab image specified as `quansight/qhub-jupyterhub:v||QHUB_VERSION||` will +be pulled from Docker Hub. 
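+
+If you only want to point QHub at different publicly hosted images, that's done in the `default_images` section of
+`qhub-config.yaml`. A sketch is shown below — the `jupyterhub` key is the one referenced elsewhere in these docs, and
+the other key names are assumptions, so check your generated `qhub-config.yaml` for the exact spelling:
+
+```yaml
+default_images:
+  jupyterhub: quansight/qhub-jupyterhub:v||QHUB_VERSION||
+  jupyterlab: quansight/qhub-jupyterlab:v||QHUB_VERSION|| # key name assumed
+  dask_worker: quansight/qhub-dask-worker:v||QHUB_VERSION|| # key name assumed
+```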
-To specify a private AWS ECR (and this technique should work regardless of which cloud your QHub is deployed to), first provide details of the ECR and AWS access keys in -`qhub-config.yaml`: +To specify a private AWS ECR (and this technique should work regardless of which cloud your QHub is deployed to), first +provide details of the ECR and AWS access keys in `qhub-config.yaml`: ```yaml external_container_reg: @@ -120,5 +137,6 @@ external_container_reg: extcr_region: us-west-1 ``` -This will mean you can specify private Docker images such as `12345678.dkr.ecr.us-west-1.amazonaws.com/quansight/qhub-jupyterlab:mytag` in your `qhub-config.yaml` file. The AWS key +This will mean you can specify private Docker images such as +`12345678.dkr.ecr.us-west-1.amazonaws.com/quansight/qhub-jupyterlab:mytag` in your `qhub-config.yaml` file. The AWS key and secret provided must have relevant ecr IAMS permissions to authenticate and read from the ECR container registry. diff --git a/docs/source/admin_guide/upgrade.md b/docs/source/admin_guide/upgrade.md index 2b9b82c7ed..abee7eddf6 100644 --- a/docs/source/admin_guide/upgrade.md +++ b/docs/source/admin_guide/upgrade.md @@ -4,17 +4,20 @@ This is a guide to the general upgrade of QHub to a new version. You should always [backup your data](./backup.md) before upgrading. -> Note that for some releases (e.g. to v0.4), the cluster cannot be upgraded in-situ so you must perform a redeployment (backup the old cluster, redeploy a new upgraded cluster and -> then restore your data). +> Note that for some releases (e.g. to v0.4), the cluster cannot be upgraded in-situ so you must perform a redeployment +> (backup the old cluster, redeploy a new upgraded cluster and then restore your data). > > To perform a redeployment upgrade see the [breaking upgrade documentation](./breaking-upgrade.md). -Here we suppose a user would like to upgrade to a version ``, probably the latest full release of [QHub on PyPI](https://pypi.org/project/qhub/). +Here we suppose a user would like to upgrade to a version ``, probably the latest full release of +[QHub on PyPI](https://pypi.org/project/qhub/). -You may be deploying QHub based on a local configuration file, or you may be using CI/CD workflows in GitHub or GitLab. Either way, you will need to locate a copy of your -`qhub-config.yaml` configuration file to upgrade it (and commit back to your git repo in the CI/CD case). +You may be deploying QHub based on a local configuration file, or you may be using CI/CD workflows in GitHub or GitLab. +Either way, you will need to locate a copy of your `qhub-config.yaml` configuration file to upgrade it (and commit back +to your git repo in the CI/CD case). -For CI/CD deployments, you will need to `git clone ` into a folder on your local machine if you haven't done so already. +For CI/CD deployments, you will need to `git clone ` into a folder on your local machine if you haven't done +so already. ## Step 1: Upgrade the `qhub` command package @@ -38,34 +41,39 @@ In the folder containing your QHub configuration file, run: qhub upgrade -c qhub-config.yaml ``` -This will output a newer version of `qhub-config.yaml` that's compatible with the new version of `qhub`. The process outputs a list of changes it has made. The `upgrade` command -creates a copy of the original unmodified config file (`qhub-config.yaml.old.backup`) as well as any other files that may be required by the upgraded cluster (if any). 
+This will output a newer version of `qhub-config.yaml` that's compatible with the new version of `qhub`. The process +outputs a list of changes it has made. The `upgrade` command creates a copy of the original unmodified config file +(`qhub-config.yaml.old.backup`) as well as any other files that may be required by the upgraded cluster (if any). ## Step 3: Validate special customizations to `qhub-config.yaml` -You may have made special customizations to your `qhub-config.yaml`, such as using your own versions of Docker images. Please check your `qhub-config.yaml` and decide if you need -to update any values that would not have been changed automatically - or, for example, you may need to build new versions of your custom Docker images to match any changes in +You may have made special customizations to your `qhub-config.yaml`, such as using your own versions of Docker images. +Please check your `qhub-config.yaml` and decide if you need to update any values that would not have been changed +automatically - or, for example, you may need to build new versions of your custom Docker images to match any changes in QHub's images. ## Step 4: Redeploy QHub -If you are deploying QHub from your local machine (not using CI/CD) then you will now have a `qhub-config.yaml` file that you can deploy. +If you are deploying QHub from your local machine (not using CI/CD) then you will now have a `qhub-config.yaml` file +that you can deploy. ```shell qhub deploy -c qhub-config.yaml ``` -At this point you may see an error message saying that deployment is prevented due to the `prevent_deploy` setting in your YAML file. This is a safeguard to ensure that you only -proceed if you are aware of possible breaking changes in the current upgrade. +At this point you may see an error message saying that deployment is prevented due to the `prevent_deploy` setting in +your YAML file. This is a safeguard to ensure that you only proceed if you are aware of possible breaking changes in the +current upgrade. -For example, we may be aware that you will lose data due to this upgrade, so need to note a specific upgrade process to keep your data safe. Always check the release notes of the -release in this case and get in touch with us if you need assistance. For example, you may find that your existing cluster is intentionally deleted so that a new replacement can be -deployed instead, in which case your data must be backed up so it can be restored after the upgrade. +For example, we may be aware that you will lose data due to this upgrade, so need to note a specific upgrade process to +keep your data safe. Always check the release notes of the release in this case and get in touch with us if you need +assistance. For example, you may find that your existing cluster is intentionally deleted so that a new replacement can +be deployed instead, in which case your data must be backed up so it can be restored after the upgrade. ### CI/CD: render and commit to git -For CI/CD (GitHub/GitLab) workflows, then as well as generating the updated `qhub-config.yaml` files as above, you will also need to regenerate the workflow files based on the -latest `qhub` version's templates. +For CI/CD (GitHub/GitLab) workflows, then as well as generating the updated `qhub-config.yaml` files as above, you will +also need to regenerate the workflow files based on the latest `qhub` version's templates. 
With the newly upgraded `qhub-config.yaml` file, run: @@ -75,4 +83,5 @@ qhub render -c qhub-config.yaml (Note that `qhub deploy` would perform this render step too, but will also immediately redeploy your QHub.) -Commit all the files (`qhub-config.yaml` and GitHub/GitLab workflow files) back to the remote repo. All files need to be committed together in the same commit. +Commit all the files (`qhub-config.yaml` and GitHub/GitLab workflow files) back to the remote repo. All files need to be +committed together in the same commit. diff --git a/docs/source/dev_guide/architecture.md b/docs/source/dev_guide/architecture.md index aa50a92531..5e6fad4a9d 100644 --- a/docs/source/dev_guide/architecture.md +++ b/docs/source/dev_guide/architecture.md @@ -1,7 +1,8 @@ # Developer docs -QHub admins are **DevOps engineers**, **system administrators**, **scientists**, and **network architects** who are responsible for the critical infrastructure that data scientists -and engineers need to thrive. QHub is bundled with features that make installation easy while providing the ability to scale with your organization and data. +QHub admins are **DevOps engineers**, **system administrators**, **scientists**, and **network architects** who are +responsible for the critical infrastructure that data scientists and engineers need to thrive. QHub is bundled with +features that make installation easy while providing the ability to scale with your organization and data. > The content below is particularly for QHub producers, and those looking to learn more about the QHub architecture. @@ -23,43 +24,52 @@ After the installation, the next step is to configure QHub. ## Configuration -QHub is entirely controlled from a configuration file, which allows you to manage multiple environments and multiple teams, as well as their permissions and authorization in a -robust way. +QHub is entirely controlled from a configuration file, which allows you to manage multiple environments and multiple +teams, as well as their permissions and authorization in a robust way. - **The Configuration File** - - QHub comes with configuration file templates for each of the cloud providers it currently supports: **AWS**, **DO**, **GCP**, and **Azure**. The templates can be found - [**here**](../installation/configuration.md). - - You can create a simple qhub-config.yaml configuration panel as a starting point using the `qhub init` command as [described here](../installation/usage.md). + - QHub comes with configuration file templates for each of the cloud providers it currently supports: **AWS**, **DO**, + **GCP**, and **Azure**. The templates can be found [**here**](../installation/configuration.md). + - You can create a simple qhub-config.yaml configuration panel as a starting point using the `qhub init` command as + [described here](../installation/usage.md). ## Why QHub -With QHub, managing configurable data science environments and attaining seamless deployment with [**Github Actions**](/~https://github.com/marketplace/actions/deployment-action) -become remarkably easy. Let's look at how you can customize QHub for a data science architecture that meets **your team's needs**. +With QHub, managing configurable data science environments and attaining seamless deployment with +[**Github Actions**](/~https://github.com/marketplace/actions/deployment-action) become remarkably easy. Let's look at how +you can customize QHub for a data science architecture that meets **your team's needs**. 
## Staging & Production Environments and Shell Access -With QHub, you can have shell access and remote editing access through KubeSSH. The complete linux style permissions allows for different shared folders for different groups of -users. +With QHub, you can have shell access and remote editing access through KubeSSH. The complete linux style permissions +allows for different shared folders for different groups of users. QHub comes with staging and production environments, as well as JupyterHub deploys. ## QHub Architecture -In addition to a robust integration of [**Dask**](https://dask.org/) and a new way of distributing environments with [**conda-store**](/~https://github.com/quansight/conda-store), -QHub brings together some of the widely used cloud deployment components in its architecture. - -QHub integrates [**Network File System (NFS)**](https://en.wikipedia.org/wiki/Network_File_System) protocol is used to allow Kubernetes applications to access storage. Files in -containers in a [**Kubernetes Pod**](https://kubernetes.io/docs/concepts/workloads/pods/pod/) are not persistent, which means if a container crashes, -[**kubelet**](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=Synopsis,object%20that%20describes%20a%20pod) will restart the container, however, -the files will not be preserved. The [**Kubernetes Volume**](https://kubernetes.io/docs/concepts/storage/volumes/#types-of-volumes) abstraction that QHub utilizes solves this -problem. - -NFS shares files directly from a container in a Kubernetes Pod, and sets up a [**Kubernetes Persistent Volume**](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) -accessed via NFS. Kubernetes' built‑in configuration for HTTP load balancing [**Ingress**](https://kubernetes.io/docs/concepts/services-networking/ingress/) defines and controls -the rules for external connectivity to Kubernetes services. Users who need to provide external access to their Kubernetes services create an Ingress resource that defines rules. - -QHub streamlines and manages all the Kubernetes architecture detailed above and delivers a smooth deployment process to its users through its intuitive interface. -![QHub_Architecture](../images/high_level_architecture.png) +In addition to a robust integration of [**Dask**](https://dask.org/) and a new way of distributing environments with +[**conda-store**](/~https://github.com/quansight/conda-store), QHub brings together some of the widely used cloud +deployment components in its architecture. + +QHub integrates [**Network File System (NFS)**](https://en.wikipedia.org/wiki/Network_File_System) protocol is used to +allow Kubernetes applications to access storage. Files in containers in a +[**Kubernetes Pod**](https://kubernetes.io/docs/concepts/workloads/pods/pod/) are not persistent, which means if a +container crashes, +[**kubelet**](https://kubernetes.io/docs/reference/command-line-tools-reference/kubelet/#:~:text=Synopsis,object%20that%20describes%20a%20pod) +will restart the container, however, the files will not be preserved. The +[**Kubernetes Volume**](https://kubernetes.io/docs/concepts/storage/volumes/#types-of-volumes) abstraction that QHub +utilizes solves this problem. + +NFS shares files directly from a container in a Kubernetes Pod, and sets up a +[**Kubernetes Persistent Volume**](https://kubernetes.io/docs/concepts/storage/persistent-volumes/) accessed via NFS. 
+Kubernetes' built‑in configuration for HTTP load balancing +[**Ingress**](https://kubernetes.io/docs/concepts/services-networking/ingress/) defines and controls the rules for +external connectivity to Kubernetes services. Users who need to provide external access to their Kubernetes services +create an Ingress resource that defines rules. + +QHub streamlines and manages all the Kubernetes architecture detailed above and delivers a smooth deployment process to +its users through its intuitive interface. ![QHub_Architecture](../images/high_level_architecture.png) QHub architecture and features allows you to: @@ -73,55 +83,67 @@ QHub architecture and features allows you to: ## Cloud Deployment on QHub -QHub deployments on the clouds follow the architectural structure shown for each provider in the diagrams below. To make cloud deployments, the respective configuration file needs -to be configured based on the user's cloud provider account credentials, as well as the details of users they would allow access to the deployment. +QHub deployments on the clouds follow the architectural structure shown for each provider in the diagrams below. To make +cloud deployments, the respective configuration file needs to be configured based on the user's cloud provider account +credentials, as well as the details of users they would allow access to the deployment. ### Infrastructure Provisioning (Common for all Clouds) -To provision the infrastructure, QHub uses [**Terraform**](https://www.terraform.io/), a tool for building, changing, and versioning infrastructure safely and efficiently. -Terraform enables QHub to have Infrastructure as Code to provision and manage cloud, infrastructure or service. Terraform has a system of modules/provider for dealing with various -cloud providers and various kinds of infrastructure, for instance it has modules for [**Amazon Web Services (AWS)**](https://aws.amazon.com/), +To provision the infrastructure, QHub uses [**Terraform**](https://www.terraform.io/), a tool for building, changing, +and versioning infrastructure safely and efficiently. Terraform enables QHub to have Infrastructure as Code to provision +and manage cloud, infrastructure or service. Terraform has a system of modules/provider for dealing with various cloud +providers and various kinds of infrastructure, for instance it has modules for +[**Amazon Web Services (AWS)**](https://aws.amazon.com/), [**Google Cloud Platform (GCP)**](https://cloud.google.com/gcp/?utm_source=google&utm_medium=cpc&utm_campaign=na-US-all-en-dr-bkws-all-all-trial-e-dr-1009135&utm_content=text-ad-lpsitelinkCCexp2-any-DEV_c-CRE_113120492887-ADGP_Hybrid+%7C+AW+SEM+%7C+BKWS+%7C+US+%7C+en+%7C+EXA+~+Google+Cloud+Platform-KWID_43700009942847400-kwd-26415313501&utm_term=KW_google%20cloud%20platform-ST_google+cloud+platform&gclid=CjwKCAjw9vn4BRBaEiwAh0muDLoAixDimMW9Sq12jfyBy6dMzxOU7ZW6-w44qWTJo-zRdpnBojzbexoCNGsQAvD_BwE), -[**Digital Ocean (DO)**](https://www.digitalocean.com/), as well as for [**Kubernetes**](https://kubernetes.io/) and [**Helm**](https://helm.sh/). +[**Digital Ocean (DO)**](https://www.digitalocean.com/), as well as for [**Kubernetes**](https://kubernetes.io/) and +[**Helm**](https://helm.sh/). ### Kubernetes (Common for all Clouds) -To manage the deployments on the Kubernetes cluster, QHub uses Helm, a package manager for Kubernetes. Helm packages Kubernetes configurations for deployment for ease of -distribution so that you can simply use a ready made package and deploy it to your Kubernetes cluster. 
+To manage the deployments on the Kubernetes cluster, QHub uses Helm, a package manager for Kubernetes. Helm packages +Kubernetes configurations for deployment for ease of distribution so that you can simply use a ready made package and +deploy it to your Kubernetes cluster. -The services are exposed via an [**Ingress**](https://kubernetes.io/docs/concepts/services-networking/ingress/) component of Kubernetes. Helm uses a packaging format called -[**Charts**](https://helm.sh/docs/topics/charts/), which is a collection of files that describe a related set of Kubernetes resources. Charts can be packaged into versioned +The services are exposed via an [**Ingress**](https://kubernetes.io/docs/concepts/services-networking/ingress/) +component of Kubernetes. Helm uses a packaging format called [**Charts**](https://helm.sh/docs/topics/charts/), which is +a collection of files that describe a related set of Kubernetes resources. Charts can be packaged into versioned archives to be deployed. They are also easy to rollback. -The [**Helm provider of Terraform**](/~https://github.com/hashicorp/terraform-provider-helm) takes the overrides supported by Helm. This makes it easier to use a standard chart with -custom settings, such as a custom image. +The [**Helm provider of Terraform**](/~https://github.com/hashicorp/terraform-provider-helm) takes the overrides supported +by Helm. This makes it easier to use a standard chart with custom settings, such as a custom image. -For JupyterHub and Dask, QHub uses the official Helm Charts and provide custom settings, which can be seen in: `dask-gateway.yaml` and `jupyterhub.yaml`, also with custom images, -which are stored in respective container registry of the cloud provider uses. +For JupyterHub and Dask, QHub uses the official Helm Charts and provide custom settings, which can be seen in: +`dask-gateway.yaml` and `jupyterhub.yaml`, also with custom images, which are stored in respective container registry of +the cloud provider uses. ### SSL and Ingress (Common for all Clouds) -To expose various services, such as the JupyterHub and Dask, present in the Kubernetes Cluster, QHub uses [Traefik Proxy](https://traefik.io/traefik/) which is a reverse proxy and -load balancer. +To expose various services, such as the JupyterHub and Dask, present in the Kubernetes Cluster, QHub uses +[Traefik Proxy](https://traefik.io/traefik/) which is a reverse proxy and load balancer. -[**SSL**](https://www.ssl.com/faqs/faq-what-is-ssl/) is a crucial part of any service exposed to the Internet. To handle this in Kubernetes, QHub utilizes -[**cert manager**](/~https://github.com/jetstack/cert-manager), a popular Kubernetes add-on to automate the management and issuance of TLS certificates from various issuing sources. +[**SSL**](https://www.ssl.com/faqs/faq-what-is-ssl/) is a crucial part of any service exposed to the Internet. To handle +this in Kubernetes, QHub utilizes [**cert manager**](/~https://github.com/jetstack/cert-manager), a popular Kubernetes +add-on to automate the management and issuance of TLS certificates from various issuing sources. ### AWS Cloud Architecture -The architecture of AWS uses [**Virtual Private Cloud (VPC)**](https://docs.aws.amazon.com/vpc/latest/userguide/what-is-amazon-vpc.html), which enables you to launch resources into -a virtual network. The VPC is the logically isolated section of AWS, which enables you to control how your network and AWS resources inside your network are exposed to the -Internet. 
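+
+The AWS resources described in this section are all driven by the `amazon_web_services` block of `qhub-config.yaml`.
+A rough sketch is shown below; the region, instance types and node counts are placeholders, and the `node_groups`
+layout mirrors the pattern shown for the other providers:
+
+```yaml
+amazon_web_services:
+  region: us-west-2 # placeholder region
+  kubernetes_version: "1.18" # placeholder version
+  node_groups:
+    general:
+      instance: m5.xlarge
+      min_nodes: 1
+      max_nodes: 1
+    user:
+      instance: m5.large
+      min_nodes: 1
+      max_nodes: 5
+    worker:
+      instance: m5.large
+      min_nodes: 1
+      max_nodes: 5
+```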
There are subnets inside the VPC in multiple availability zones. The Kubernetes Cluster is inside the VPC, which by default isolates it from the internet by the very -nature of VPC. +The architecture of AWS uses +[**Virtual Private Cloud (VPC)**](https://docs.aws.amazon.com/vpc/latest/userguide/what-is-amazon-vpc.html), which +enables you to launch resources into a virtual network. The VPC is the logically isolated section of AWS, which enables +you to control how your network and AWS resources inside your network are exposed to the Internet. There are subnets +inside the VPC in multiple availability zones. The Kubernetes Cluster is inside the VPC, which by default isolates it +from the internet by the very nature of VPC. -QHub uses AWS’s managed Kubernetes service: [**Elastic Kubernetes Service (EKS)**](https://aws.amazon.com/eks/) to create a Kubernetes Cluster. Since VPC is an isolated part of the -AWS, you need a way to expose the services running inside the Kubernetes to the Internet, so that others can access it. This is achieved by an -[**Internet Gateway**](https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Internet_Gateway.html). It’s a VPC component that allows communication between the VPC and the Internet. +QHub uses AWS’s managed Kubernetes service: [**Elastic Kubernetes Service (EKS)**](https://aws.amazon.com/eks/) to +create a Kubernetes Cluster. Since VPC is an isolated part of the AWS, you need a way to expose the services running +inside the Kubernetes to the Internet, so that others can access it. This is achieved by an +[**Internet Gateway**](https://docs.aws.amazon.com/vpc/latest/userguide/VPC_Internet_Gateway.html). It’s a VPC component +that allows communication between the VPC and the Internet. ### Autoscaling -With QHub, system admins can customize and maintain their teams' compute needs and environments. The autoscaling of computers (Kubernetes and Pods) is done through Dask autoscaling -with CPU & GPU workers. +With QHub, system admins can customize and maintain their teams' compute needs and environments. The autoscaling of +computers (Kubernetes and Pods) is done through Dask autoscaling with CPU & GPU workers. ### Authentication diff --git a/docs/source/dev_guide/contribution.md b/docs/source/dev_guide/contribution.md index 0f5dae9497..756041ca8e 100644 --- a/docs/source/dev_guide/contribution.md +++ b/docs/source/dev_guide/contribution.md @@ -6,10 +6,12 @@ Please see [QHub Contribution Guidelines](/~https://github.com/Quansight/qhub/blob ## Adding new Integration and Features to Qhub -The preferred way to add new features/integrations to the `qhub-config.yaml` is via a new key as a namespace. If the new integration requires multiple `images` then use `images` -otherwise use `image`. Additionally `enabled` key determines is the feature is enabled or disabled. Ensure that the configuration options are in -[qhub/schema.py](/~https://github.com/Quansight/qhub/blob/main/qhub/schema.py). Additionally the configuration documentation in Qhub must reflect the configuration. At a minimum the -new feature should also be detailed in the administration guide and user guide. +The preferred way to add new features/integrations to the `qhub-config.yaml` is via a new key as a namespace. If the new +integration requires multiple `images` then use `images` otherwise use `image`. Additionally `enabled` key determines is +the feature is enabled or disabled. 
Ensure that the configuration options are in +[qhub/schema.py](/~https://github.com/Quansight/qhub/blob/main/qhub/schema.py). Additionally the configuration +documentation in Qhub must reflect the configuration. At a minimum the new feature should also be detailed in the +administration guide and user guide. ```yaml : diff --git a/docs/source/dev_guide/keycloak.md b/docs/source/dev_guide/keycloak.md index 0d564b577a..ecdefdc7cf 100644 --- a/docs/source/dev_guide/keycloak.md +++ b/docs/source/dev_guide/keycloak.md @@ -1,10 +1,10 @@ # Keycloak Testing -Keycloak is installed in the kubernetes cluster as part of QHub and it manages all users and groups, plus OAuth2 authentication for other QHub components such as JupyterHub and -Conda Store. +Keycloak is installed in the kubernetes cluster as part of QHub and it manages all users and groups, plus OAuth2 +authentication for other QHub components such as JupyterHub and Conda Store. -Sometimes you want a simple way to isolate and play around with Keycloak separately from QHub as a whole, but also want it to be configured in a similar way to Keycloak when part -of QHub. +Sometimes you want a simple way to isolate and play around with Keycloak separately from QHub as a whole, but also want +it to be configured in a similar way to Keycloak when part of QHub. ## Run Keycloak in Docker @@ -18,38 +18,43 @@ docker run -p 8080:8080 -e KEYCLOAK_USER=admin -e KEYCLOAK_PASSWORD=admin \ Note: the volume mount (-v flag) should allow data to persist when you stop and start the container. -You can now visit the Keycloak admin panel at http://localhost:8080/auth/ and login with username and password both as `admin`. +You can now visit the Keycloak admin panel at http://localhost:8080/auth/ and login with username and password both as +`admin`. ## Configure Keycloak -When deployed within QHub, Terraform code will configure Keycloak in a certain way. Here we attempt to perform similar configuration, but of course check the code for the latest -config. +When deployed within QHub, Terraform code will configure Keycloak in a certain way. Here we attempt to perform similar +configuration, but of course check the code for the latest config. ### Create QHub Realm -Once you've signed in, create a new realm. In the upper left portion of the admin console, you should see `Master`, which is the default realm. Hover over `Master` and a dropdown -should appear with a button labeled `Add realm`. Click that button. You should be taken to a form to create a new realm. Name the realm `qhub` (case sensitive) and click the +Once you've signed in, create a new realm. In the upper left portion of the admin console, you should see `Master`, +which is the default realm. Hover over `Master` and a dropdown should appear with a button labeled `Add realm`. Click +that button. You should be taken to a form to create a new realm. Name the realm `qhub` (case sensitive) and click the `Create` button. The admin console should now be in the qhub realm, as reflected in the upper left of the UI. ### Add groups and rules -All QHub deployments will have `users` and `admin` groups. For development, you need to add them manually. Click `Groups` on the side nav, and `New` - enter the name `users` in -lower case. Then do the same for `admin`. +All QHub deployments will have `users` and `admin` groups. For development, you need to add them manually. Click +`Groups` on the side nav, and `New` - enter the name `users` in lower case. Then do the same for `admin`. 
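+
+If you prefer to script this setup instead of clicking through the console, the realm and both groups can also be
+created with Keycloak's admin CLI. This is only a rough sketch: it assumes the WildFly-based Keycloak image implied by
+the `KEYCLOAK_USER`/`KEYCLOAK_PASSWORD` variables above, and `<container-name>` is a placeholder for whatever
+`docker ps` reports for your running container.
+
+```shell
+# Log the admin CLI in against the local container (admin/admin as configured above)
+docker exec <container-name> /opt/jboss/keycloak/bin/kcadm.sh config credentials \
+  --server http://localhost:8080/auth --realm master --user admin --password admin
+
+# Create the qhub realm and the two groups described above
+docker exec <container-name> /opt/jboss/keycloak/bin/kcadm.sh create realms -s realm=qhub -s enabled=true
+docker exec <container-name> /opt/jboss/keycloak/bin/kcadm.sh create groups -r qhub -s name=users
+docker exec <container-name> /opt/jboss/keycloak/bin/kcadm.sh create groups -r qhub -s name=admin
+
+# Marking `users` as a default group is still done in the console, as described below
+```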
-The `users` group also needs to be configured so that it is a default group assigned to any new users. To do this, click the `Default Groups` tab within the `Groups` page. Click
-`users` in the list of Available Groups, then `Add` to move it to the list of Default Groups on the left.
+The `users` group also needs to be configured so that it is a default group assigned to any new users. To do this, click
+the `Default Groups` tab within the `Groups` page. Click `users` in the list of Available Groups, then `Add` to move it
+to the list of Default Groups on the left.

 ### Create an OAuth 2 client

-Within the qhub realm, you will create a new client. Within QHub, a separate client exists for each of jupyterhub, conda-store, dask etc. In the side nav, click `Clients`, then
-click the `Create` button. Fill out the form as show below:
+Within the qhub realm, you will create a new client. Within QHub, a separate client exists for each of jupyterhub,
+conda-store, dask, etc. In the side nav, click `Clients`, then click the `Create` button. Fill out the form as shown
+below:

 | Form field      | Value          |
 | --------------- | -------------- |
 | Client ID       | myclient       |
 | Client Protocol | openid-connect |

-After clicking `Save`, you will be taken to the client settings form. Make sure the form fields are filled in as follows:
+After clicking `Save`, you will be taken to the client settings form. Make sure the form fields are filled in as
+follows:

 **Settings tab:**

@@ -58,13 +63,14 @@ After clicking `Save`, you will be taken to the client settings form. Make sure
 | Access Type         | confidential                         |
 | Valid Redirect URIs | http://localhost:7010/oauth_callback |

-> The redirect URI you use here will depend on how you want to test OAuth2 login flows. The example above would make sense if you are running your OAuth2 client (e.g. JupyterHub or
-> Conda Store) at port 7010 locally, and it happens to have its callback URL at the path `/oauth_callback`.
+> The redirect URI you use here will depend on how you want to test OAuth2 login flows. The example above would make
+> sense if you are running your OAuth2 client (e.g. JupyterHub or Conda Store) at port 7010 locally, and it happens to
+> have its callback URL at the path `/oauth_callback`.
 >
 > If you plan to test using Postman (see below) the callback will be `https://oauth.pstmn.io/v1/browser-callback`.

-You will next create a new mapper for the `myclient` client. Go to the `Mapper` tab and click the `Create` button. Make sure the form is filled out as shown below and then click
-the `Save` button.
+You will next create a new mapper for the `myclient` client. Go to the `Mapper` tab and click the `Create` button. Make
+sure the form is filled out as shown below and then click the `Save` button.

 **Mappers (create):**

@@ -82,46 +88,55 @@ the `Save` button.

 ### Create Qhub login

-You will now create a new user. This will be the user that you use to sign in to the Qhub control panel (which is separate from the Keycloak admin sign in).
+You will now create a new user. This will be the user that you use to sign in to the Qhub control panel (which is
+separate from the Keycloak admin sign in).

-In Keycloak, go to `Users` in the side nav and click `Add user`. Give the user any username that you want (for these instructions, we will assume `quser`) and click `Save`. Go to
-the `Credentials` tab, toggle off the `Temporary` field, and set a password for your user (we will assume `quser` for the password).
+In Keycloak, go to `Users` in the side nav and click `Add user`.
Give the user any username that you want (for these +instructions, we will assume `quser`) and click `Save`. Go to the `Credentials` tab, toggle off the `Temporary` field, +and set a password for your user (we will assume `quser` for the password). -In order for your new user to access the Qhub control panel, they must belong to the admin group. Go to `Users` in the side nav, click `View all users`, find your user, then click -`Edit` under the `Actions` column. Go to the `Groups` tab for `quser`. Under `Available Groups`, you should see `admin`. Click on `admin` then click `Join`. You should see the +In order for your new user to access the Qhub control panel, they must belong to the admin group. Go to `Users` in the +side nav, click `View all users`, find your user, then click `Edit` under the `Actions` column. Go to the `Groups` tab +for `quser`. Under `Available Groups`, you should see `admin`. Click on `admin` then click `Join`. You should see the `Group Membership` box update with `/admin`. ## Understanding JupyterHub OAuth2 For example, for login to JupyterHub in QHub, the OAuth2 flow works as follows: -1. JupyterHub redirects to Keycloak at this URL: `https://myqhub.net/auth/realms/qhub/protocol/openid-connect/auth` with some extra parameters including the client ID of jupyterhub - to say which client should be authenticated. +1. JupyterHub redirects to Keycloak at this URL: `https://myqhub.net/auth/realms/qhub/protocol/openid-connect/auth` with + some extra parameters including the client ID of jupyterhub to say which client should be authenticated. 2. Keycloak redirects to the callback `https://myqhub.net/hub/oauth_callback` plus a short auth code. -3. JupyterHub makes a server-to-server call to `http://localhost:8080/auth/realms/qhub/protocol/openid-connect/token` in order to exchange that code for a more permanent access - token. -4. JupyterHub then also makes a server-to-server call to `http://localhost:8080/auth/realms/qhub/protocol/openid-connect/userinfo` (passing the access token as authentication) in - order to obtain extra information about the user. This step is generally optional in OAuth2. +3. JupyterHub makes a server-to-server call to `http://localhost:8080/auth/realms/qhub/protocol/openid-connect/token` in + order to exchange that code for a more permanent access token. +4. JupyterHub then also makes a server-to-server call to + `http://localhost:8080/auth/realms/qhub/protocol/openid-connect/userinfo` (passing the access token as + authentication) in order to obtain extra information about the user. This step is generally optional in OAuth2. -If you check JupyterHub's logs you may see the results of the userinfo call - which should contain a list of groups due to the group mapper that was configured earlier. It may also -contain email address etc. +If you check JupyterHub's logs you may see the results of the userinfo call - which should contain a list of groups due +to the group mapper that was configured earlier. It may also contain email address etc. A similar flow happens for the other clients too (conda store etc). ## Testing OAuth2 using Postman -Without having to run a service such as JupyterHub to see, for example, that our group mapper is working, it is possible to test the login flow independently. It would be possible -using curl, but it takes a lot of effort to fully understand all the parameters needed at each stage. 
+Without having to run a service such as JupyterHub to see, for example, that our group mapper is working, it is possible +to test the login flow independently. It would be possible using curl, but it takes a lot of effort to fully understand +all the parameters needed at each stage. -Postman is an application for experimenting with APIs. It can run in your browser (which works best if you also install the 'Postman Agent' to run on your computer), or you can -install a native Mac app. See [Postman's website](https://www.postman.com/downloads/). +Postman is an application for experimenting with APIs. It can run in your browser (which works best if you also install +the 'Postman Agent' to run on your computer), or you can install a native Mac app. See +[Postman's website](https://www.postman.com/downloads/). -You can use Postman to try out the OAuth2 flow of a standalone Keycloak instance (in this example, running using Docker locally, as described above). +You can use Postman to try out the OAuth2 flow of a standalone Keycloak instance (in this example, running using Docker +locally, as described above). -Create a client within the qhub realm, as described above (for `myclient`). Going forward, let's assume the client is called `postman` and you registered it with the callback URL -`https://oauth.pstmn.io/v1/browser-callback` (which is what Postman uses for this purpose). +Create a client within the qhub realm, as described above (for `myclient`). Going forward, let's assume the client is +called `postman` and you registered it with the callback URL `https://oauth.pstmn.io/v1/browser-callback` (which is what +Postman uses for this purpose). -In the Postman app, go into a workspace (My Workspace is the default) and open a new 'request tab' in the main working area of the screen. +In the Postman app, go into a workspace (My Workspace is the default) and open a new 'request tab' in the main working +area of the screen. ![Screenshot of initial QHub login page](../images/dev_postman_for_keycloak.png) @@ -137,7 +152,8 @@ Then 'Configure New Token' using the defaults plus the following: - Client Secret: This is a guid-like string available from the Credentials tab of your Client in Keycloak - Scope: `profile` -Click the 'Get New Access Token' button. You will be prompted to sign into Keycloak (use the user you created in 'Create QHub login' above, not the admin user). +Click the 'Get New Access Token' button. You will be prompted to sign into Keycloak (use the user you created in 'Create +QHub login' above, not the admin user). This should result in an access token being displayed. Click the 'Use Token' button. @@ -145,5 +161,5 @@ Now your authentication is ready and you can use it to call the userinfo API. Click the blue 'Send' button towards the top of the window, next to the main userinfo URL. -The JSON results should be visible at the bottom of the window. If the group mapping was configured successfully, this should include a `groups` field listing any groups the user -has joined. +The JSON results should be visible at the bottom of the window. If the group mapping was configured successfully, this +should include a `groups` field listing any groups the user has joined. 
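+
+If you'd rather not install Postman, the same token and userinfo calls can be exercised with `curl` against the
+standalone Keycloak container. This is a sketch only: it assumes the `myclient` client and `quser` user created above,
+that `Direct Access Grants` is enabled on the client (so the password grant works), and that `jq` is installed; the
+client secret placeholder comes from the client's `Credentials` tab.
+
+```shell
+CLIENT_SECRET=<client-secret-from-the-credentials-tab>
+
+# Exchange the test user's credentials for an access token (resource owner password grant)
+TOKEN=$(curl -s http://localhost:8080/auth/realms/qhub/protocol/openid-connect/token \
+  -d grant_type=password -d client_id=myclient -d client_secret="$CLIENT_SECRET" \
+  -d username=quser -d password=quser -d scope=profile | jq -r .access_token)
+
+# Call userinfo with the token; the response should include the `groups` claim added by the mapper above
+curl -s http://localhost:8080/auth/realms/qhub/protocol/openid-connect/userinfo \
+  -H "Authorization: Bearer $TOKEN" | jq .
+```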
diff --git a/docs/source/dev_guide/minikube.md b/docs/source/dev_guide/minikube.md index 6f472d8f50..04275d762a 100644 --- a/docs/source/dev_guide/minikube.md +++ b/docs/source/dev_guide/minikube.md @@ -1,17 +1,22 @@ # Minikube -[Minikube](https://minikube.sigs.k8s.io/docs/) is a project allowing you to run a local Kubernetes node simulation for development and testing purposes. +[Minikube](https://minikube.sigs.k8s.io/docs/) is a project allowing you to run a local Kubernetes node simulation for +development and testing purposes. -It's possible to run QHub on Minikube, and this can allow quicker feedback loops for development, as well as being less expensive than running cloud Kubernetes clusters. +It's possible to run QHub on Minikube, and this can allow quicker feedback loops for development, as well as being less +expensive than running cloud Kubernetes clusters. -Local testing is a great way to test the components of QHub. It's important to highlight that while it's possible to test most of QHub with this version, components that are Cloud -provisioned such as VPCs, managed Kubernetes cluster and managed container registries can't be locally tested, due to their Cloud dependencies. +Local testing is a great way to test the components of QHub. It's important to highlight that while it's possible to +test most of QHub with this version, components that are Cloud provisioned such as VPCs, managed Kubernetes cluster and +managed container registries can't be locally tested, due to their Cloud dependencies. ## Compatibility -Currently, **QHub local deployment is primarily compatible with Linux-based Operating Systems**. The main limitation for the installation on MacOS relates to -[Docker Desktop for Mac](https://docs.docker.com/docker-for-mac/networking/#known-limitations-use-cases-and-workarounds) being unable to route traffic to containers. Theoretically, -the installation of HyperKit Driver could solve the issue, although the proposed solution isn't tested. There some workarounds for running +Currently, **QHub local deployment is primarily compatible with Linux-based Operating Systems**. The main limitation for +the installation on MacOS relates to +[Docker Desktop for Mac](https://docs.docker.com/docker-for-mac/networking/#known-limitations-use-cases-and-workarounds) +being unable to route traffic to containers. Theoretically, the installation of HyperKit Driver could solve the issue, +although the proposed solution isn't tested. There some workarounds for running [Minikube on Mac below](#minikube-on-mac). This guide assumes that you have the QHub repository downloaded, and you are at the root of the repository. @@ -27,20 +32,24 @@ To deploy QHub locally requires the installation of the following dependencies: The installation of a hypervisor isn't necessary. -> NOTE: Minikube requires `kubectl` OR you can use the embedded kubectl appropriate for your Minikube cluster version using `minikube kubectl`. To install `kubectl` -> [follow the instructions](https://kubernetes.io/docs/tasks/tools/) according to your operating system. +> NOTE: Minikube requires `kubectl` OR you can use the embedded kubectl appropriate for your Minikube cluster version +> using `minikube kubectl`. To install `kubectl` [follow the instructions](https://kubernetes.io/docs/tasks/tools/) +> according to your operating system. ## Initialize Kubernetes cluster -Before proceeding with the initialization, make sure to add yourself to the Docker group by executing the command `sudo usermod -aG docker && newgrp docker`. 
+Before proceeding with the initialization, make sure to add yourself to the Docker group by executing the command +`sudo usermod -aG docker && newgrp docker`. -Testing is done with Minikube. To confirm successful installation of both Docker and Minikube, you can run the following command to start up a local Kubernetes cluster: +Testing is done with Minikube. To confirm successful installation of both Docker and Minikube, you can run the following +command to start up a local Kubernetes cluster: ```shell minikube start --cpus 2 --memory 4096 --driver=docker ``` -The command downloads a Docker image of around 500Mb in size and initialise a cluster with 2 CPUs and 4 GB of RAM, with Docker as the chosen driver. +The command downloads a Docker image of around 500Mb in size and initialise a cluster with 2 CPUs and 4 GB of RAM, with +Docker as the chosen driver. Once `minikube start` finishes, run the command below to select the status of the cluster: @@ -60,19 +69,22 @@ kubeconfig: Configured timeToStop: Nonexistent ``` -After you have confirmed that Minikube is working, you can either continue to use, or you can stop your cluster. To stop your cluster, run: +After you have confirmed that Minikube is working, you can either continue to use, or you can stop your cluster. To stop +your cluster, run: ```bash minikube stop ``` -Next, the user installs `nfs-common` drivers. This is required by the JupyterLab instances, which require NFS mount for `PersistentVolumeClaims` (PVCs). To install it, run: +Next, the user installs `nfs-common` drivers. This is required by the JupyterLab instances, which require NFS mount for +`PersistentVolumeClaims` (PVCs). To install it, run: ```shell minikube ssh "sudo apt update; sudo apt install nfs-common -y" ``` -For more details on PVs and PVCs, read the [JupyterHub documentation](https://zero-to-jupyterhub.readthedocs.io/en/latest/jupyterhub/customizing/user-storage.html). +For more details on PVs and PVCs, read the +[JupyterHub documentation](https://zero-to-jupyterhub.readthedocs.io/en/latest/jupyterhub/customizing/user-storage.html). ## Optional pre-pulling and caching of Docker images @@ -81,22 +93,26 @@ For more details on PVs and PVCs, read the [JupyterHub documentation](https://ze ### Why pre-pull Docker images -As part of deployment, Minikube downloads Docker images that have a combined size of several Gigabytes. Each time minikube is destroyed and created it re-pulls these images. Also, -Terraform times out on slower internet connections if it takes longer than 10 minutes to pull the images. +As part of deployment, Minikube downloads Docker images that have a combined size of several Gigabytes. Each time +minikube is destroyed and created it re-pulls these images. Also, Terraform times out on slower internet connections if +it takes longer than 10 minutes to pull the images. -Images can be pre-pulled and added to the Minikube cache. This greatly reduce the time required for future deployments and reduces the data requiring download during deployment. +Images can be pre-pulled and added to the Minikube cache. This greatly reduce the time required for future deployments +and reduces the data requiring download during deployment. ### Pre-pulling and caching The following assumes that docker is currently installed. -The first step is to configure the minikube home directory environment variable. To set this to the home directory of the current user, run: +The first step is to configure the minikube home directory environment variable. 
To set this to the home directory of +the current user, run: ```bash export MINIKUBE_HOME=$HOME/.minikube ``` -The current list of Docker images is visible in the `qhub-config.yaml` file under the `default_images` key. Each image is pulled. +The current list of Docker images is visible in the `qhub-config.yaml` file under the `default_images` key. Each image +is pulled. ```bash docker pull quansight/qhub-jupyterhub:v0.x.x @@ -105,7 +121,8 @@ docker pull quansight/qhub-dask-worker:v0.x.x docker pull quansight/qhub-dask-gateway:v0.x.x ``` -Replacing `v0.x.x` with the current version that's listed. Note this may take several minutes. Once the process is complete, the user can copy them to the Minikube cache. +Replacing `v0.x.x` with the current version that's listed. Note this may take several minutes. Once the process is +complete, the user can copy them to the Minikube cache. ```bash minikube image load quansight/qhub-jupyterhub:v0.x.x @@ -114,7 +131,8 @@ minikube image load quansight/qhub-dask-worker:v0.x.x minikube image load quansight/qhub-dask-gateway:v0.x.x ``` -Again, adding the correct version. With this completed local Minikube deployment no longer requires pulling the preceding docker images. +Again, adding the correct version. With this completed local Minikube deployment no longer requires pulling the +preceding docker images. The preceding process is repeated with the updated tags when a new version of QHub is deployed. @@ -122,17 +140,21 @@ The preceding process is repeated with the updated tags when a new version of QH ## MetalLB -[MetalLB](https://metallb.universe.tf/) is the load balancer for bare-metal Kubernetes clusters. The user needs to configure MetalLB to match the QHub configuration. +[MetalLB](https://metallb.universe.tf/) is the load balancer for bare-metal Kubernetes clusters. The user needs to +configure MetalLB to match the QHub configuration. ## Automation of MetalLB with Python script *Skip to next section for configuration without Python* -Minikube doesn't provide a simple interactive way to configure addons, ([as shown in this repository issue](/~https://github.com/kubernetes/minikube/issues/8283)). It's recommended -to set load balancer start/stop IP address using a Python script with pre-established values. This recommendation is due to an existing DNS name that uses some addresses. +Minikube doesn't provide a simple interactive way to configure addons, +([as shown in this repository issue](/~https://github.com/kubernetes/minikube/issues/8283)). It's recommended to set load +balancer start/stop IP address using a Python script with pre-established values. This recommendation is due to an +existing DNS name that uses some addresses. -To do so, paste [this Python script](/~https://github.com/Quansight/qhub/blob/main/tests/scripts/minikube-loadbalancer-ip.py) in a text file named `minikube-loadbalancer-ip.py` and -then run: +To do so, paste +[this Python script](/~https://github.com/Quansight/qhub/blob/main/tests/scripts/minikube-loadbalancer-ip.py) in a text +file named `minikube-loadbalancer-ip.py` and then run: ```shell python minikube-loadbalancer-ip.py @@ -155,13 +177,15 @@ Copy the output image id and use it in the following command to obtain the Docke $ docker inspect --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}/{{.IPPrefixLen}}{{end}}' ``` -A example subnet range looks like `192.168.49.2/24`. This CIDR range has a starting IP of `192.168.49.0` and ending address of `192.168.49.255`. 
The `metallb` load balancer is -provided a range of IP addresses contained in the Docker CIDR range. If your CIDR is different, you can determine your range IP addresses from a CIDR address at +A example subnet range looks like `192.168.49.2/24`. This CIDR range has a starting IP of `192.168.49.0` and ending +address of `192.168.49.255`. The `metallb` load balancer is provided a range of IP addresses contained in the Docker +CIDR range. If your CIDR is different, you can determine your range IP addresses from a CIDR address at [this website](https://www.ipaddressguide.com/cidr). For this example case, the user assigns `metallb` a start IP address of `192.168.49.100` and an end of `192.168.49.150`. -The user can enable `metallb` as shown below. The command-line tool interface prompts the user for the start and stop IP range: +The user can enable `metallb` as shown below. The command-line tool interface prompts the user for the start and stop IP +range: ```shell minikube addons configure metallb @@ -188,8 +212,9 @@ ______________________________________________________________________
Click to expand note -The browser can have trouble reaching the load balancer running on WSL2. A workaround is to port forward the proxy-pod to the host IP 0.0.0.0. Get the ip address of the WSL2 -machine via `ip a`, which should be a 127.x.x.x address. To change the port forwarding after opening `k9s` you can type `:pods `, hover over the proxy-... pod and type +The browser can have trouble reaching the load balancer running on WSL2. A workaround is to port forward the proxy-pod +to the host IP 0.0.0.0. Get the ip address of the WSL2 machine via `ip a`, which should be a 127.x.x.x address. To +change the port forwarding after opening `k9s` you can type `:pods `, hover over the proxy-... pod and type ``, and enter the IP addresses.
@@ -220,11 +245,12 @@ Next, the user renders the infrastructure files from `qhub-config.yaml` running. python -m qhub deploy --config qhub-config.yaml --disable-prompt ``` -To ease development, the DNS record `github-actions.qhub.dev` is pointed to `192.168.49.100` so the next step is optional unless you end up with the load-balancer giving you a -different IP address. +To ease development, the DNS record `github-actions.qhub.dev` is pointed to `192.168.49.100` so the next step is +optional unless you end up with the load-balancer giving you a different IP address. -Make sure to point the DNS domain `github-actions.qhub.dev` to `192.168.49.100` from the previous commands. This is done in many ways, the easiest one is by modifying `/etc/hosts` -and adding the line below. The command overrides any DNS server. +Make sure to point the DNS domain `github-actions.qhub.dev` to `192.168.49.100` from the previous commands. This is done +in many ways, the easiest one is by modifying `/etc/hosts` and adding the line below. The command overrides any DNS +server. ```ini 192.168.49.100 github-actions.qhub.dev @@ -240,7 +266,8 @@ curl -k https://github-actions.qhub.dev/hub/login It's also possible to visit `https://github-actions.qhub.dev` in your web browser to select the deployment. -Since this is a local deployment, hence it's not visible to the internet; `https` certificates isn't signed by [Let's Encrypt](https://letsencrypt.org/). Thus, the certificates is +Since this is a local deployment, hence it's not visible to the internet; `https` certificates isn't signed by +[Let's Encrypt](https://letsencrypt.org/). Thus, the certificates is [self-signed by Traefik](https://en.wikipedia.org/wiki/Self-signed_certificate). Several browsers makes it difficult to view a self-signed certificate that's not added to the certificate registry. @@ -252,12 +279,13 @@ Each web browser handles this differently. A workaround for Firefox: And a workaround for Chrome: - Type `badidea` or `thisisunsafe` while viewing the rendered page (this has to do with - [how Chrome preloads some domains for its HTTP Strict Transport Security](https://hstspreload.org/) list in a way that can't be manually removed) + [how Chrome preloads some domains for its HTTP Strict Transport Security](https://hstspreload.org/) list in a way that + can't be manually removed) ## Cleanup -To delete all the QHub resources run the `destroy` command. Note that this won't delete your `qhub-config.yaml` and related rendered files thus a re-deployment via `deploy` is -possible afterwards. +To delete all the QHub resources run the `destroy` command. Note that this won't delete your `qhub-config.yaml` and +related rendered files thus a re-deployment via `deploy` is possible afterwards. ```shell python -m qhub destroy --config qhub-config.yaml @@ -275,13 +303,15 @@ ______________________________________________________________________ # Minikube on Mac -At one point developing with Minikube on a Mac worked, unfortunately this appears to no longer be the case. A few differences exists between how Minikube is deployed on Mac versus -Linux and if you're interested to try the instructions as they appeared when this worked, open the collapsed tab below: +At one point developing with Minikube on a Mac worked, unfortunately this appears to no longer be the case. A few +differences exists between how Minikube is deployed on Mac versus Linux and if you're interested to try the instructions +as they appeared when this worked, open the collapsed tab below:
Previous instructions -The earlier instructions for Minikube on Linux _nearly_ works on Mac except things that break without clever use of port forwarding at the right times. +The earlier instructions for Minikube on Linux _nearly_ works on Mac except things that break without clever use of port +forwarding at the right times. 1 - When working out the IP addresses to configure metallb try this: @@ -290,7 +320,8 @@ docker ps --format "{{.Names}} {{.ID}}" docker inspect --format='{{range .NetworkSettings.Networks}}{{.IPAddress}}/{{.IPPrefixLen}}{{end}}' ``` -This displays something like `192.168.49.2/24`, in which case a suitable IP range would be on the same subnet, for example start IP 192.168.49.100, end IP 192.168.49.150. +This displays something like `192.168.49.2/24`, in which case a suitable IP range would be on the same subnet, for +example start IP 192.168.49.100, end IP 192.168.49.150. 2 - This load balancer won't actually work, so you need to port-forward directly to the JupyterHub service: @@ -300,10 +331,12 @@ minikube kubectl -- --namespace=dev port-forward svc/proxy-public 8000:80 Then you could access QHub on http://127.0.0.1:8000/ -3 - However, the `qhub deploy` step needs to communicate with the Keycloak server, but this isn't possible without the correct hostname. +3 - However, the `qhub deploy` step needs to communicate with the Keycloak server, but this isn't possible without the +correct hostname. -It might be possible to set `/etc/hosts` to include `github-actions.qhub.dev` as they are done for the AWS minikube, below. And meanwhile use kubectl port-forward to actually -forward the traffic (from port 443 to something similar?). But you'd have to start that forwarding at the right point in the deployment. (When Kubernetes is ready, but before +It might be possible to set `/etc/hosts` to include `github-actions.qhub.dev` as they are done for the AWS minikube, +below. And meanwhile use kubectl port-forward to actually forward the traffic (from port 443 to something similar?). But +you'd have to start that forwarding at the right point in the deployment. (When Kubernetes is ready, but before terraform runs the Keycloak operator...)
@@ -311,12 +344,14 @@ ______________________________________________________________________ # Minikube on AWS -It's possible to run Minikube on AWS (and probably the other clouds). This is useful where you don't have enough memory to run QHub in a local Minikube cluster on your laptop, or -if you are using Mac or Windows and struggling to get Minikube to work. +It's possible to run Minikube on AWS (and probably the other clouds). This is useful where you don't have enough memory +to run QHub in a local Minikube cluster on your laptop, or if you are using Mac or Windows and struggling to get +Minikube to work. -Please understand that running Minikube on an AWS EC2 instance isn't the same as 'proper' deployment of QHub to AWS EKS (Kubernetes service). You might prefer Minikube on AWS over -full AWS EKS deployment for testing purposes if you find Minikube easier to debug, cheaper to run, or if you want to replicate the Minikube setup directly - for example, if trying -to fix the automated Cypress tests which use Minikube within a GitHub actions workflow. +Please understand that running Minikube on an AWS EC2 instance isn't the same as 'proper' deployment of QHub to AWS EKS +(Kubernetes service). You might prefer Minikube on AWS over full AWS EKS deployment for testing purposes if you find +Minikube easier to debug, cheaper to run, or if you want to replicate the Minikube setup directly - for example, if +trying to fix the automated Cypress tests which use Minikube within a GitHub actions workflow. There are some tricks that can make allow Minikube on AWS to feel much like local Minikube for development. @@ -331,7 +366,8 @@ export AWS_PROFILE=quansight export AWS_DEFAULT_REGION="eu-west-2" ``` -This assumes you have a 'quansight' profile in your ~/.aws/config and credentials files, but instead you can set `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` directly. +This assumes you have a 'quansight' profile in your ~/.aws/config and credentials files, but instead you can set +`AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` directly. ## Create a Key Pair @@ -350,8 +386,8 @@ chmod 400 ~/.ssh/${MYKEYNAME}.pem ## Run the EC2 instance -The recommended image is an Ubuntu 20.04 with Docker installed. It's recommended to be to run on a 16 GB/4 Core image, and increase the EBS disk space to 48 GB or so, up from the -standard 8 GB. +The recommended image is an Ubuntu 20.04 with Docker installed. It's recommended to be to run on a 16 GB/4 Core image, +and increase the EBS disk space to 48 GB or so, up from the standard 8 GB. ```bash aws ec2 run-instances --image-id ami-0cd5fb602c264fbd6 --instance-type t3a.xlarge --count 1 --key-name ${MYKEYNAME} --block-device-mappings 'DeviceName=/dev/sda1,Ebs={VolumeSize=48}' @@ -367,9 +403,11 @@ This should show all instances, so work out which one you need if there are mult ## Open SSH port access -Using the instance ID you obtained just preceding (for example `i-01bd8a4ee6016e1fe`), use that to first query for the 'security GroupSet ID' (for example `sg-96f73feb`). +Using the instance ID you obtained just preceding (for example `i-01bd8a4ee6016e1fe`), use that to first query for the +'security GroupSet ID' (for example `sg-96f73feb`). -Then use that to open up port 22 for the security group (and hence for the instance). Multiple instances running in this security group, all of which are now exposed on Port 22. +Then use that to open up port 22 for the security group (and hence for the instance). 
Multiple instances running in this +security group, all of which are now exposed on Port 22. ```bash aws ec2 describe-instance-attribute --instance-id i-01bd8a4ee6016e1fe --attribute groupSet @@ -449,7 +487,8 @@ sed -i -E 's/(cpu_guarantee):\s+[0-9\.]+/\1: 1/g' "qhub-config.yaml" sed -i -E 's/(mem_guarantee):\s+[A-Za-z0-9\.]+/\1: 1G/g' "qhub-config.yaml" ``` -The preceding two commands reduce slightly the memory and CPU requirements of JupyterLab sessions etc. Make any other changes needed to the `qhub-config.yaml` file. +The preceding two commands reduce slightly the memory and CPU requirements of JupyterLab sessions etc. Make any other +changes needed to the `qhub-config.yaml` file. Then deploy: @@ -459,7 +498,8 @@ qhub deploy --config qhub-config.yaml --disable-prompt ## Enable Kubernetes access from Mac -This step is optional, but allows you to use `kubectl` and `k9s` directly from your Mac. It's not needed if you are satisfied to use kubectl within an SSH session on AWS instead. +This step is optional, but allows you to use `kubectl` and `k9s` directly from your Mac. It's not needed if you are +satisfied to use kubectl within an SSH session on AWS instead. On your Mac laptop: @@ -509,18 +549,20 @@ users: EOF ``` -Now SSH into the AWS instance, enabling port forwarding so you can access the Minikube cluster as though It's running on your Mac: +Now SSH into the AWS instance, enabling port forwarding so you can access the Minikube cluster as though It's running on +your Mac: ```bash ssh -i ~/.ssh/${MYKEYNAME}.pem ubuntu@ec2-18-130-21-222.eu-west-2.compute.amazonaws.com -L 127.0.0.1:8443:192.168.49.2:8443 ``` -You should now find that `kubectl` and `k9` work for the Minikube cluster if you run them on your Mac. This can include `kubectl port-forward` to access Kubernetes services -individually. +You should now find that `kubectl` and `k9` work for the Minikube cluster if you run them on your Mac. This can include +`kubectl port-forward` to access Kubernetes services individually. ## Access the full QHub website -However, the way Traefik works won't allow you to port forward to that since it won't see the right domain name and port. +However, the way Traefik works won't allow you to port forward to that since it won't see the right domain name and +port. The users can trick it by setting up a hostname alias. Run `sudo vi /etc/hosts` on the Mac and add: @@ -536,5 +578,6 @@ sudo ssh -i ~/.ssh/${MYKEYNAME}.pem ubuntu@ec2-35-177-109-173.eu-west-2.compute. This is executed with the `sudo` privileges because forwarding a low-numbered port, like 443, is not allowed otherwise. -Now you can access https://github-actions.qhub.dev/ in a browser and you should be able to use your QHub. You have to bypass the self-signed cert warnings though - see -[verify the local deployment](#verify-the-local-deployment) for instructions. +Now you can access https://github-actions.qhub.dev/ in a browser and you should be able to use your QHub. You have to +bypass the self-signed cert warnings though - see [verify the local deployment](#verify-the-local-deployment) for +instructions. diff --git a/docs/source/dev_guide/release.md b/docs/source/dev_guide/release.md index e3f8ea1bec..0e9a99e724 100644 --- a/docs/source/dev_guide/release.md +++ b/docs/source/dev_guide/release.md @@ -2,16 +2,18 @@ ## Release Checklist -To generate a checklist that will guide you through the release process, create an issue from the `Release Checklist` issue template on GitHub. 
This issue is also used as a -centralized place to discuss any release items as a team. +To generate a checklist that will guide you through the release process, create an issue from the `Release Checklist` +issue template on GitHub. This issue is also used as a centralized place to discuss any release items as a team. -> NOTE: [Click here](/~https://github.com/Quansight/qhub/issues/new?assignees=&labels=type%3A+release+%F0%9F%8F%B7&template=release-checklist.md&title=%5BRELEASE%5D+-+%3Cversion%3E) +> NOTE: +> [Click here](/~https://github.com/Quansight/qhub/issues/new?assignees=&labels=type%3A+release+%F0%9F%8F%B7&template=release-checklist.md&title=%5BRELEASE%5D+-+%3Cversion%3E) > to create a new release checklist issue. ### Testing Checklist -As part of major releases, more hands-on, in-depth testing is required to validate that the core features and services are working as expected. To generate a checklist that will -guide you through the testing process, create an issue from the `Testing Checklist` issue template on GitHub. +As part of major releases, more hands-on, in-depth testing is required to validate that the core features and services +are working as expected. To generate a checklist that will guide you through the testing process, create an issue from +the `Testing Checklist` issue template on GitHub. > NOTE: > [Click here](/~https://github.com/Quansight/qhub/issues/new?assignees=&labels=type%3A+release+%F0%9F%8F%B7&template=testing-checklist.md&title=Testing+checklist+for+%3Cversion%3E) diff --git a/docs/source/dev_guide/testing.md b/docs/source/dev_guide/testing.md index 2618e3f82e..873cbb0917 100644 --- a/docs/source/dev_guide/testing.md +++ b/docs/source/dev_guide/testing.md @@ -2,27 +2,33 @@ ## Using a development branch -To use qhub from a development branch such as `main` set the environment variable `QHUB_GH_BRANCH` before running qhub commands: +To use qhub from a development branch such as `main` set the environment variable `QHUB_GH_BRANCH` before running qhub +commands: ``` export QHUB_GH_BRANCH=main ``` -Then `qhub init` will create a qhub-config.yaml containing, for example, `quansight/qhub-jupyterlab:main` which is the Docker image built based on the Dockerfiles specified in the -main branch of the QHub repo (see below for more info on how these are specified). There is a GitHub workflow that will build these images and push to Docker Hub whenever a change -is made to the relevant files on GitHub. +Then `qhub init` will create a qhub-config.yaml containing, for example, `quansight/qhub-jupyterlab:main` which is the +Docker image built based on the Dockerfiles specified in the main branch of the QHub repo (see below for more info on +how these are specified). There is a GitHub workflow that will build these images and push to Docker Hub whenever a +change is made to the relevant files on GitHub. -In addition, `qhub deploy` can use QHUB_GH_BRANCH to create GitHub/GitLab workflows which install the development branch of QHub for their own deploy steps. +In addition, `qhub deploy` can use QHUB_GH_BRANCH to create GitHub/GitLab workflows which install the development branch +of QHub for their own deploy steps. -If you want to use the development version of QHub for your init and deploy but want your resulting deployment to be based on a full release version, don't set the QHUB_GH_BRANCH -environment variable. 
In that case, Docker tags and workflow `pip install qhub` commands will be based on the qhub version specified in the `qhub/version.py` file, but these tags -and releases may not yet exist, perhaps if the version has been updated to include a beta/dev component which has not been released. So you may need to manually modify the -qhub-config.yaml to 'downgrade' the tags to a full release version. +If you want to use the development version of QHub for your init and deploy but want your resulting deployment to be +based on a full release version, don't set the QHUB_GH_BRANCH environment variable. In that case, Docker tags and +workflow `pip install qhub` commands will be based on the qhub version specified in the `qhub/version.py` file, but +these tags and releases may not yet exist, perhaps if the version has been updated to include a beta/dev component which +has not been released. So you may need to manually modify the qhub-config.yaml to 'downgrade' the tags to a full release +version. ### Kubernetes Version Check for Cloud Providers -When `qhub init ` is called, it checks that the `--kubernetes-version` provided is supported by the preferred cloud provider. This flag is optional and if not -provided, the `kubernetes_version` is set to the most recent kubernetes version available. This is achieved by using the cloud provider's SDK which thus requires their appropriate +When `qhub init ` is called, it checks that the `--kubernetes-version` provided is supported by the +preferred cloud provider. This flag is optional and if not provided, the `kubernetes_version` is set to the most recent +kubernetes version available. This is achieved by using the cloud provider's SDK which thus requires their appropriate credentials to be set. To get around this, simply set the `QHUB_K8S_VERSION` environment variable like so: ``` @@ -31,8 +37,9 @@ export QHUB_K8S_VERSION=1.20 ## Modifying Docker Images -All QHub docker images are located in [`qhub/template/image`](/~https://github.com/Quansight/qhub/tree/main/qhub/template/image). You can build any image locally. Additionally, on -Pull Requests each Docker-build will be tested. +All QHub docker images are located in +[`qhub/template/image`](/~https://github.com/Quansight/qhub/tree/main/qhub/template/image). You can build any image +locally. Additionally, on Pull Requests each Docker-build will be tested. ```shell docker build -f Dockerfile. . @@ -58,13 +65,14 @@ Then open the localhost (127.0.0.1) link that's in the terminal ## Linting Dockerfiles -To lint Dockerfiles, developers use a tool called [Hadolint](/~https://github.com/hadolint/hadolint). Hadolint is a Dockerfile linter that allows to discover issues with the -Dockerfiles and recommends [best practices to be followed](https://docs.docker.com/develop/develop-images/dockerfile_best-practices/). QHub CI automates Hadolint code reviews on -every commit and pull request, reporting code style and error prone issues. +To lint Dockerfiles, developers use a tool called [Hadolint](/~https://github.com/hadolint/hadolint). Hadolint is a +Dockerfile linter that allows to discover issues with the Dockerfiles and recommends +[best practices to be followed](https://docs.docker.com/develop/develop-images/dockerfile_best-practices/). QHub CI +automates Hadolint code reviews on every commit and pull request, reporting code style and error prone issues. To run Hadolint locally you can either install it locally or use a container image. 
Instructions are available on the -[install documentation for HadoLint](/~https://github.com/hadolint/hadolint#install). The `.hadolint.yml` on the root directory defines the ignored rules. To run Hadolint on -Dockerfiles run: +[install documentation for HadoLint](/~https://github.com/hadolint/hadolint#install). The `.hadolint.yml` on the root +directory defines the ignored rules. To run Hadolint on Dockerfiles run: ```shell hadolint ./qhub/template/\{\{\ cookiecutter.repo_directory\ \}\}/image/Dockerfile.conda-store @@ -74,20 +82,24 @@ hadolint ./qhub/template/\{\{\ cookiecutter.repo_directory\ \}\}/image/Dockerfil hadolint ./qhub/template/\{\{\ cookiecutter.repo_directory\ \}\}/image/Dockerfile.jupyterlab ``` -Hadolint will report `error`, `warning`, `info` and `style` while linting Dockerfiles. In case of an error, the CI fails. +Hadolint will report `error`, `warning`, `info` and `style` while linting Dockerfiles. In case of an error, the CI +fails. ## Debug Kubernetes clusters -To debug Kubernetes clusters, checkout [`k9s`](https://k9scli.io/), a terminal-based UI that aims to simplify navigation, observation, and management of applications in Kubernetes. -`k9s` continuously monitors Kubernetes clusters for changes and provides shortcut commands to interact with the observed resources becoming a fast way to review and resolve -day-to-day issues in deployed clusters. +To debug Kubernetes clusters, checkout [`k9s`](https://k9scli.io/), a terminal-based UI that aims to simplify +navigation, observation, and management of applications in Kubernetes. `k9s` continuously monitors Kubernetes clusters +for changes and provides shortcut commands to interact with the observed resources becoming a fast way to review and +resolve day-to-day issues in deployed clusters. -Installation can be done on a macOS, in Windows, and Linux and instructions are located [here](/~https://github.com/derailed/k9s). For more details on usage, review the +Installation can be done on a macOS, in Windows, and Linux and instructions are located +[here](/~https://github.com/derailed/k9s). For more details on usage, review the [Troubleshooting documentation](https://docs.qhub.dev/en/stable/source/admin_guide/troubleshooting.html#debug-your-kubernetes-cluster). ## Cypress Tests -Cypress automates testing within a web browser environment. It's integrated into the GitHub Actions tests.yaml workflows in this repo, and you can also run it locally. To do so: +Cypress automates testing within a web browser environment. It's integrated into the GitHub Actions tests.yaml workflows +in this repo, and you can also run it locally. To do so: ```shell cd tests_e2e @@ -100,19 +112,23 @@ export CYPRESS_EXAMPLE_USER_PASSWORD= npm run cypress:open ``` -The Base URL can point anywhere that should be accessible - it can be the URL of a QHub cloud deployment. The QHub Config Path should point to the associated yaml file for that -site. Most importantly, the tests will inspect the yaml file to understand what tests are relevant. To start with, it checks security.authentication.type to determine what should -be available on the login page, and how to test it. If the login type is 'password' then it uses the value in `CYPRESS_EXAMPLE_USER_PASSWORD` as the password (default username is -`example-user` but this can be changed by setting `CYPRESS_EXAMPLE_USER_NAME`). +The Base URL can point anywhere that should be accessible - it can be the URL of a QHub cloud deployment. 
The QHub +Config Path should point to the associated yaml file for that site. Most importantly, the tests will inspect the yaml +file to understand what tests are relevant. To start with, it checks security.authentication.type to determine what +should be available on the login page, and how to test it. If the login type is 'password' then it uses the value in +`CYPRESS_EXAMPLE_USER_PASSWORD` as the password (default username is `example-user` but this can be changed by setting +`CYPRESS_EXAMPLE_USER_NAME`). -The final command, in the preceding code-snippet, opens the Cypress UI where you can run the tests manually and see the actions in the browser. +The final command, in the preceding code-snippet, opens the Cypress UI where you can run the tests manually and see the +actions in the browser. Note that tests are heavily state dependent, so any changes or use of the deployed QHub could affect the results. ## Deployment and integration tests -Deployment and Integration testing makes it easier to test various features of deployed QHub on Minikube such as Dask Gateway, external integrations, state of the kubernetes -cluster via simple Python code. You can run the integration and deployment tests via the following command: +Deployment and Integration testing makes it easier to test various features of deployed QHub on Minikube such as Dask +Gateway, external integrations, state of the kubernetes cluster via simple Python code. You can run the integration and +deployment tests via the following command: ```shell pytest tests_deployment/ -v @@ -120,11 +136,14 @@ pytest tests_deployment/ -v # Cloud Testing -Cloud testing on AWS, GCP, Azure, and Digital Ocean can be significantly more complicated and time consuming. But it's the only way to truly test the cloud deployments, including -infrastructure, of course. To test on cloud Kubernetes, just deploy QHub in the normal way on those clouds, but using the [linked pip install](./index.md) of the QHub package. +Cloud testing on AWS, GCP, Azure, and Digital Ocean can be significantly more complicated and time consuming. But it's +the only way to truly test the cloud deployments, including infrastructure, of course. To test on cloud Kubernetes, just +deploy QHub in the normal way on those clouds, but using the [linked pip install](./index.md) of the QHub package. -Even with the dev install of the qhub package, you may find that the deployed cluster doesn't actually reflect any development changes, for example to the Docker images for -JupyterHub or JupyterLab. That will be because your qhub-config.yaml references fully released versions. See [Using a development branch](#using-a-development-branch) above for how -to encourage the Docker images to be specified based on the latest development code. +Even with the dev install of the qhub package, you may find that the deployed cluster doesn't actually reflect any +development changes, for example to the Docker images for JupyterHub or JupyterLab. That will be because your +qhub-config.yaml references fully released versions. See [Using a development branch](#using-a-development-branch) above +for how to encourage the Docker images to be specified based on the latest development code. -You should always prefer the local testing when possible as it will be easier to debug, may be quicker to deploy, and is likely to be less expensive. +You should always prefer the local testing when possible as it will be easier to debug, may be quicker to deploy, and is +likely to be less expensive. 
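+
+For a quick sanity check of a deployment (local or cloud) before running the heavier suites above, a couple of `curl`
+probes against the public endpoints can save time. This is a minimal sketch: `QHUB_DOMAIN` is a placeholder for your
+deployment's domain, and the Keycloak path assumes the default `qhub` realm.
+
+```shell
+QHUB_DOMAIN=github-actions.qhub.dev  # placeholder, substitute your own domain
+
+# -k tolerates the self-signed certificate used by local/Minikube deployments
+curl -kfsS "https://${QHUB_DOMAIN}/hub/login" -o /dev/null && echo "JupyterHub reachable"
+curl -kfsS "https://${QHUB_DOMAIN}/auth/realms/qhub/.well-known/openid-configuration" -o /dev/null \
+  && echo "Keycloak qhub realm reachable"
+```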
diff --git a/docs/source/installation/configuration.md b/docs/source/installation/configuration.md index dc024ae4b5..3806f4520f 100644 --- a/docs/source/installation/configuration.md +++ b/docs/source/installation/configuration.md @@ -1,8 +1,10 @@ # Advanced configuration -This is a page dedicated to the `qhub-config.yaml` file, the file that `qhub` uses to deploy and redeploy changes to your infrastructure. The `qhub-config.yaml` configuration file -is split into several sections and in this page, we detail the requirements necessary for this YAML-formatted configuration file. In the [Usage](usage.md) section we covered how -you can auto-generate this file using `qhub init` (and properly set options/flags and environment variables). +This is a page dedicated to the `qhub-config.yaml` file, the file that `qhub` uses to deploy and redeploy changes to +your infrastructure. The `qhub-config.yaml` configuration file is split into several sections and in this page, we +detail the requirements necessary for this YAML-formatted configuration file. In the [Usage](usage.md) section we +covered how you can auto-generate this file using `qhub init` (and properly set options/flags and environment +variables). > NOTE: The configuration file is always validated by a [pydantic schema](https://pydantic-docs.helpmanual.io/) in > [qhub/schema.py](/~https://github.com/Quansight/qhub/blob/main/qhub/schema.py) @@ -16,11 +18,11 @@ provider: # determines the choice of cloud provider for the dep domain: "do.qhub.dev" # top level URL exposure to monitor JupyterLab ``` -- `project_name`: should be compatible with the Cloud provider naming convention. Generally only use `A-Z`, `a-z`, `-`, and `_` (see - [Project Naming Conventions](./usage.md#project-naming-convention) for more details). +- `project_name`: should be compatible with the Cloud provider naming convention. Generally only use `A-Z`, `a-z`, `-`, + and `_` (see [Project Naming Conventions](./usage.md#project-naming-convention) for more details). -- `namespace`: is used in combination with `project_name` to label resources. In addition `namespace` also determines the `namespace` that used when deploying kubernetes resources - for qhub. +- `namespace`: is used in combination with `project_name` to label resources. In addition `namespace` also determines + the `namespace` that used when deploying kubernetes resources for qhub. - Default value: `dev` @@ -32,13 +34,15 @@ domain: "do.qhub.dev" # top level URL exposure to monitor JupyterLab - `azure` for Microsoft Azure - `local` for a local or existing kubernetes deployment -- `domain`: is the top level URL to put JupyterLab and future services under (such a monitoring). For example `qhub.dev` would be the domain for JupyterHub to be exposed under. +- `domain`: is the top level URL to put JupyterLab and future services under (such a monitoring). For example `qhub.dev` + would be the domain for JupyterHub to be exposed under. ## Continuous integration and continuous deployment -`ci_cd`: is optional and specifies the continuous integration and continuous deployment framework to use. QHub uses infrastructure-as-code to allow developers and users of QHub to -request change to the environment via pull requests (PRs) which then get approved by administration. You may configure CI/CD process to watch for pull-requests or commits on -specific branches. Currently CI/CD can be setup for either GitHub Actions or GitLab CI. 
+`ci_cd`: is optional and specifies the continuous integration and continuous deployment framework to use. QHub uses +infrastructure-as-code to allow developers and users of QHub to request change to the environment via pull requests +(PRs) which then get approved by administration. You may configure CI/CD process to watch for pull-requests or commits +on specific branches. Currently CI/CD can be setup for either GitHub Actions or GitLab CI. ```yaml ci_cd: @@ -55,29 +59,33 @@ ci_cd: - `type`: current supported CI providers are `github-actions` and `gitlab-ci` - `branch`: branch to use to commit `qhub render` changes to - `commit_render`: whether to commit the rendered changes back into the repo. Optional, defaults to `true`. -- `before_script`: optional script to run before CI starts QHub infrastructure deployment. This is useful in cases that additional setup is required for QHub to deploy the - resources. Only supported on `gitlab-ci` at the moment. -- `after_script`: optional script to run after CI ends QHub infrastructure deployment. This is useful in cases to notify resources of successful QHub deployment. Only supported on - `gitlab-ci` at the moment. +- `before_script`: optional script to run before CI starts QHub infrastructure deployment. This is useful in cases that + additional setup is required for QHub to deploy the resources. Only supported on `gitlab-ci` at the moment. +- `after_script`: optional script to run after CI ends QHub infrastructure deployment. This is useful in cases to notify + resources of successful QHub deployment. Only supported on `gitlab-ci` at the moment. -If `ci_cd` is not supplied, no CI/CD will be auto-generated, however, we advise employing an infrastructure-as-code approach. This allows teams to more quickly modify their QHub -deployment, empowering developers and data scientists to request the changes and have them approved by an administrator. +If `ci_cd` is not supplied, no CI/CD will be auto-generated, however, we advise employing an infrastructure-as-code +approach. This allows teams to more quickly modify their QHub deployment, empowering developers and data scientists to +request the changes and have them approved by an administrator. ## Certificate -By default, to simplify initial deployment `qhub` uses traefik to create a self-signed certificate. In order to create a certificate that's signed so that web browsers don't throw -errors we currently support [Let's Encrypt](https://letsencrypt.org/). +By default, to simplify initial deployment `qhub` uses traefik to create a self-signed certificate. In order to create a +certificate that's signed so that web browsers don't throw errors we currently support +[Let's Encrypt](https://letsencrypt.org/). ```yaml certificate: type: self-signed ``` -To use Let's Encrypt you must specify an email address that Let's Encrypt will associate the generated certificate with and whether to use the -[staging server](https://acme-staging-v02.api.letsencrypt.org/directory) or [production server](https://acme-v02.api.letsencrypt.org/directory). In general you should use the -production server, as seen below. +To use Let's Encrypt you must specify an email address that Let's Encrypt will associate the generated certificate with +and whether to use the [staging server](https://acme-staging-v02.api.letsencrypt.org/directory) or +[production server](https://acme-v02.api.letsencrypt.org/directory). In general you should use the production server, as +seen below. 
-> NOTE: Let's Encrypt heavily rate limits their production endpoint and provisioning https certificates can often fail due to this limit. +> NOTE: Let's Encrypt heavily rate limits their production endpoint and provisioning https certificates can often fail +> due to this limit. ```yaml certificate: @@ -88,8 +96,8 @@ certificate: Note the above snippet will already be present if you provided an `--ssl-cert-email` when you ran `qhub init`. -You may also supply a custom self-signed certificate and secret key. Note that the kubernetes default namespace that QHub uses is `dev`. Otherwise, it will be your `namespace` -defined in the `qhub-config.yaml`. +You may also supply a custom self-signed certificate and secret key. Note that the kubernetes default namespace that +QHub uses is `dev`. Otherwise, it will be your `namespace` defined in the `qhub-config.yaml`. ```yaml certificate: @@ -105,22 +113,25 @@ kubectl create secret tls \ --cert=path/to/cert/file --key=path/to/key/file ``` -> NOTE: the default kubernetes namespace that QHub uses is `dev`, however you can change the `namespace` key in the `qhub-config.yaml`. +> NOTE: the default kubernetes namespace that QHub uses is `dev`, however you can change the `namespace` key in the +> `qhub-config.yaml`. ### Wildcard certificates -Some of QHub services might require special subdomains under your certificate, Wildcard certificates allow you to secure all subdomains of a domain with a single certificate. -Defining a wildcard certificate decreases the amount of CN names you would need to define under the certificate configuration and reduces the chance of generating a wrong -subdomain. +Some of QHub services might require special subdomains under your certificate, Wildcard certificates allow you to secure +all subdomains of a domain with a single certificate. Defining a wildcard certificate decreases the amount of CN names +you would need to define under the certificate configuration and reduces the chance of generating a wrong subdomain. -> NOTE: It's not possible to request a double wildcard certificate for a domain (for example *.*.local.com). As a default behavior of -> [Traefik](https://doc.traefik.io/traefik/https/tls/#default-certificate), if the Domain Name System (DNS) and Common Name (CN) name doesn't match, Traefik generates and uses a -> self-signed certificate. This may lead to some unexpected [TLS](https://www.internetsociety.org/deploy360/tls/basics) issues, so as alternative to including each specific domain -> under the certificate CN list, you may also define a wildcard certificate. +> NOTE: It's not possible to request a double wildcard certificate for a domain (for example *.*.local.com). As a +> default behavior of [Traefik](https://doc.traefik.io/traefik/https/tls/#default-certificate), if the Domain Name +> System (DNS) and Common Name (CN) name doesn't match, Traefik generates and uses a self-signed certificate. This may +> lead to some unexpected [TLS](https://www.internetsociety.org/deploy360/tls/basics) issues, so as alternative to +> including each specific domain under the certificate CN list, you may also define a wildcard certificate. ## Security -This section walks through security and user authentication as it relates to QHub deployments. There are a few different ways to handle user authentication: +This section walks through security and user authentication as it relates to QHub deployments. 
There are a few different +ways to handle user authentication: - Auth0 - GitHub @@ -136,14 +147,15 @@ security: client_secret: ``` -In previous QHub versions (prior to `v0.4.0`), users and groups were added directly into the `qhub-config.yaml`. Starting with `v0.4.0`, user and group management is handled by -[Keycloak as described below](#keycloak). +In previous QHub versions (prior to `v0.4.0`), users and groups were added directly into the `qhub-config.yaml`. +Starting with `v0.4.0`, user and group management is handled by [Keycloak as described below](#keycloak). ### Omitting sensitive values -If you wish to avoid storing secrets etc. directly in the config yaml file you can instead set the values in environment variables. This substitution is triggered by setting config -values to "QHUB_SECRET\_" followed by the environment variable name. For example, you could set the environment variables "github_client_id" and "github_client_key" and write the -following in your config file: +If you wish to avoid storing secrets etc. directly in the config yaml file you can instead set the values in environment +variables. This substitution is triggered by setting config values to "QHUB_SECRET\_" followed by the environment +variable name. For example, you could set the environment variables "github_client_id" and "github_client_key" and write +the following in your config file: ```yaml security: @@ -156,18 +168,22 @@ security: ### Authentication -`security.authentication` is for configuring the OAuth and GitHub Provider, password based authentication, or custom authentication. +`security.authentication` is for configuring the OAuth and GitHub Provider, password based authentication, or custom +authentication. #### Auth0 based authentication -[Auth0](https://auth0.com/#!) can be used for authentication. While it is not free, there is a reasonable free tier that allows deployment of QHub clusters using many different -social providers, passwordless, and email based authentication methods. +[Auth0](https://auth0.com/#!) can be used for authentication. While it is not free, there is a reasonable free tier that +allows deployment of QHub clusters using many different social providers, passwordless, and email based authentication +methods. -QHub has command line options for `qhub init` which automates the creation Auth0 web app via: `--auth-provider=auth0 --auth-auto-provision`. +QHub has command line options for `qhub init` which automates the creation Auth0 web app via: +`--auth-provider=auth0 --auth-auto-provision`. -Otherwise here are docs on [creating an Auth0 Application](https://auth0.com/docs/applications). Make sure to select `Regular Web Application`. Important to note is the -`auth0_subdomain` field which must be only the `.auth0.com`. So for the following `qhub-dev.auth0.com` the subdomain would be `qhub-dev`. Note that all the -usernames will be the email addresses of users (not usernames). +Otherwise here are docs on [creating an Auth0 Application](https://auth0.com/docs/applications). Make sure to select +`Regular Web Application`. Important to note is the `auth0_subdomain` field which must be only the +`.auth0.com`. So for the following `qhub-dev.auth0.com` the subdomain would be `qhub-dev`. Note that +all the usernames will be the email addresses of users (not usernames). > NOTE: This is a different and distinct step from one outlined in the [Setup](setup.md#auth0) stage. 
@@ -183,8 +199,9 @@ security:

#### GitHub based authentication

-GitHub has instructions for [creating OAuth applications](https://docs.github.com/en/developers/apps/building-oauth-apps/creating-an-oauth-app). Note that QHub usernames will be
-their GitHub usernames.
+GitHub has instructions for
+[creating OAuth applications](https://docs.github.com/en/developers/apps/building-oauth-apps/creating-an-oauth-app).
+Note that QHub usernames will be their GitHub usernames.

```yaml
security:
@@ -197,9 +214,10 @@ security:

#### Password based authentication

-This is the simplest authentication method. This just defers to however Keycloak is configured. That's also true for GitHub/Auth0 cases, except that for the single-sign on
-providers the deployment will also configure those providers in Keycloak to save manual configuration. But it's also possible to add GitHub, or Google etc, as an Identity Provider
-in Keycloak even if you formally select `password` authentication in the `qhub-config.yaml` file.
+This is the simplest authentication method. This just defers to however Keycloak is configured. That's also true for
+GitHub/Auth0 cases, except that for the single sign-on providers the deployment will also configure those providers in
+Keycloak to save manual configuration. But it's also possible to add GitHub, Google, etc., as an Identity Provider in
+Keycloak even if you formally select `password` authentication in the `qhub-config.yaml` file.

```yaml
security:
@@ -209,14 +227,18 @@ security:

### Keycloak

-The `security.keycloak` section allows you to specify an initial password for the `root` user (to login at `https://myqhubsite.com/auth/admin/`) to manage your Keycloak database.
+The `security.keycloak` section allows you to specify an initial password for the `root` user (to log in at
+`https://myqhubsite.com/auth/admin/`) to manage your Keycloak database.

-We strongly recommend changing this `initial_root_password` after your initial deployment and deleting this value from your `qhub-config.yaml`. Any changes to this value on the
-`qhub-config.yaml` after the initial deployment will have no affect.
+We strongly recommend changing this `initial_root_password` after your initial deployment and deleting this value from
+your `qhub-config.yaml`. Any changes to this value in the `qhub-config.yaml` after the initial deployment will have no
+effect.

-For more information on how to do this, see the ["Change Keycloak root password"](./login.md#change-keycloak-root-password) section.
+For more information on how to do this, see the
+["Change Keycloak root password"](./login.md#change-keycloak-root-password) section.

-It's also possible to provide overrides to the [Keycloak Helm deployment](/~https://github.com/codecentric/helm-charts/tree/master/charts/keycloak).
+It's also possible to provide overrides to the
+[Keycloak Helm deployment](/~https://github.com/codecentric/helm-charts/tree/master/charts/keycloak).

```
security:
@@ -229,19 +251,24 @@ security:

#### User and group management

-Groups and users of QHub are all defined in Keycloak. As above, access Keycloak as the `root` user, noting that the `root` user is not actually a QHub user - you cannot access the
-main features of QHub such as JupyterLab with at user. It is only for Keycloak management.
+Groups and users of QHub are all defined in Keycloak. As above, access Keycloak as the `root` user, noting that the
+`root` user is not actually a QHub user - you cannot access the main features of QHub such as JupyterLab with that user.
+It is only for Keycloak management.

-Follow this links for more detailed information on [Keycloak user management](./login.md#add-user-using-keycloak-console) and [Keycloak group management](./login.md#groups).
+Follow these links for more detailed information on
+[Keycloak user management](./login.md#add-user-using-keycloak-console) and
+[Keycloak group management](./login.md#groups).

## Provider Infrastructure

-Finally, the Kubernetes infrastructure deployment. Although quite similar, each provider has a slightly different configuration.
+Finally, the Kubernetes infrastructure deployment. Although quite similar, each provider has a slightly different
+configuration.

-The following configuration sets up a kubernetes deployment with autoscaling node groups. Depending on the cloud provider there might be restrictions, which are detailed on each
-section.
+The following configuration sets up a kubernetes deployment with autoscaling node groups. Depending on the cloud
+provider there might be restrictions, which are detailed in each section.

-For any of the providers (besides local), adding a node group is as easy as the following: which adds a `high-memory` group:
+For any of the providers (besides local), adding a node group is as easy as the following, which adds a `high-memory`
+group:

```yaml
:
@@ -254,35 +281,41 @@ For any of the providers (besides local), adding a node group is as easy as the
 ...
 ```

-> NOTE: For each provider, details such as **instance names**, **availability zones**, and **Kubernetes versions** will be DIFFERENT.
+> NOTE: For each provider, details such as **instance names**, **availability zones**, and **Kubernetes versions** will
+> be DIFFERENT.

> NOTE: upgrading the `general` node instance type may not be possible for your chosen provider.
> [See FAQ.](../user_guide/faq.md#i-want-to-upgrade-the-instance-size-the-general-node-group-is-this-possible)

### Providers

-To take advantage of the auto-scaling and dask-distributed computing capabilities, QHub can be deployed on a handful of the most commonly used cloud providers. QHub utilizes many
-of the resources these cloud providers have to offer, however, at it's core, is the Kubernetes engine (or service). Each cloud provider has slightly different ways Kubernetes is
-configured but fear not, all of this is handled by QHub.
+To take advantage of the auto-scaling and dask-distributed computing capabilities, QHub can be deployed on a handful of
+the most commonly used cloud providers. QHub utilizes many of the resources these cloud providers have to offer;
+however, at its core is the Kubernetes engine (or service). Each cloud provider has slightly different ways Kubernetes
+is configured, but fear not, all of this is handled by QHub.

Listed below are the cloud providers QHub currently supports.

-> NOTE: Many of the cloud providers regularly update their internal Kubernetes versions so if you wish to specify a particular version, please check the following resources. This
-> is *completely optional* as QHub will, by default, select the most recent version available for your preferred cloud provider.
-> [Digital Ocean](https://docs.digitalocean.com/products/kubernetes/changelog/) [Google Cloud Platform](https://cloud.google.com/kubernetes-engine/docs/release-notes-stable)
+> NOTE: Many of the cloud providers regularly update their internal Kubernetes versions so if you wish to specify a
+> particular version, please check the following resources.
This is *completely optional* as QHub will, by default, +> select the most recent version available for your preferred cloud provider. +> [Digital Ocean](https://docs.digitalocean.com/products/kubernetes/changelog/) +> [Google Cloud Platform](https://cloud.google.com/kubernetes-engine/docs/release-notes-stable) > [Amazon Web Services](https://docs.aws.amazon.com/eks/latest/userguide/kubernetes-versions.html) > [Microsoft Azure](https://docs.microsoft.com/en-us/azure/aks/supported-kubernetes-versions?tabs=azure-cli) #### DigitalOcean -DigitalOcean has a restriction with autoscaling in that the minimum nodes allowed (`min_nodes` = 1) is one but is by far the cheapest provider even accounting for spot/preemptible -instances. In addition Digital Ocean doesn't have accelerator/gpu support. Digital Ocean is a great default choice for tying out QHub. Below is the recommended setup. +DigitalOcean has a restriction with autoscaling in that the minimum nodes allowed (`min_nodes` = 1) is one but is by far +the cheapest provider even accounting for spot/preemptible instances. In addition Digital Ocean doesn't have +accelerator/gpu support. Digital Ocean is a great default choice for tying out QHub. Below is the recommended setup. -> NOTE: DigitalOcean regularly updates Kubernetes versions hence, the field `kubernetes_version` will most likely have to be changed. -> [See available instance types for DigitalOcean](https://www.digitalocean.com/docs/droplets/). If you used `qhub init` this version will automatically be computed for you. Do not -> copy the version you see below. +> NOTE: DigitalOcean regularly updates Kubernetes versions hence, the field `kubernetes_version` will most likely have +> to be changed. [See available instance types for DigitalOcean](https://www.digitalocean.com/docs/droplets/). If you +> used `qhub init` this version will automatically be computed for you. Do not copy the version you see below. -To see available instance types refer to [Digital Ocean Instance Types](https://www.digitalocean.com/docs/droplets/). Additionally the Digital Ocean cli `doctl` has +To see available instance types refer to [Digital Ocean Instance Types](https://www.digitalocean.com/docs/droplets/). +Additionally the Digital Ocean cli `doctl` has [support for listing droplets](https://www.digitalocean.com/docs/apis-clis/doctl/reference/compute/droplet/list/). ```yaml @@ -306,8 +339,8 @@ digital_ocean: #### Google cloud provider -Google Cloud has the best support for QHub and is a great default choice for a production deployment. It allows auto-scaling to zero within the node group. There are no major -restrictions. +Google Cloud has the best support for QHub and is a great default choice for a production deployment. It allows +auto-scaling to zero within the node group. There are no major restrictions. To see available instance types refer to [GCP docs](https://cloud.google.com/compute/docs/machine-types). @@ -358,10 +391,11 @@ amazon_web_services: #### Azure -Microsoft Azure has similar settings for Kubernetes version, region, and instance names - using Azure's available values of course. +Microsoft Azure has similar settings for Kubernetes version, region, and instance names - using Azure's available values +of course. -Azure also requires a field named `storage_account_postfix` which will have been generated by `qhub init`. This allows qhub to create a Storage Account bucket that should be -globally unique. 
+Azure also requires a field named `storage_account_postfix` which will have been generated by `qhub init`. This allows +qhub to create a Storage Account bucket that should be globally unique. ``` azure: @@ -385,14 +419,16 @@ azure: #### Local (existing) Kubernetes cluster -Originally designed for QHub deployments on a "local" minikube cluster, this feature has now expanded to allow users to deploy QHub to any existing kubernetes cluster. The default -options for a `local` deployment are still set deploy QHub to a minikube cluster. +Originally designed for QHub deployments on a "local" minikube cluster, this feature has now expanded to allow users to +deploy QHub to any existing kubernetes cluster. The default options for a `local` deployment are still set deploy QHub +to a minikube cluster. -If you wish to deploy QHub to an existing kubernetes cluster on one of the cloud providers, please refer to a more detailed walkthrough found in the -[Deploy QHub to an Existing Kubernetes Cluster](./existing.md). +If you wish to deploy QHub to an existing kubernetes cluster on one of the cloud providers, please refer to a more +detailed walkthrough found in the [Deploy QHub to an Existing Kubernetes Cluster](./existing.md). -Deploying to a local existing kubernetes cluster has different options than the cloud providers. `kube_context` is an optional key that can be used to deploy to a non-default -context. The default node selectors will allow pods to be scheduled anywhere. This can be adjusted to schedule pods on different labeled nodes. Allowing for similar functionality +Deploying to a local existing kubernetes cluster has different options than the cloud providers. `kube_context` is an +optional key that can be used to deploy to a non-default context. The default node selectors will allow pods to be +scheduled anywhere. This can be adjusted to schedule pods on different labeled nodes. Allowing for similar functionality to node groups in the cloud. ```yaml @@ -412,14 +448,17 @@ local: ## Terraform state -Terraform manages the state of all the deployed resources via [backends](https://www.terraform.io/language/settings/backends). Terraform requires storing the state in order to keep -track of the names, ids, and states of deployed resources. The simplest approach is storing the state on the local filesystem but isn't recommended and isn't the default of QHub. -`terraform_state` is either `remote`, `existing` or `local` with a default value of `remote`. This decides whether to control the state of the cluster `local` via tfstate file (not -recommended), on an already `existing` terraform state store or remotely and auto creating the terraform state store. See -[terraform remote state](https://www.terraform.io/language/state/remote) docs. If you are doing anything other than testing we highly recommend `remote` unless you know what you -are doing. +Terraform manages the state of all the deployed resources via +[backends](https://www.terraform.io/language/settings/backends). Terraform requires storing the state in order to keep +track of the names, ids, and states of deployed resources. The simplest approach is storing the state on the local +filesystem but isn't recommended and isn't the default of QHub. `terraform_state` is either `remote`, `existing` or +`local` with a default value of `remote`. This decides whether to control the state of the cluster `local` via tfstate +file (not recommended), on an already `existing` terraform state store or remotely and auto creating the terraform state +store. 
See [terraform remote state](https://www.terraform.io/language/state/remote) docs. If you are doing anything +other than testing we highly recommend `remote` unless you know what you are doing. -The following are examples. `remote` and `local` are straightforward. For a `local` provider that deploys on an existing kubernetes cluster the kubernetes remote backend is used. +The following are examples. `remote` and `local` are straightforward. For a `local` provider that deploys on an existing +kubernetes cluster the kubernetes remote backend is used. ```yaml terraform_state: @@ -431,7 +470,8 @@ terraform_state: type: local ``` -Using an existing terraform backend can be done by specifying the `backend` and arbitrary key/value pairs in the `config`. +Using an existing terraform backend can be done by specifying the `backend` and arbitrary key/value pairs in the +`config`. ```yaml terraform_state: @@ -445,8 +485,9 @@ terraform_state: ## Default Images -Default images are to the default image run if not specified in a profile (described in the next section). The `jupyterhub` key controls the jupyterhub image run. These control the -docker image used to run JupyterHub, the default JupyterLab image, and the default Dask worker image. +Default images are to the default image run if not specified in a profile (described in the next section). The +`jupyterhub` key controls the jupyterhub image run. These control the docker image used to run JupyterHub, the default +JupyterLab image, and the default Dask worker image. ```yaml default_images: @@ -459,8 +500,9 @@ default_images: Control the amount of storage allocated to shared filesystem. -> NOTE 1: when the storage size is changed, for most providers it will automatically delete (!) the previous storage place. NOTE 2: changing the storage size on an AWS deployment -> after the initial deployment can be especially tricky so it might be worthwhile padding these storage sizes. +> NOTE 1: when the storage size is changed, for most providers it will automatically delete (!) the previous storage +> place. NOTE 2: changing the storage size on an AWS deployment after the initial deployment can be especially tricky so +> it might be worthwhile padding these storage sizes. ```yaml storage: @@ -522,38 +564,46 @@ profiles: For each `profiles.jupyterlab` is a named JupyterLab profile. -Use the `kubespawner_override` field to define behavior as per the [KubeSpawner](https://jupyterhub-kubespawner.readthedocs.io/en/latest/spawner.html) API. +Use the `kubespawner_override` field to define behavior as per the +[KubeSpawner](https://jupyterhub-kubespawner.readthedocs.io/en/latest/spawner.html) API. -It is possible to control which users have access to which JupyterLab profiles. Each profile has a field named `access` which can be set to `all` (default if omitted), `yaml`, or -`keycloak`. +It is possible to control which users have access to which JupyterLab profiles. Each profile has a field named `access` +which can be set to `all` (default if omitted), `yaml`, or `keycloak`. `all` means every user will have access to the profile. -`yaml` means that access is restricted to anyone with their username in the `users` field of the profile or who belongs to a group named in the `groups` field. +`yaml` means that access is restricted to anyone with their username in the `users` field of the profile or who belongs +to a group named in the `groups` field. 
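As a sketch of how the `all` and `yaml` access modes fit together in practice, a restricted and an open profile might be
declared as below. The profile names, usernames, group names, `display_name` key, and `kubespawner_override` values are
placeholders for illustration only, not values taken from the official example.

```yaml
profiles:
  jupyterlab:
    - display_name: Team Instance        # hypothetical profile restricted via the lists below
      access: yaml
      users:
        - alice                          # placeholder username
      groups:
        - developer                      # one of the default QHub groups
      kubespawner_override:
        cpu_limit: 2
        mem_limit: 4G
    - display_name: Open Instance        # hypothetical profile available to every user
      access: all
      kubespawner_override:
        cpu_limit: 1
        mem_limit: 2G
```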

-`keycloak` means that access is restricted to any user who in Keycloak has either their group(s) or user with the attribute `jupyterlab_profiles` containing this profile name. For
-example, if the user is in a Keycloak group named `developers` which has an attribute `jupyterlab_profiles` set to `Large Instance`, they will have access to the Large Instance
-profile. To specify multiple profiles for one group (or user) delimit their names using `##` - for example, `Large Instance##Another Instance`.
+`keycloak` means that access is restricted to any user who in Keycloak has either their group(s) or user with the
+attribute `jupyterlab_profiles` containing this profile name. For example, if the user is in a Keycloak group named
+`developers` which has an attribute `jupyterlab_profiles` set to `Large Instance`, they will have access to the Large
+Instance profile. To specify multiple profiles for one group (or user) delimit their names using `##` - for example,
+`Large Instance##Another Instance`.

### Dask Profiles

-Finally, we allow for configuration of the Dask workers. In general, similar to the JupyterLab instances you only need to configuration the cores and memory.
+Finally, we allow for configuration of the Dask workers. In general, similar to the JupyterLab instances you only need
+to configure the cores and memory.

-When configuring the memory and CPUs for profiles there are some important considerations to make. Two important terms to understand are:
+When configuring the memory and CPUs for profiles there are some important considerations to make. Two important terms
+to understand are:

-- `limit`: the absolute max memory that a given pod can consume. If a process within the pod consumes more than the `limit` memory the linux OS will kill the process. LimIt is not
-  used for scheduling purposes with kubernetes.
-- `guarantee`: is the amount of memory the kubernetes scheduler uses to place a given pod. In general the `guarantee` will be less than the limit. Often times the node itself has
-  less available memory than the node specification. See this [guide from digital ocean](https://docs.digitalocean.com/products/kubernetes/#allocatable-memory) which is generally
-  applicable to other clouds.
+- `limit`: the absolute max memory that a given pod can consume. If a process within the pod consumes more than the
+  `limit` memory the linux OS will kill the process. The limit is not used for scheduling purposes with kubernetes.
+- `guarantee`: is the amount of memory the kubernetes scheduler uses to place a given pod. In general the `guarantee`
+  will be less than the limit. Oftentimes the node itself has less available memory than the node specification. See
+  this [guide from digital ocean](https://docs.digitalocean.com/products/kubernetes/#allocatable-memory) which is
+  generally applicable to other clouds.

-For example if a node has 8 GB of ram and 2 CPUs you should guarantee/schedule roughly 75% and follow the digital ocean guide linked above. For example 1.5 CPU guarantee and 5.5 GB
-guaranteed.
+For example, if a node has 8 GB of ram and 2 CPUs you should guarantee/schedule roughly 75% and follow the digital ocean
+guide linked above: for example, a 1.5 CPU guarantee and 5.5 GB guaranteed.

### Dask Scheduler

-In a few instances, the Dask worker node-group might be running on quite a large instance, perhaps with 8 CPUs and 32 GB of memory (or more). When this is the case, you might also
-want to increase the resource levels associated with the Dask Scheduler.
+In a few instances, the Dask worker node-group might be running on quite a large instance, perhaps with 8 CPUs and 32 GB +of memory (or more). When this is the case, you might also want to increase the resource levels associated with the Dask +Scheduler. ```yaml dask_worker: @@ -570,8 +620,9 @@ dask_worker: ### JupyterLab Profile Node Selectors -A common operation is to target jupyterlab profiles to specific node labels. In order to target a specific node groups add the following. This example shows a GKE node groups with -name `user-large`. Other cloud providers will have different node labels. +A common operation is to target jupyterlab profiles to specific node labels. In order to target a specific node groups +add the following. This example shows a GKE node groups with name `user-large`. Other cloud providers will have +different node labels. ```yaml profiles: @@ -605,7 +656,8 @@ profiles: ### Customizing JupyterHub theme -JupyterHub can be customized since QHub uses [Quansight/qhub-jupyterhub-theme](/~https://github.com/quansight/qhub-jupyterhub-theme). Available theme options. +JupyterHub can be customized since QHub uses +[Quansight/qhub-jupyterhub-theme](/~https://github.com/quansight/qhub-jupyterhub-theme). Available theme options. > NOTE: if you want to change the logo it must be an accessible URL to the logo. @@ -629,8 +681,9 @@ theme: h2_color: '#652e8e' ``` -Its also possible to display the current version of qhub by using the `display_version: 'True'` in the above `theme.jupyterhub` configuration. If no extra information is passed, -the displayed version will be the same as `qhub_version`, an overwrite can be done by passing the `version: v.a.b.c` key as well. +Its also possible to display the current version of qhub by using the `display_version: 'True'` in the above +`theme.jupyterhub` configuration. If no extra information is passed, the displayed version will be the same as +`qhub_version`, an overwrite can be done by passing the `version: v.a.b.c` key as well. ## Environments @@ -667,27 +720,32 @@ environments: - pyyaml ``` -QHub is experimenting with a new way of distributing environments using [conda-store](/~https://github.com/quansight/conda-store). Please expect this environment distribution method -to change over time. +QHub is experimenting with a new way of distributing environments using +[conda-store](/~https://github.com/quansight/conda-store). Please expect this environment distribution method to change +over time. -Each environment configuration is a `environment.` mapping to a conda environment definition file. If you need to pin a specific version, please include it in the -definition. One current requirement is that each environment include `ipykernel`, `ipywidgets`, `qhub-dask==0.2.3`. Upon changing the environment definition expect 1-10 minutes -upon deployment of the configuration for the environment to appear. +Each environment configuration is a `environment.` mapping to a conda environment definition file. If you need +to pin a specific version, please include it in the definition. One current requirement is that each environment include +`ipykernel`, `ipywidgets`, `qhub-dask==0.2.3`. Upon changing the environment definition expect 1-10 minutes upon +deployment of the configuration for the environment to appear. ## qhub_version -All `qhub-config.yaml` files must now contain a `qhub_version` field displaying the version of QHub which it's intended to be deployed with. 
+All `qhub-config.yaml` files must now contain a `qhub_version` field displaying the version of QHub with which it's
+intended to be deployed.

QHub will refuse to deploy if it doesn't contain the same version as that of the `qhub` command.

-Typically, you can upgrade the qhub-config.yaml file itself using the [`qhub upgrade` command](../admin_guide/upgrade.md). This will update image numbers, plus updating
-qhub_version to match the installed version of `qhub`, as well as any other bespoke changes required.
+Typically, you can upgrade the qhub-config.yaml file itself using the
+[`qhub upgrade` command](../admin_guide/upgrade.md). This will update image numbers and update qhub_version to match
+the installed version of `qhub`, as well as any other bespoke changes required.

## JupyterHub

-JupyterHub uses the [zero to jupyterhub helm chart](/~https://github.com/jupyterhub/zero-to-jupyterhub-k8s/). This chart has many options that are not configured in the QHub default
-installation. You can override specific values in the [values.yaml](/~https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/main/jupyterhub/values.yaml). `jupyterhub.overrides`
-is optional.
+JupyterHub uses the [zero to jupyterhub helm chart](/~https://github.com/jupyterhub/zero-to-jupyterhub-k8s/). This chart
+has many options that are not configured in the QHub default installation. You can override specific values in the
+[values.yaml](/~https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/main/jupyterhub/values.yaml).
+`jupyterhub.overrides` is optional.

```yaml
jupyterhub:
@@ -698,16 +756,19 @@ jupyterhub:

## Terraform Overrides

-The QHub configuration file provides a huge number of configuration options for customizing your QHub Infrastructure, while these options are sufficient for an average user, but
-aren't exhaustive by any means. There are still a plenty of things you might want to achieve which cannot be configured directly by the above mentioned options, hence we've
-introduced a new option called terraform overrides (`terraform_overrides`), which lets you override the values of terraform variables in specific modules/resource. This is a
-relatively advance feature and must be used with utmost care and you should really know, what you're doing.
+The QHub configuration file provides a huge number of configuration options for customizing your QHub infrastructure.
+While these options are sufficient for the average user, they aren't exhaustive by any means. There are still plenty of
+things you might want to achieve that cannot be configured directly by the above-mentioned options, hence we've
+introduced a new option called terraform overrides (`terraform_overrides`), which lets you override the values of
+terraform variables in specific modules/resources. This is a relatively advanced feature that must be used with utmost
+care; you should really know what you're doing.

Here we describe the overrides supported via QHub config file:

### Ingress

-You can configure the IP of the load balancer and add annotations for the same via `ingress`'s terraform overrides, one such example for GCP is:
+You can configure the IP of the load balancer and add annotations for the same via `ingress`'s terraform overrides; one
+such example for GCP is:

```yaml
ingress:
@@ -738,7 +799,8 @@ azure:

#### Google Cloud

-Using terraform overrides you can also deploy inside a VPC in GCP, making the Kubernetes cluster private.
Here is an example for configuring the same: +Using terraform overrides you can also deploy inside a VPC in GCP, making the Kubernetes cluster private. Here is an +example for configuring the same: ```yaml google_cloud_platform: @@ -755,8 +817,9 @@ google_cloud_platform: display_name: null ``` -As the name suggests the cluster will be private, which means it would not have access to the internet either, which is not ideal for deploying pods in the cluster, hence we need -to allow internet access for the cluster, which can be achieved by creating a NAT router by running the following two commands for your vpc network. +As the name suggests the cluster will be private, which means it would not have access to the internet either, which is +not ideal for deploying pods in the cluster, hence we need to allow internet access for the cluster, which can be +achieved by creating a NAT router by running the following two commands for your vpc network. ``` gcloud compute routers create qhub-nat-router --network your-vpc-name --region your-region @@ -766,9 +829,11 @@ gcloud compute routers nats create nat-config --router qhub-nat-router --nat-al #### Deployment Notes -Deployment inside a virtual network is slightly different from deploying inside a public network, as the name suggests, since its a virtual private network, you need to be inside -the network to able to deploy and access QHub. One way to achieve this is by creating a Virtual Machine inside the virtual network, just select the virtual network and subnet name -under the networking settings of your cloud provider while creating the VM and then follow the usual deployment instructions as you would deploy from your local machine. +Deployment inside a virtual network is slightly different from deploying inside a public network, as the name suggests, +since its a virtual private network, you need to be inside the network to able to deploy and access QHub. One way to +achieve this is by creating a Virtual Machine inside the virtual network, just select the virtual network and subnet +name under the networking settings of your cloud provider while creating the VM and then follow the usual deployment +instructions as you would deploy from your local machine. # Full configuration example diff --git a/docs/source/installation/existing.md b/docs/source/installation/existing.md index 0f2ea6634b..13c3d79e81 100644 --- a/docs/source/installation/existing.md +++ b/docs/source/installation/existing.md @@ -1,19 +1,22 @@ # Deploy QHub to an existing kubernetes cluster -If you have an existing kubernetes cluster running in the cloud and would like to deploy QHub on the same cluster, this is the guide for you. +If you have an existing kubernetes cluster running in the cloud and would like to deploy QHub on the same cluster, this +is the guide for you. -To illustrate how this is done, the guide walks through a simple example. The guide below is meant to serve as a reference, the setup of your existing kubernetes might differ -rending some of these additional setups steps unnecessary. +To illustrate how this is done, the guide walks through a simple example. The guide below is meant to serve as a +reference, the setup of your existing kubernetes might differ rending some of these additional setups steps unnecessary. ## Deploy QHub to an existing AWS EKS cluster In this example, there already exists a basic web app running on an EKS cluster. [Here is the tutorial on how to setup this particular Guestbook web app](https://logz.io/blog/amazon-eks-cluster/). 
-The existing EKS cluster has one VPC with three subnets (each in their own Availability Zone) and no node groups. There are three nodes each running on a `t3.medium` EC2 instance, -unfortunately QHub's `general` node group requires a more powerful instance type. +The existing EKS cluster has one VPC with three subnets (each in their own Availability Zone) and no node groups. There +are three nodes each running on a `t3.medium` EC2 instance, unfortunately QHub's `general` node group requires a more +powerful instance type. -Now create three new node groups in preparation for the incoming QHub deployment. Before proceeding, ensure the following: +Now create three new node groups in preparation for the incoming QHub deployment. Before proceeding, ensure the +following: - that the subnets can ["automatically assign public IP addresses to instances launched into it"](https://docs.aws.amazon.com/vpc/latest/userguide/vpc-ip-addressing.html#subnet-public-ip) @@ -74,8 +77,9 @@ Now create three new node groups in preparation for the incoming QHub deployment Skip this step if node groups already exist. -For AWS, [follow this guide to create new node groups](https://docs.aws.amazon.com/eks/latest/userguide/create-managed-node-group.html). Be sure to fill in the following fields -carefully: +For AWS, +[follow this guide to create new node groups](https://docs.aws.amazon.com/eks/latest/userguide/create-managed-node-group.html). +Be sure to fill in the following fields carefully: - "Node Group configuration" - `Name` must be either `general`, `user` or `worker` @@ -247,4 +251,5 @@ Once updated, deploy QHub. When prompted be ready to manually update the DNS rec python -m qhub deploy --config qhub-config.yaml ``` -The deployment completes successfully and all the pods appear to be running and so do the pre-existing Guestbook web app. +The deployment completes successfully and all the pods appear to be running and so do the pre-existing Guestbook web +app. diff --git a/docs/source/installation/login.md b/docs/source/installation/login.md index e00fe94653..c74fbecc58 100644 --- a/docs/source/installation/login.md +++ b/docs/source/installation/login.md @@ -1,32 +1,39 @@ # Login -[Keycloak](https://www.keycloak.org/) is the name of the open-source user management software that's automatically deployed within QHub. It's used to store the database of all -users in your QHub instance, and can provide connectivity to other services such as GitHub/Auth0 single sign-on. +[Keycloak](https://www.keycloak.org/) is the name of the open-source user management software that's automatically +deployed within QHub. It's used to store the database of all users in your QHub instance, and can provide connectivity +to other services such as GitHub/Auth0 single sign-on. -As with many user admin tools, in Keycloak you start with an initial login (`root`) that has the ability to administer and create new users. The `root` user is a Keycloak-specific -user. It can only be used to login and manage the Keycloak identity management section of QHub. It's not a user of the wider QHub data science platform. +As with many user admin tools, in Keycloak you start with an initial login (`root`) that has the ability to administer +and create new users. The `root` user is a Keycloak-specific user. It can only be used to login and manage the Keycloak +identity management section of QHub. It's not a user of the wider QHub data science platform. ## Change Keycloak root password -`root`'s password is generated by the `qhub init` command. 
If you ran this command while following the steps under [Usage](usage.md), you should have seen something like the -following in your terminal output: +`root`'s password is generated by the `qhub init` command. If you ran this command while following the steps under +[Usage](usage.md), you should have seen something like the following in your terminal output: ``` Securely generated default random password= for Keycloak root user ``` -The init command also saves the root user password to your `qhub-config.yaml` configuration file under the following path: +The init command also saves the root user password to your `qhub-config.yaml` configuration file under the following +path: `security.keycloak.initial_root_password` -After the initial deployment, it is **highly** recommended that you change the Keycloak `root` user password as soon as you can. +After the initial deployment, it is **highly** recommended that you change the Keycloak `root` user password as soon as +you can. -> NOTE: Once you change the root password you will not be able to [add users from the command line](login.md#add-user-from-the-command-line) +> NOTE: Once you change the root password you will not be able to +> [add users from the command line](login.md#add-user-from-the-command-line) -> NOTE: From this point on, the `security.keycloak.initial_root_password` field in `qhub-config.yaml` has no effect. If you redeploy QHub it will not reset the password back to the -> old one (or anything else that might be in the field in your YAML file). We strongly recommend you delete this field to prevent later confusion. +> NOTE: From this point on, the `security.keycloak.initial_root_password` field in `qhub-config.yaml` has no effect. If +> you redeploy QHub it will not reset the password back to the old one (or anything else that might be in the field in +> your YAML file). We strongly recommend you delete this field to prevent later confusion. -1. To change the `root` user password, go to your QHub instance's admin dashboard—e.g., something like —and log in with the password provided. +1. To change the `root` user password, go to your QHub instance's admin dashboard—e.g., something like + —and log in with the password provided. ![QHub admin view - Root Login to Keycloak form](../images/keycloak_master_login.png) @@ -38,18 +45,21 @@ After the initial deployment, it is **highly** recommended that you change the K ![Keycloak root user page - account security](../images/keycloak_root_user_account_security.png) -4. In the Password section, click the 'Update' button. This will guide you through entering your existing root password, and then creating a new password. +4. In the Password section, click the 'Update' button. This will guide you through entering your existing root password, + and then creating a new password. ![Keycloak root user page - account security, update password](../images/keycloak_root_user_update_password.png) ## Adding a QHub user -You will need to add a QHub user in order to log in to your QHub platform. If you have chosen to use GitHub or Auth0 single-sign-on, you must ensure the value you enter in Keycloak -under 'Username' exactly matches the usernames from GitHub or Auth0, respectively. +You will need to add a QHub user in order to log in to your QHub platform. If you have chosen to use GitHub or Auth0 +single-sign-on, you must ensure the value you enter in Keycloak under 'Username' exactly matches the usernames from +GitHub or Auth0, respectively. 
### Add user using Keycloak console -To add a QHub user from the web console for Keycloak, visit . Log in using the username `root`, as shown above. +To add a QHub user from the web console for Keycloak, visit . Log in using the +username `root`, as shown above. All QHub users will be part of the `qhub` realm (a realm is a distinct identity provider and set of users in Keycloak). @@ -65,10 +75,11 @@ Steps to create a new user: ![Keycloak add user tab screenshot - new user form](../images/keycloak_add_users.png) -3. Fill out the three fields outlined above. These are 'Username', 'Email', and 'Groups'. (We explain these fields below.) Then click save. +3. Fill out the three fields outlined above. These are 'Username', 'Email', and 'Groups'. (We explain these fields + below.) Then click save. -4. Username. Depending on the authentication provider selected ('password', 'GitHub' or 'Auth0'), the values entered into the 'Username' field will differ slightly. The following - table outlines those differences: +4. Username. Depending on the authentication provider selected ('password', 'GitHub' or 'Auth0'), the values entered + into the 'Username' field will differ slightly. The following table outlines those differences: | | Password | GitHub | Auth0 | | -------- | ----------------- | ----------------- | --------------------- | @@ -78,8 +89,8 @@ Steps to create a new user: > NOTE: Although not required, users may not be able to log in to Grafana if this field isn't properly set. -6. Lastly, associate the user with one or more of the 'Groups'. Out of the box, QHub is deployed with the following groups: 'admin', 'analyst', and 'developer' (see the - [Groups](./login.md#groups) section below for more details). +6. Lastly, associate the user with one or more of the 'Groups'. Out of the box, QHub is deployed with the following + groups: 'admin', 'analyst', and 'developer' (see the [Groups](./login.md#groups) section below for more details). 7. Click save. @@ -89,13 +100,15 @@ Once the user is created, you can set a password. ![Keycloak add user > credentials tab screenshot - set password](../images/keycloak_user_password.png) -It's best to put the 'Temporary' toggle in the 'OFF' position. Otherwise the user will be forced to change the password on first login. +It's best to put the 'Temporary' toggle in the 'OFF' position. Otherwise the user will be forced to change the password +on first login. ### Add user from the command line To make adding users easier for new QHub deployments, there is a QHub command that can help. -> NOTE: If you [changed the initial_root_password for Keycloak](login.md#change-keycloak-root-password) this method will not work. +> NOTE: If you [changed the initial_root_password for Keycloak](login.md#change-keycloak-root-password) this method will +> not work. > NOTE: This method is primarily used by developers as a quick workaround. @@ -105,7 +118,8 @@ Run: qhub keycloak -c qhub-config.yaml adduser ``` -This will create a user `` with the initial password provided. Omit the password completely if you are using GitHub or Auth0. +This will create a user `` with the initial password provided. Omit the password completely if you are using +GitHub or Auth0. > NOTE: This will also add the user to the 'analyst' group. @@ -119,13 +133,15 @@ Click 'Sign in with Keycloak'. 
This will take you to the login form: ![QHub - Log in to Keycloak page](../images/keycloak_qhub_login.png) -If you chose GitHub or Auth0 login, click the 'GitHub' button to be taken to a GitHub login page and single-sign-on from there (as shown in the screenshot above). Otherwise, if you -chose 'Password' based authentication, enter the username and password you chose when you added a user to QHub above. +If you chose GitHub or Auth0 login, click the 'GitHub' button to be taken to a GitHub login page and single-sign-on from +there (as shown in the screenshot above). Otherwise, if you chose 'Password' based authentication, enter the username +and password you chose when you added a user to QHub above. ## Groups -Groups represent a collection of users that perform similar actions and therefore require similar permissions. By default, QHub is deployed with the following groups: 'admin', -'developer', 'analyst' and 'viewer' (in roughly descending order of power). +Groups represent a collection of users that perform similar actions and therefore require similar permissions. By +default, QHub is deployed with the following groups: 'admin', 'developer', 'analyst' and 'viewer' (in roughly descending +order of power). | Group | Access to QHub Resources | | ----------- | ------------------------------------------------------------------------------------------ | @@ -135,7 +151,8 @@ Groups represent a collection of users that perform similar actions and therefor To create new groups or modify (or delete) existing groups, log in as `root` and click 'Groups' on the left-hand side. -As an example, we create a new group named `conda-store-manager`. This group will have administrator access to the Conda-Store service. +As an example, we create a new group named `conda-store-manager`. This group will have administrator access to the +Conda-Store service. ![Keycloak groups tab screenshot - user groups view](../images/keycloak_groups.png) @@ -143,12 +160,14 @@ To create a new group, click 'New' in the upper-right hand corner. First, give t ![Keycloak add group form - name field set to conda-store-manager](../images/keycloak_new_group1.png) -Then under 'Role Mapping', add the appropriate 'Client Roles' as needed; there should be no need to update the 'Realm Roles'. In this example, the new group only has one mapped -role however it's possible to attached multiple 'Client Roles' to a single group. +Then under 'Role Mapping', add the appropriate 'Client Roles' as needed; there should be no need to update the 'Realm +Roles'. In this example, the new group only has one mapped role however it's possible to attached multiple 'Client +Roles' to a single group. ![Keycloak group conda-store-manager form - role mappings tab focused with expanded client roles dropdown](../images/keycloak_new_group2.png) -In this example, the new group only has one mapped role (`conda_store_admin`); however it's possible to attached multiple 'Client Roles' to a single group. +In this example, the new group only has one mapped role (`conda_store_admin`); however it's possible to attached +multiple 'Client Roles' to a single group. 
![Keycloak group conda-store-manager form - role mappings tab focused ](../images/keycloak_new_group3.png) diff --git a/docs/source/installation/management.md b/docs/source/installation/management.md index 4147487a7b..af1ea6ea52 100644 --- a/docs/source/installation/management.md +++ b/docs/source/installation/management.md @@ -4,7 +4,8 @@ One of the first things you might want to do is to **add new users** to your QHub. -This can be done through the Keycloak web console. See [Adding a QHub user](https://docs.qhub.dev/en/stable/source/installation/login.html#adding-a-qhub-user) for details. +This can be done through the Keycloak web console. See +[Adding a QHub user](https://docs.qhub.dev/en/stable/source/installation/login.html#adding-a-qhub-user) for details. ## Upgrades and dependencies management @@ -15,7 +16,8 @@ To update a current conda environment and redeploy you will need to: - Create a new branch on your repository - Make changes to the `qhub-config.yaml` file under the `environments` key. -> NOTE: in [YAML](https://yaml.org/spec/1.2/spec.html#mapping//), each level is a dictionary key, and every 2 white spaces represent values for those keys. +> NOTE: in [YAML](https://yaml.org/spec/1.2/spec.html#mapping//), each level is a dictionary key, and every 2 white +> spaces represent values for those keys. To add a new environment, add two spaces below the `environments` key such as the example below. @@ -30,18 +32,23 @@ environments: - pandas ``` -Commit the changes, and make a [PR](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request) into a master branch. The update will -take from 5 to 30 minutes to complete, depending on the environment's complexity. If after 30 minutes the new environment is still not available, check the latest log files from -the user instance in the `/home/conda/store/.logs` directory to troubleshoot. +Commit the changes, and make a +[PR](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/creating-a-pull-request) into a +master branch. The update will take from 5 to 30 minutes to complete, depending on the environment's complexity. If +after 30 minutes the new environment is still not available, check the latest log files from the user instance in the +`/home/conda/store/.logs` directory to troubleshoot. -- Note that the current version will not notify you if an environment fails to solve. The only way to see failures is by manually checking the above logs.\* +- Note that the current version will not notify you if an environment fails to solve. The only way to see failures is by + manually checking the above logs.\* ## Copy Files into Users' Home Folders -Within their own JupyterLab sessions, admins can add files to a folder called `shared/.userskel`. Any files in there will be copied to a user's own home folder whenever they start -a new JupyterLab session. Existing files with the same name will not be overwritten. Admin users are defined as members of the admin group as specified in your `qhub-config.yaml` -file. +Within their own JupyterLab sessions, admins can add files to a folder called `shared/.userskel`. Any files in there +will be copied to a user's own home folder whenever they start a new JupyterLab session. Existing files with the same +name will not be overwritten. Admin users are defined as members of the admin group as specified in your +`qhub-config.yaml` file. 
## Monitor your QHub deployment -You can install `k9s` for debugging and deep monitoring of the system deployment - see [here](../admin_guide/troubleshooting.md). +You can install `k9s` for debugging and deep monitoring of the system deployment - see +[here](../admin_guide/troubleshooting.md). diff --git a/docs/source/installation/setup.md b/docs/source/installation/setup.md index acd0404ea4..82b14e9f5e 100644 --- a/docs/source/installation/setup.md +++ b/docs/source/installation/setup.md @@ -1,9 +1,10 @@ # Setup Initialization -QHub handles the initial setup and management of configurable data science environments, allowing users to deploy seamlessly using Github Actions. +QHub handles the initial setup and management of configurable data science environments, allowing users to deploy +seamlessly using Github Actions. -QHub can be installed on a bare-metal server using HPC, on a Cloud provider or even locally for testing purposes. Review the options below to discover which option best suits your -needs. +QHub can be installed on a bare-metal server using HPC, on a Cloud provider or even locally for testing purposes. Review +the options below to discover which option best suits your needs. ## Local Deployment or Existing Kubernetes Cluster @@ -14,7 +15,8 @@ The local version is recommended for testing QHub's components due to its simpli - You have available local compute setup - You want to try out QHub with a quick-install to see how it works, without setting up environment variables -You should choose another installation option if you are starting from scratch (i.e., no clusters yet) and aiming to have a production environment. +You should choose another installation option if you are starting from scratch (i.e., no clusters yet) and aiming to +have a production environment. ## HPC Deployment @@ -24,33 +26,37 @@ The [QHub HPC](https://hpc.qhub.dev/en/latest/) should be your choice if: - You have existing infrastructure already available - You expect that your infrastructure will **not** exceed the existing resources capabilities -> NOTE: Although it is possible to deploy QHub HPC on the Cloud, it is not generally recommended due to possible high costs. For more information, check out the -> [base cost](../admin_guide/cost.md) section of the docs. +> NOTE: Although it is possible to deploy QHub HPC on the Cloud, it is not generally recommended due to possible high +> costs. For more information, check out the [base cost](../admin_guide/cost.md) section of the docs. ## Kubernetes Deployment -The Kubernetes deployment of QHub is considered to be the default option. If you are not sure which option to choose, try this one. It is suitable for most use cases, especially -if: +The Kubernetes deployment of QHub is considered to be the default option. If you are not sure which option to choose, +try this one. It is suitable for most use cases, especially if: - You require scalable infrastructure - You aim to have a production environment with GitOps enabled by default -The QHub version requires a choice of [Cloud provider](#cloud-provider), [authentication (using Auth0, GitHub, custom OAuth provider, or password based)](#authentication), +The QHub version requires a choice of [Cloud provider](#cloud-provider), +[authentication (using Auth0, GitHub, custom OAuth provider, or password based)](#authentication), [domain registration](#domain-registry), and CI provider (GitHub Actions, GitLab CI). 
-These services require global [environment variables](https://linuxize.com/post/how-to-set-and-list-environment-variables-in-linux/) that once set up, will trigger QHub's automatic -deploy using your CI/CD platform of choice. +These services require global +[environment variables](https://linuxize.com/post/how-to-set-and-list-environment-variables-in-linux/) that once set up, +will trigger QHub's automatic deploy using your CI/CD platform of choice. To find and set the environment variables, follow the steps described on the subsections below. ### Cloud Provider -The first required step is to **choose a Cloud Provider to host the project deployment**. The cloud installation is based on Kubernetes, but knowledge of Kubernetes is **NOT** -required nor is in depth knowledge about the specific provider required either. QHub supports [Amazon AWS](#amazon-web-services-aws), [DigitalOcean](#digital-ocean), +The first required step is to **choose a Cloud Provider to host the project deployment**. The cloud installation is +based on Kubernetes, but knowledge of Kubernetes is **NOT** required nor is in depth knowledge about the specific +provider required either. QHub supports [Amazon AWS](#amazon-web-services-aws), [DigitalOcean](#digital-ocean), [GCP](#google-cloud-platform), and [Azure](#microsoft-azure). -To deploy QHub, all access keys require fairly wide permissions to create all the necessary cloud resources. Hence, once the Cloud provider has been chosen, follow the steps below -and set the environment variables as specified with **owner/admin** level permissions. +To deploy QHub, all access keys require fairly wide permissions to create all the necessary cloud resources. Hence, once +the Cloud provider has been chosen, follow the steps below and set the environment variables as specified with +**owner/admin** level permissions. For more details on configuration for each Cloud provider, check the How-To Guides section of the documentation. @@ -58,8 +64,10 @@ For more details on configuration for each Cloud provider, check the How-To Guid
Click for AWS configuration instructions -Please see these instructions for [creating an IAM role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create.html) with administrator permissions. Upon generation, the -IAM role will provide a public **access key ID** and a **secret key** which will need to be added to the environment variables. +Please see these instructions for +[creating an IAM role](https://docs.aws.amazon.com/IAM/latest/UserGuide/id_roles_create.html) with administrator +permissions. Upon generation, the IAM role will provide a public **access key ID** and a **secret key** which will need +to be added to the environment variables. To define the environment variables paste the commands below with your respective keys. @@ -74,9 +82,11 @@ export AWS_SECRET_ACCESS_KEY="iNtheJUng1etheMightyJUNgleTHEl10N51eEpsT0n1ghy;"
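If you have the [AWS CLI](https://aws.amazon.com/cli/) installed, you can optionally sanity-check that the exported keys are picked up correctly (this step is not required by QHub):

```shell
# Optional: prints the account and IAM identity associated with the exported keys
aws sts get-caller-identity
```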
Click to expand DigitalOcean configuration directions

-Please see these instructions for [creating a Digital Ocean token](https://www.digitalocean.com/docs/apis-clis/api/create-personal-access-token/). In addition to a `token`, a
-`spaces key` (similar to AWS S3) credentials are also required. Follow the instructions on the
-[official docs](https://www.digitalocean.com/community/tutorials/how-to-create-a-digitalocean-space-and-api-key) for more information.
+Please see these instructions for
+[creating a Digital Ocean token](https://www.digitalocean.com/docs/apis-clis/api/create-personal-access-token/). In
+addition to a `token`, `spaces key` credentials (similar to AWS S3) are also required. Follow the instructions on the
+[official docs](https://www.digitalocean.com/community/tutorials/how-to-create-a-digitalocean-space-and-api-key) for
+more information.

> Note: DigitalOcean's permissions model isn't as fine-grained as the other supported Cloud providers.

@@ -96,9 +106,11 @@ export AWS_SECRET_ACCESS_KEY="" # set this variable identical to `SPACES_S
Click for CGP configuration specs -Follow [these detailed instructions](https://cloud.google.com/iam/docs/creating-managing-service-accounts) to create a Google Service Account with **owner level** permissions. -Then, follow the steps described on the official [GCP docs](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#iam-service-account-keys-create-console) to -create and download a JSON credentials file. Store this credentials file in a well known location and make sure to set yourself exclusive permissions. +Follow [these detailed instructions](https://cloud.google.com/iam/docs/creating-managing-service-accounts) to create a +Google Service Account with **owner level** permissions. Then, follow the steps described on the official +[GCP docs](https://cloud.google.com/iam/docs/creating-managing-service-account-keys#iam-service-account-keys-create-console) +to create and download a JSON credentials file. Store this credentials file in a well known location and make sure to +set yourself exclusive permissions. You can change the file permissions by running the command `chmod 600 ` on your terminal. @@ -109,7 +121,8 @@ export GOOGLE_CREDENTIALS="path/to/JSON/file/with/credentials" export PROJECT_ID="projectIDName" ``` -> NOTE: the [`PROJECT_ID` variable](https://cloud.google.com/resource-manager/docs/creating-managing-projects) can be found at the Google Console homepage, under `Project info`. +> NOTE: the [`PROJECT_ID` variable](https://cloud.google.com/resource-manager/docs/creating-managing-projects) can be +> found at the Google Console homepage, under `Project info`.
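With the `gcloud` CLI installed, you can optionally verify that the service account key works before moving on (not required by QHub, and it assumes `gcloud` is available locally):

```shell
# Optional: authenticate with the downloaded key and confirm the project is visible
gcloud auth activate-service-account --key-file="$GOOGLE_CREDENTIALS"
gcloud projects describe "$PROJECT_ID"
```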
@@ -119,7 +132,8 @@ export PROJECT_ID="projectIDName" Follow [these instructions](https://registry.terraform.io/providers/hashicorp/azurerm/latest/docs/guides/service_principal_client_secret#creating-a-service-principal-in-the-azure-portal) -to create a Service Principal in the Azure Portal. After completing the steps described on the link, set the following environment variables such as below: +to create a Service Principal in the Azure Portal. After completing the steps described on the link, set the following +environment variables such as below: ```shell export ARM_CLIENT_ID="" # application (client) ID @@ -129,41 +143,52 @@ export ARM_TENANT_ID="" # field available under `Azure Active Director ``` > NOTE 1: Having trouble finding your Subscription ID? -> [Azure's official docs](https://docs.microsoft.com/en-us/azure/media-services/latest/how-to-set-azure-subscription?tabs=portal) might help. +> [Azure's official docs](https://docs.microsoft.com/en-us/azure/media-services/latest/how-to-set-azure-subscription?tabs=portal) +> might help. -> NOTE 2: [Tenant ID](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/active-directory-how-to-find-tenant) values can be also found using PowerShell and CLI. +> NOTE 2: +> [Tenant ID](https://docs.microsoft.com/en-us/azure/active-directory/fundamentals/active-directory-how-to-find-tenant) +> values can be also found using PowerShell and CLI.
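If the [Azure CLI](https://docs.microsoft.com/en-us/cli/azure/) is installed, one optional way to confirm that the service principal credentials work is to log in with them (not required by QHub):

```shell
# Optional: log in as the service principal and show the subscription it can access
az login --service-principal \
  --username "$ARM_CLIENT_ID" \
  --password "$ARM_CLIENT_SECRET" \
  --tenant "$ARM_TENANT_ID"
az account show
```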
## Authentication -User identity in QHub is now managed within Keycloak which is a robust and highly flexible open source identity and access management solution. A Keycloak instance will be deployed -inside your QHub. It can be configured to work with many OAuth 2.0 identity providers, it can federate users from existing databases (such as LDAP), or it can be used as a simple -database of username/passwords. +User identity in QHub is now managed within Keycloak which is a robust and highly flexible open source identity and +access management solution. A Keycloak instance will be deployed inside your QHub. It can be configured to work with +many OAuth 2.0 identity providers, it can federate users from existing databases (such as LDAP), or it can be used as a +simple database of username/passwords. -The full extent of possible configuration can't be covered here, so we provide three simple options that can be configured automatically by QHub when it sets up your new platform. -These options are basic passwords, GitHub single-sign on, or Auth0 single-sign on (which in turn can be configured to allow identity to be provided by social login etc). +The full extent of possible configuration can't be covered here, so we provide three simple options that can be +configured automatically by QHub when it sets up your new platform. These options are basic passwords, GitHub +single-sign on, or Auth0 single-sign on (which in turn can be configured to allow identity to be provided by social +login etc). -You will actually instruct `qhub init` which method you have chosen when you move on to the [Usage](usage.md) section, but at this stage you may need to set environment variables -corresponding to your choice: +You will actually instruct `qhub init` which method you have chosen when you move on to the [Usage](usage.md) section, +but at this stage you may need to set environment variables corresponding to your choice: ### Auth0
Click for Auth0 configuration details -Auth0 is a great choice to enable flexible authentication via multiple providers. To create the necessary access tokens you will need to have an [Auth0](https://auth0.com/) account -and be logged in. [Directions for creating an Auth0 application](https://auth0.com/docs/applications/set-up-an-application/register-machine-to-machine-applications). +Auth0 is a great choice to enable flexible authentication via multiple providers. To create the necessary access tokens +you will need to have an [Auth0](https://auth0.com/) account and be logged in. +[Directions for creating an Auth0 application](https://auth0.com/docs/applications/set-up-an-application/register-machine-to-machine-applications). - Click on the `Applications` button on the left - Select `Create Application` > `Machine to Machine Applications` > `Auth0 Management API` from the dropdown menu - Next, click `All` next to `Select all` and click `Authorize` -- Set the variable `AUTH0_CLIENT_ID` equal to the `Client ID` string, and do the same for the `Client secret` by running the command below. +- Set the variable `AUTH0_CLIENT_ID` equal to the `Client ID` string, and do the same for the `Client secret` by running + the command below. With the application created set the following environment variables: -- `AUTH0_CLIENT_ID`: client ID of Auth0 machine-to-machine application found at top of the newly created application page -- `AUTH0_CLIENT_SECRET`: secret ID of Auth0 machine-to-machine application found in the `Settings` tab of the newly created application -- `AUTH0_DOMAIN`: The `Tenant Name` which can be found in the general account settings on the left hand side of the page appended with `.auth0.com`, for example: +- `AUTH0_CLIENT_ID`: client ID of Auth0 machine-to-machine application found at top of the newly created application + page +- `AUTH0_CLIENT_SECRET`: secret ID of Auth0 machine-to-machine application found in the `Settings` tab of the newly + created application +- `AUTH0_DOMAIN`: The `Tenant Name` which can be found in the general account settings on the left hand side of the page + appended with `.auth0.com`, for example: ```bash export AUTH_DOMAIN="qhub-test.auth0.com" # in case the Tenant Name was called 'qhub-test' @@ -177,16 +202,19 @@ export AUTH_DOMAIN="qhub-test.auth0.com" # in case the Tenant Name was called 'q To use GitHub as a single-sign on provider, you will need to create a new OAuth 2.0 app. -No environment variables are needed for this - you will be given the relevant information and prompted for various inputs during the next stage, when you run -[`qhub init`](./usage.md) if you provide the flag `--auth-provider github`. This will be covered when you reach that point in this documentation. +No environment variables are needed for this - you will be given the relevant information and prompted for various +inputs during the next stage, when you run [`qhub init`](./usage.md) if you provide the flag `--auth-provider github`. +This will be covered when you reach that point in this documentation.
### Identity Providers

-If you would like to use a different method for authentication that is not automatically covered in Qhub, such as Facebook, Microsoft AAD, or a custom provider. You can do so by
-defining the expected **identity provider** in the [Keyclok admin panel](https://docs.qhub.dev/en/latest/source/installation/login.html#login). An identity provider derives from a
-specific protocol used to authenticate and send authentication and authorization information to users. It can be:
+If you would like to use a different method for authentication that is not automatically covered in QHub, such as
+Facebook, Microsoft AAD, or a custom provider, you can do so by defining the expected **identity provider** in the
+[Keycloak admin panel](https://docs.qhub.dev/en/latest/source/installation/login.html#login). An identity provider
+derives from a specific protocol used to authenticate and send authentication and authorization information to users. It
+can be:

- A social provider such as Facebook, Google, or Twitter.

@@ -202,28 +230,36 @@ Typically, Keycloak bases identity providers on the following protocols:

- `OAuth v2.0`

-If your authentication provider uses the above mentioned protocols you can follow [this steps](https://www.keycloak.org/docs/latest/server_admin/#default_identity_provider) to
-create a new identity provider or use an existing configuration for a [social identity provider](https://www.keycloak.org/docs/latest/server_admin/#social-identity-providers).
+If your authentication provider uses the above-mentioned protocols, you can follow
+[these steps](https://www.keycloak.org/docs/latest/server_admin/#default_identity_provider) to create a new identity
+provider or use an existing configuration for a
+[social identity provider](https://www.keycloak.org/docs/latest/server_admin/#social-identity-providers).

## CI/CD Pipeline

-In the [Usage](usage.md) section, you will need to run `qhub init` (this only ever needs to be run once - it creates your configuration YAML file) and then `qhub deploy` to set up
-the cloud infrastructure and deploy QHub for the first time.
+In the [Usage](usage.md) section, you will need to run `qhub init` (this only ever needs to be run once - it creates
+your configuration YAML file) and then `qhub deploy` to set up the cloud infrastructure and deploy QHub for the first
+time.

-For subsequent deployments, it's possible to run `qhub deploy` again in exactly the same way, providing the configuration YAML file as you would the first time. However, it's also
-possible to automate future deployments using 'DevOps' - the configuration YAML file stored in git will trigger automatic redeployment whenever it's edited.
+For subsequent deployments, it's possible to run `qhub deploy` again in exactly the same way, providing the
+configuration YAML file as you would the first time. However, it's also possible to automate future deployments using
+'DevOps' - the configuration YAML file stored in git will trigger automatic redeployment whenever it's edited.

-This DevOps approach can be provided by GitHub Actions or GitLab Workflows. As for the other choices, you will only need to specify the CI/CD provider when you come to run
-`qhub init`, but you may need to set relevant environment variables unless you choose 'none' because you plan to always redeploy manually.
+This DevOps approach can be provided by GitHub Actions or GitLab Workflows.
As for the other choices, you will only need +to specify the CI/CD provider when you come to run `qhub init`, but you may need to set relevant environment variables +unless you choose 'none' because you plan to always redeploy manually. ### GitHub
Click for GitHub Actions configuration details -QHub uses GitHub Actions to enable [Infrastructure as Code](https://en.wikipedia.org/wiki/Infrastructure_as_code) and trigger the CI/CD checks on the configuration file that -automatically generates the deployment modules for the infrastructure. To do that, it will be necessary to set the GitHub username and token as environment variables. First create -a github personal access token via [these instructions](https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token). The token needs permissions to -create a repo and create secrets on the repo. At the moment we don't have the permissions well scoped out so to be on the safe side enable all permissions. +QHub uses GitHub Actions to enable [Infrastructure as Code](https://en.wikipedia.org/wiki/Infrastructure_as_code) and +trigger the CI/CD checks on the configuration file that automatically generates the deployment modules for the +infrastructure. To do that, it will be necessary to set the GitHub username and token as environment variables. First +create a github personal access token via +[these instructions](https://docs.github.com/en/github/authenticating-to-github/creating-a-personal-access-token). The +token needs permissions to create a repo and create secrets on the repo. At the moment we don't have the permissions +well scoped out so to be on the safe side enable all permissions. - `GITHUB_USERNAME`: GitHub username - `GITHUB_TOKEN`: GitHub-generated token @@ -234,41 +270,51 @@ create a repo and create secrets on the repo. At the moment we don't have the pe
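For example (both values below are placeholders; use your own username and the personal access token you just created):

```shell
export GITHUB_USERNAME="my-github-username"   # placeholder
export GITHUB_TOKEN="ghp_xxxxxxxxxxxxxxxx"    # placeholder personal access token
```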
Click for GitLab Workflow configuration details -If you want to use GitLab CI to automatically deploy changes to your configuration, then no extra environment variables are needed for this. +If you want to use GitLab CI to automatically deploy changes to your configuration, then no extra environment variables +are needed for this. -All git repo and CI setup on GitLab will need to be done manually. At the next stage, when you run [`qhub init`](./usage.md) please provide the flag `--ci-provider gitlab-ci`. +All git repo and CI setup on GitLab will need to be done manually. At the next stage, when you run +[`qhub init`](./usage.md) please provide the flag `--ci-provider gitlab-ci`. -After initial deploy, the documentation should tell you when to commit your configuration files into your GitLab repo. There should be your `qhub-config.yaml` file as well as a -generated file called `.gitlab-ci.yml`. You will need to manually set environment variables for your cloud provider as secrets in your GitLab CI for the repo. +After initial deploy, the documentation should tell you when to commit your configuration files into your GitLab repo. +There should be your `qhub-config.yaml` file as well as a generated file called `.gitlab-ci.yml`. You will need to +manually set environment variables for your cloud provider as secrets in your GitLab CI for the repo.
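One possible way to script this, instead of using the GitLab web UI, is the GitLab REST API (a sketch: the project ID, token, and variable shown are placeholders, and you would repeat the call for each credential your cloud provider needs):

```shell
# Sketch: create one CI/CD variable on the project via the GitLab API.
# <project-id> and <gitlab-token> are placeholders.
curl --request POST \
  --header "PRIVATE-TOKEN: <gitlab-token>" \
  "https://gitlab.com/api/v4/projects/<project-id>/variables" \
  --form "key=AWS_ACCESS_KEY_ID" \
  --form "value=<your-access-key-id>"
```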
## Domain registry -Finally, you will need to have a domain name for hosting QHub. This domain will be where your application will be exposed. +Finally, you will need to have a domain name for hosting QHub. This domain will be where your application will be +exposed. -Currently, QHub only supports CloudFlare for automatic DNS registration. If an alternate DNS provider is desired, change the `--dns-provider` flag from `cloudflare` to `none` on -the `qhub deploy` command. The deployment then will be paused when it asks for an IP address (or CNAME, if using AWS) and prompt to register the desired URL. Setting a DNS record -heavily depends on the provider thus it's not possible to have detailed docs on how to create a record on your provider. Googling `setting record on ` -should yield good results on doing it for your specific provider. +Currently, QHub only supports CloudFlare for automatic DNS registration. If an alternate DNS provider is desired, change +the `--dns-provider` flag from `cloudflare` to `none` on the `qhub deploy` command. The deployment then will be paused +when it asks for an IP address (or CNAME, if using AWS) and prompt to register the desired URL. Setting a DNS record +heavily depends on the provider thus it's not possible to have detailed docs on how to create a record on your provider. +Googling `setting record on ` should yield good results on doing it for your specific provider. ### Cloudflare
Click for Cloudflare configuration details -QHub supports Cloudflare as a DNS provider. If you choose to use Cloudflare, first create an account, then there are two possible following options: +QHub supports Cloudflare as a DNS provider. If you choose to use Cloudflare, first create an account, then there are two +possible following options: 1. You can register your application domain name on it, using the - [Cloudflare nameserver](https://support.cloudflare.com/hc/en-us/articles/205195708-Changing-your-domain-nameservers-to-Cloudflare) (recommended). + [Cloudflare nameserver](https://support.cloudflare.com/hc/en-us/articles/205195708-Changing-your-domain-nameservers-to-Cloudflare) + (recommended). 2. You can outright buy a new domain with Cloudflare (this action isn't particularly recommended). To generate a token [follow these steps](https://developers.cloudflare.com/api/tokens/create): - Under `Profile`, select the `API Tokens` menu and click on `Create API Token`. - On `Edit zone DNS` click on `Use Template`. ![screenshot Cloudflare edit Zone DNS](../images/cloudflare_auth_1.png) -- Configure `Permissions` such as the image below: ![screenshot Cloudflare Permissions edit](../images/cloudflare_permissions_2.1.1.png) -- On `Account Resources` set the configuration to include your desired account ![screenshot Cloudflare account resources](../images/cloudflare_account_resources_scr.png) -- On `Zone Resources` set it to `Include | Specific zone` and your domain name ![screenshot Cloudflare account resources](../images/cloudflare_zone_resources.png) +- Configure `Permissions` such as the image below: + ![screenshot Cloudflare Permissions edit](../images/cloudflare_permissions_2.1.1.png) +- On `Account Resources` set the configuration to include your desired account + ![screenshot Cloudflare account resources](../images/cloudflare_account_resources_scr.png) +- On `Zone Resources` set it to `Include | Specific zone` and your domain name + ![screenshot Cloudflare account resources](../images/cloudflare_zone_resources.png) - Click continue to summary ![screenshot Cloudflare summary](../images/cloudflare_summary.png) - Click on the `Create Token` button and set the token generated as an environment variable on your machine. diff --git a/docs/source/installation/usage.md b/docs/source/installation/usage.md index bcbdedc6b2..7f1868aa5e 100644 --- a/docs/source/installation/usage.md +++ b/docs/source/installation/usage.md @@ -2,13 +2,15 @@ ## Cloud Deployment -Great, you've gone through the [Installation](installation.md) and [Setup Initialization](setup.md) steps, and have ensured that all the necessary environment variables have been -properly set. Now it is time to deploy QHub from your terminal. +Great, you've gone through the [Installation](installation.md) and [Setup Initialization](setup.md) steps, and have +ensured that all the necessary environment variables have been properly set. Now it is time to deploy QHub from your +terminal. ### Initialize configuration -There are several ways to generate your configuration file. You can type the commands when prompted by terminal, or you can set it all automatically from the start. In any case, we -advise you to start by creating a new project folder. Here, we will name the new folder `qhub-test`. +There are several ways to generate your configuration file. You can type the commands when prompted by terminal, or you +can set it all automatically from the start. In any case, we advise you to start by creating a new project folder. 
Here, +we will name the new folder `qhub-test`. On your terminal run: @@ -31,28 +33,34 @@ qhub init aws \ There are several **optional** (yet highly recommended) flags that allow to configure the deployment: -The command above will generate the `qhub-config.yaml` config file with an infrastructure deployed on `aws`, named `projectname`, where the domain will be `qhub.dev`. +The command above will generate the `qhub-config.yaml` config file with an infrastructure deployed on `aws`, named +`projectname`, where the domain will be `qhub.dev`. -The deployment will use `github-actions` as the continuous integration (CI) provider, automatically provisioning a repository on GitHub under the URL -`github.com/quansight/projectname` +The deployment will use `github-actions` as the continuous integration (CI) provider, automatically provisioning a +repository on GitHub under the URL `github.com/quansight/projectname` -User authentication will be by `auth0`, and an OAuth 2.0 app will be created on Auth0 automatically. There are several flags that allow you to configure the deployment: +User authentication will be by `auth0`, and an OAuth 2.0 app will be created on Auth0 automatically. There are several +flags that allow you to configure the deployment: - `aws` indicates that the project will be deployed on the Amazon AWS Cloud provider. - Optional flags are: `gcp`, `do` and `azure`. -- `--project`: the name of the project is required to be a string compliant with the Cloud provider recommendations. For more details see official Cloud provider docs on naming - policies and see below on the [project naming convention](#project-naming-convention). -- `--domain`: base domain for your cluster. This pattern is also applicable if you are setting your own DNS through a different provider. +- `--project`: the name of the project is required to be a string compliant with the Cloud provider recommendations. For + more details see official Cloud provider docs on naming policies and see below on the + [project naming convention](#project-naming-convention). +- `--domain`: base domain for your cluster. This pattern is also applicable if you are setting your own DNS through a + different provider. - `qhub.dev` is the domain registered on CloudFlare. If you chose not to use Cloudflare, skip this flag. - `--ci-provider`: specifies what provider to use for CI/CD. Currently, supports GitHub Actions, GitLab CI, or none. - `--auth-provider`: This will set configuration file to use the specified provider for authentication. - `--auth-auto-provision`: This will automatically create and configure an application using OAuth. - `--repository`: Repository name that will be used to store the Infrastructure-as-Code on GitHub. - `--repository-auto-provision`: Sets the secrets for the GitHub repository used for CI/CD actions. -- `--ssl-cert-email`: Provide an admin's email address so that LetsEncrypt can generate a real SSL certificate for your site. If omitted, the site will use a self-signed cert that - may cause problems for some browsers but may be sufficient for testing. +- `--ssl-cert-email`: Provide an admin's email address so that LetsEncrypt can generate a real SSL certificate for your + site. If omitted, the site will use a self-signed cert that may cause problems for some browsers but may be sufficient + for testing. -You will be prompted to enter values for some of the choices above if they are omitted as command line arguments (for example project name and domain). 
+You will be prompted to enter values for some of the choices above if they are omitted as command line arguments (for +example project name and domain). The `qhub init` command also generates an initial password for your root Keycloak user: @@ -60,29 +68,35 @@ The `qhub init` command also generates an initial password for your root Keycloa Securely generated default random password=R1E8aWedaQVU6kKv for Keycloak root user stored at path=/tmp/QHUB_DEFAULT_PASSWORD ``` -This password is also available in the `qhub-config.yaml` file under the `security.keycloak.initial_root_password field`. It's required in the next page of these docs for logging -in to your QHub. +This password is also available in the `qhub-config.yaml` file under the +`security.keycloak.initial_root_password field`. It's required in the next page of these docs for logging in to your +QHub. -This `qhub init` command generates the `qhub-config.yaml` config file with an infrastructure to be deployed on `aws`, named `projectname`, with a domain name set to `qhub.dev`. The -deployment uses `github-actions` as the continuous integration provider, automatically provisioned and authenticated by `auth0`. And finally, initialized on GitHub under the URL -`github.com/quansight/projectname`. +This `qhub init` command generates the `qhub-config.yaml` config file with an infrastructure to be deployed on `aws`, +named `projectname`, with a domain name set to `qhub.dev`. The deployment uses `github-actions` as the continuous +integration provider, automatically provisioned and authenticated by `auth0`. And finally, initialized on GitHub under +the URL `github.com/quansight/projectname`. -If employing an infrastructure-as-code approach, this is where you would make the desired infrastructure changes including adding users, changing Dask worker instance type and much -more. Once you're happy with your changes you would redeploy those changes using GitHub Actions. For more details on the `qhub-config.yaml` please see +If employing an infrastructure-as-code approach, this is where you would make the desired infrastructure changes +including adding users, changing Dask worker instance type and much more. Once you're happy with your changes you would +redeploy those changes using GitHub Actions. For more details on the `qhub-config.yaml` please see [Configuration](configuration.md) -The proceeding command will generate the `qhub-config.yaml` config file with an infrastructure deployed on `aws`, named `projectname`, where the domain will be `qhub.dev`. The -deployment will use `github-actions` as the continuous integration (CI) provider, automatically provisioned and authenticated by `auth0`, initialized on GitHub under the URL +The proceeding command will generate the `qhub-config.yaml` config file with an infrastructure deployed on `aws`, named +`projectname`, where the domain will be `qhub.dev`. The deployment will use `github-actions` as the continuous +integration (CI) provider, automatically provisioned and authenticated by `auth0`, initialized on GitHub under the URL `github.com/quansight/projectname`. -If employing an infrastructure-as-code approach, this is where you would make the desired infrastructure changes including adding environments, changing Dask worker instance type -and much more. Once you're happy with your changes you would redeploy those changes using GitHub Actions. 
For more details on the `qhub-config.yaml` please see +If employing an infrastructure-as-code approach, this is where you would make the desired infrastructure changes +including adding environments, changing Dask worker instance type and much more. Once you're happy with your changes you +would redeploy those changes using GitHub Actions. For more details on the `qhub-config.yaml` please see [Configuration](configuration.md) ##### Project Naming Convention -In order to successfully deploy QHub, there are some project naming conventions which need to be followed. For starters, make sure your project name is compatible with the -specifics of your chosen Cloud provider. In addition, QHub `projectname` should also obey to the following format requirements: +In order to successfully deploy QHub, there are some project naming conventions which need to be followed. For starters, +make sure your project name is compatible with the specifics of your chosen Cloud provider. In addition, QHub +`projectname` should also obey to the following format requirements: > - letters from A to Z (upper and lower case) and numbers; > - Special characters are **NOT** allowed; @@ -91,11 +105,13 @@ specifics of your chosen Cloud provider. In addition, QHub `projectname` should ### Understanding the qhub-config.yaml file -The `qhub init` command may have some side-effects such automatically creating a GitHub repository and setting some repo secrets (if you used the `--repository-auto-provision` -flag), and creating an Auth0 app, but the main output of the command is the `qhub-config.yaml` file. +The `qhub init` command may have some side-effects such automatically creating a GitHub repository and setting some repo +secrets (if you used the `--repository-auto-provision` flag), and creating an Auth0 app, but the main output of the +command is the `qhub-config.yaml` file. -This file is the configuration file that will determine how the cloud infrastructure and QHub is built and deployed in the next step. But at this point it's just a text file. You -could edit it manually if you are unhappy with the choices, or delete it and start over again. It is also possible to create this config file from scratch or re-use an existing +This file is the configuration file that will determine how the cloud infrastructure and QHub is built and deployed in +the next step. But at this point it's just a text file. You could edit it manually if you are unhappy with the choices, +or delete it and start over again. It is also possible to create this config file from scratch or re-use an existing one. Ulimately it's not essential to use `qhub init` at all, but it's often the easiest way to get started. To understand some ways in which you could decide to edit the YAML file, see [Advanced Configuration](configuration.md). @@ -108,7 +124,8 @@ Finally, with the `qhub-config.yaml` created, QHub can be deployed for the first qhub deploy -c qhub-config.yaml --dns-provider cloudflare --dns-auto-provision ``` -> Omit `--dns-provider cloudflare --dns-auto-provision` if you are not using Cloudflare and will set up your DNS manually. +> Omit `--dns-provider cloudflare --dns-auto-provision` if you are not using Cloudflare and will set up your DNS +> manually. 
This creates the following folder structure: @@ -124,10 +141,11 @@ This creates the following folder structure: └── terraform-state # required by terraform to securely store the state of the deployment ``` -The terminal will prompt you to press `[enter]` to check auth credentials (which were added by the `qhub init` command). That will trigger the deployment which will take around 10 -minutes to complete. +The terminal will prompt you to press `[enter]` to check auth credentials (which were added by the `qhub init` command). +That will trigger the deployment which will take around 10 minutes to complete. -During the initial deployment, Digital Ocean, GCP and Azure are going to display an `"ip"` address whereas AWS is going to display a CNAME `"hostname"`. +During the initial deployment, Digital Ocean, GCP and Azure are going to display an `"ip"` address whereas AWS is going +to display a CNAME `"hostname"`. - Digital Ocean/Google Cloud Platform @@ -147,14 +165,16 @@ During the initial deployment, Digital Ocean, GCP and Azure are going to display } ``` -If you specified `--dns-provider cloudflare --dns-auto-provision` on the command line, your DNS records for your domain should be updated automatically on Cloudflare. If you -omitted those flags, you will be prompted to set the A/CNAME records manually on your domain name registrar's nameservers. +If you specified `--dns-provider cloudflare --dns-auto-provision` on the command line, your DNS records for your domain +should be updated automatically on Cloudflare. If you omitted those flags, you will be prompted to set the A/CNAME +records manually on your domain name registrar's nameservers. ### GitOps -If you chose `--ci-provider github-actions` (or `gitlab-ci`) then QHub will use a GitHub Actions workflow (or GitLab equivalent) to automatically handle future deployment of the -infrastructure. For that to work, your newly generated project must be pushed to GitHub. Using the URL under the `--repository` flag on the `qhub init` command, you need to commit -all files to the git repo. +If you chose `--ci-provider github-actions` (or `gitlab-ci`) then QHub will use a GitHub Actions workflow (or GitLab +equivalent) to automatically handle future deployment of the infrastructure. For that to work, your newly generated +project must be pushed to GitHub. Using the URL under the `--repository` flag on the `qhub init` command, you need to +commit all files to the git repo. To add the project to the initialized remote git repository run: @@ -169,8 +189,9 @@ Push the changes to the repository (your primary branch may be called `master` i git push origin main ``` -Once the files are in GitHub, all CI/CD changes will be triggered by commits to main, and deployed via GitHub Actions. Since the infrastructure state is reflected in the -repository, this workflow allows for team members to submit pull requests that can be reviewed before modifying the infrastructure, easing the maintenance process. +Once the files are in GitHub, all CI/CD changes will be triggered by commits to main, and deployed via GitHub Actions. +Since the infrastructure state is reflected in the repository, this workflow allows for team members to submit pull +requests that can be reviewed before modifying the infrastructure, easing the maintenance process. To automatically deploy (and to keep track of changes more effectively): @@ -178,6 +199,7 @@ To automatically deploy (and to keep track of changes more effectively): - create a pull request (PR) to main. 
- Trigger the deployment by merging the PR. All changes will be automatically applied to the new QHub instance. -Having issues? Head over to our [Troubleshooting](../admin_guide/troubleshooting.md) section for tips on how to debug your QHub. Or try our [FAQ](../admin_guide/faq.md). +Having issues? Head over to our [Troubleshooting](../admin_guide/troubleshooting.md) section for tips on how to debug +your QHub. Or try our [FAQ](../admin_guide/faq.md). If your deployment seemed to be successful, next learn how to [login](login.md). diff --git a/docs/source/introduction/index.md b/docs/source/introduction/index.md index 2a697dd43a..e4cf509d03 100644 --- a/docs/source/introduction/index.md +++ b/docs/source/introduction/index.md @@ -2,11 +2,11 @@ QHub is an open source tool for data science research, development, and deployment. -QHub is [**Infrastructure as Code**](https://en.wikipedia.org/wiki/Infrastructure_as_code) that simplifies the deployment of data science projects using JupyterHub and Dask Gateway -for you and your team. +QHub is [**Infrastructure as Code**](https://en.wikipedia.org/wiki/Infrastructure_as_code) that simplifies the +deployment of data science projects using JupyterHub and Dask Gateway for you and your team. -Designed to simplify the deployment and maintenance of scalable computational platforms in the cloud, QHub is ideal for organizations that need a shared compute platform that's -flexible, accessible, and scalable. +Designed to simplify the deployment and maintenance of scalable computational platforms in the cloud, QHub is ideal for +organizations that need a shared compute platform that's flexible, accessible, and scalable. ## QHub Technology Stack @@ -16,25 +16,33 @@ flexible, accessible, and scalable. The technology stack is an integration of the following existing open source libraries: -- [**Terraform**](https://www.terraform.io/intro/index.html) a tool for building, changing, and versioning infrastructure. +- [**Terraform**](https://www.terraform.io/intro/index.html) a tool for building, changing, and versioning + infrastructure. - [**Kubernetes**](https://kubernetes.io/docs/home/) a cloud-agnostic orchestration system - [**Helm**](https://helm.sh/): a package manager for Kubernetes - [**JupyterHub**](https://jupyter.org/hub): a shareable compute platform for data science -- [**JupyterLab**](https://jupyterlab.readthedocs.io/en/stable/): a web-based interactive development environment for Jupyter Notebooks +- [**JupyterLab**](https://jupyterlab.readthedocs.io/en/stable/): a web-based interactive development environment for + Jupyter Notebooks - [**Dask**](https://docs.dask.org/en/latest/): a scalable and flexible library for parallel computing in Python - [**Dask-Gateway**](https://gateway.dask.org/): a secure, multi-tenant server for managing Dask clusters - [**Keycloak**](https://www.keycloak.org/) Open Source Identity and Access Management -- [**GitHub Actions**](https://docs.github.com/en/actions): a tool to automate, customize, and execute software development workflows in a GitHub repository. +- [**GitHub Actions**](https://docs.github.com/en/actions): a tool to automate, customize, and execute software + development workflows in a GitHub repository. 
- [**traefik**](https://traefik.io/) for routing web/tcp traffic inside cluster - - [**traefik-forward-auth**](/~https://github.com/thomseddon/traefik-forward-auth) single sign on and easy securing of web applications + - [**traefik-forward-auth**](/~https://github.com/thomseddon/traefik-forward-auth) single sign on and easy securing of + web applications Amongst the newly created open source libraries on the tech stack are: -- [**jupyterhub-ssh**](/~https://github.com/yuvipanda/jupyterhub-ssh) brings the SSH experience to a modern cluster manager. -- [**jupyter-videochat**](/~https://github.com/yuvipanda/jupyter-videochat) allows video-chat with JupyterHub peers inside JupyterLab, powered by Jitsi. -- [**conda-store**](/~https://github.com/quansight/conda-store) serves identical conda environments and controls its life-cycle. -- [**conda-docker**](/~https://github.com/conda-incubator/conda-docker), an extension to the docker concept of having declarative environments that are associated with Docker images - allowing tricks and behaviour that otherwise would not be allowed. +- [**jupyterhub-ssh**](/~https://github.com/yuvipanda/jupyterhub-ssh) brings the SSH experience to a modern cluster + manager. +- [**jupyter-videochat**](/~https://github.com/yuvipanda/jupyter-videochat) allows video-chat with JupyterHub peers inside + JupyterLab, powered by Jitsi. +- [**conda-store**](/~https://github.com/quansight/conda-store) serves identical conda environments and controls its + life-cycle. +- [**conda-docker**](/~https://github.com/conda-incubator/conda-docker), an extension to the docker concept of having + declarative environments that are associated with Docker images allowing tricks and behaviour that otherwise would not + be allowed. - [**vscode**](/~https://github.com/cdr/code-server) built-in web editor tied to jupyterlab server ### Integrations diff --git a/docs/source/introduction/qhub-101.md b/docs/source/introduction/qhub-101.md index 94677d9e97..1528cda8f5 100644 --- a/docs/source/introduction/qhub-101.md +++ b/docs/source/introduction/qhub-101.md @@ -1,10 +1,12 @@ # QHub 101 -QHub is an open source framework that allows data science teams to initialize and maintain their data science stack on the cloud. QHub makes use of Terraform to deploy JupyterHub, -JupyterLab, Dask, and Conda environments on Kubernetes clusters across all major cloud providers. +QHub is an open source framework that allows data science teams to initialize and maintain their data science stack on +the cloud. QHub makes use of Terraform to deploy JupyterHub, JupyterLab, Dask, and Conda environments on Kubernetes +clusters across all major cloud providers. -Through QHub, the deployment is managed using a single configuration file and is powered by GitHub Actions. This allows teams to build and maintain cost-effective and scalable -infrastructure for compute/data science on cloud or on-premises. QHub is designed to be used and deployed by anyone, requiring minimal DevOps experience. +Through QHub, the deployment is managed using a single configuration file and is powered by GitHub Actions. This allows +teams to build and maintain cost-effective and scalable infrastructure for compute/data science on cloud or on-premises. +QHub is designed to be used and deployed by anyone, requiring minimal DevOps experience. ## Features @@ -27,13 +29,16 @@ The QHub architecture facilitates: - Seamless deployment with GitHub Actions using a single configuration file. 
- Allow multiple teams to collaborate together with control permissions. -At a high-level, QHub makes use of Network File System (NFS) to provide storage access to Kubernetes applications. This creates a Kubernetes Persistent Volume that allows NFS to -share files directly. With the aid of Dask integration and environment management with conda-store, QHub provides users with a simple deployment process. +At a high-level, QHub makes use of Network File System (NFS) to provide storage access to Kubernetes applications. This +creates a Kubernetes Persistent Volume that allows NFS to share files directly. With the aid of Dask integration and +environment management with conda-store, QHub provides users with a simple deployment process. -For infrastructure provisioning, QHub uses Terraform to deploy Kubernetes clusters on AWS, GCP, Azure, and Digital Ocean. For Kubernetes deployments, QHub uses Helm Charts to allow -ease of distribution and to deploy QHub on any Kubernetes cluster. +For infrastructure provisioning, QHub uses Terraform to deploy Kubernetes clusters on AWS, GCP, Azure, and Digital +Ocean. For Kubernetes deployments, QHub uses Helm Charts to allow ease of distribution and to deploy QHub on any +Kubernetes cluster. -To learn more about the nitty gritty of QHub's internal architecture, refer to the [QHub Architecture](../dev_guide/architecture.md) section. +To learn more about the nitty gritty of QHub's internal architecture, refer to the +[QHub Architecture](../dev_guide/architecture.md) section. ## Installation and management @@ -48,43 +53,51 @@ QHub can be easily installed using either `conda` or `pip`. pip install qhub ``` -QHub CLI will be installed automatically as part of the install process. After installation, QHub CLI can be used to deploy and manage your environment on HPC, cloud, on-premises -or even locally. To install QHub locally, follow the [testing](../dev_guide/testing.md) section. +QHub CLI will be installed automatically as part of the install process. After installation, QHub CLI can be used to +deploy and manage your environment on HPC, cloud, on-premises or even locally. To install QHub locally, follow the +[testing](../dev_guide/testing.md) section. -For HPC deployment, follow the [QHub HPC](https://hpc.qhub.dev/en/latest/) documentation. For individual cloud deployment, follow the -[installation instructions](../installation/setup.md). After setting up all the credentials, you can deploy QHub using: +For HPC deployment, follow the [QHub HPC](https://hpc.qhub.dev/en/latest/) documentation. For individual cloud +deployment, follow the [installation instructions](../installation/setup.md). After setting up all the credentials, you +can deploy QHub using: ```sh qhub init qhub deploy ``` -After installing QHub, you can further manage your deployment by adding new users, upgrading dependencies, managing your environment, and monitoring your deployment. Refer to the -[management instructions](../installation/management.md) for more details. +After installing QHub, you can further manage your deployment by adding new users, upgrading dependencies, managing your +environment, and monitoring your deployment. Refer to the [management instructions](../installation/management.md) for +more details. ## Using QHub -After setting up QHub, you can visit the URL where QHub is running. Based on your authentication mechanism, you will be greeted by a login page after which the user will be -prompted to use a set of profile available to them. 
With fixed resources allocated to each user, you can start a cluster by clicking `Start` which will initiate the launch. +After setting up QHub, you can visit the URL where QHub is running. Based on your authentication mechanism, you will be +greeted by a login page after which the user will be prompted to use a set of profile available to them. With fixed +resources allocated to each user, you can start a cluster by clicking `Start` which will initiate the launch. -After the launch, you will be greeted by specific Python environments, which when clicked will start a JupyterLab notebook. To use VS Code, you can use Code Server by clicking -`VS Code IDE` icon. To remotely access the clusters, use the [jupyterhub-ssh](/~https://github.com/yuvipanda/jupyterhub-ssh) extension. For further usage instructions, follow the +After the launch, you will be greeted by specific Python environments, which when clicked will start a JupyterLab +notebook. To use VS Code, you can use Code Server by clicking `VS Code IDE` icon. To remotely access the clusters, use +the [jupyterhub-ssh](/~https://github.com/yuvipanda/jupyterhub-ssh) extension. For further usage instructions, follow the [using QHub](../user_guide/index.md) section. ## Community & support QHub is supported by the [Quansight](https://quansight.com) community. We maintain a -[Frequently Asked Questions (FAQ) page](/~https://github.com/Quansight/qhub/blob/main/docs/source/user_guide/faq.md) for QHub users. For QHub queries, we ideally rely upon the -following channels: +[Frequently Asked Questions (FAQ) page](/~https://github.com/Quansight/qhub/blob/main/docs/source/user_guide/faq.md) for +QHub users. For QHub queries, we ideally rely upon the following channels: -- [GitHub Discussions](/~https://github.com/Quansight/qhub/discussions): Raise discussions around particular subjects and specific queries around usage, maintenance and - administration. +- [GitHub Discussions](/~https://github.com/Quansight/qhub/discussions): Raise discussions around particular subjects and + specific queries around usage, maintenance and administration. -- [GitHub Issues](/~https://github.com/Quansight/qhub/issues/new/choose): Use Issues to report bugs, request new features, new documentation or potential refactors. +- [GitHub Issues](/~https://github.com/Quansight/qhub/issues/new/choose): Use Issues to report bugs, request new features, + new documentation or potential refactors. ## How can I contribute? -QHub welcomes new contributors. If you are interested in contributing to QHub, please refer to the [contributing guide](../dev_guide/contribution.md) for more details. +QHub welcomes new contributors. If you are interested in contributing to QHub, please refer to the +[contributing guide](../dev_guide/contribution.md) for more details. -We require contributors to strictly follow our [Code of Conduct](/~https://github.com/Quansight/.github/blob/master/CODE_OF_CONDUCT.md) and propose features, bug fixes and -documentation changes on our [issues page](/~https://github.com/Quansight/qhub/issues/new/choose). +We require contributors to strictly follow our +[Code of Conduct](/~https://github.com/Quansight/.github/blob/master/CODE_OF_CONDUCT.md) and propose features, bug fixes +and documentation changes on our [issues page](/~https://github.com/Quansight/qhub/issues/new/choose). 
diff --git a/docs/source/user_guide/code_server.md b/docs/source/user_guide/code_server.md index 7aaff6d34d..cd9fc4e12e 100644 --- a/docs/source/user_guide/code_server.md +++ b/docs/source/user_guide/code_server.md @@ -1,11 +1,14 @@ # In Browser VSCode -Code Server is a packaging of VS Code in the browser. Within QHub we have packaged Code Server such that every user's JupyterLab has a full-featured code editor. This editor will -have access to all the same files that your regular JupyterLab session has access to. To launch `Code Server` click on the `VS Code IDE` icon from the Launcher screen, see below. +Code Server is a packaging of VS Code in the browser. Within QHub we have packaged Code Server such that every user's +JupyterLab has a full-featured code editor. This editor will have access to all the same files that your regular +JupyterLab session has access to. To launch `Code Server` click on the `VS Code IDE` icon from the Launcher screen, see +below. ![QHub Kernel Selection](../images/qhub_kernel_selection.png) -A new VS Code tab will be opened, and from there you can access all of the same files as in your JupyterLab file browser. The VS Code state will be saved between sessions so feel -free to add extensions, plugins, etc. to enhance your user experience. +A new VS Code tab will be opened, and from there you can access all of the same files as in your JupyterLab file +browser. The VS Code state will be saved between sessions so feel free to add extensions, plugins, etc. to enhance your +user experience. ![VSCode in browser](../images/qhub_vscode.png) diff --git a/docs/source/user_guide/dashboard.md b/docs/source/user_guide/dashboard.md index 18e1a63094..c5517cf75d 100644 --- a/docs/source/user_guide/dashboard.md +++ b/docs/source/user_guide/dashboard.md @@ -1,12 +1,15 @@ # Dashboards -QHub encourages users to create dashboards that can be shared with other users and groups via [ContainDS Dashboards](https://cdsdashboards.readthedocs.io/en/stable/). Currently, -this dashboarding solution supports Panel, Bokeh, Voila, Streamlit, and Plotly. The solution is general purpose enough to support any web app. For a more detailed guide on using -CDSDashboards, see the [documentation](https://cdsdashboards.readthedocs.io/en/stable/index.html). +QHub encourages users to create dashboards that can be shared with other users and groups via +[ContainDS Dashboards](https://cdsdashboards.readthedocs.io/en/stable/). Currently, this dashboarding solution supports +Panel, Bokeh, Voila, Streamlit, and Plotly. The solution is general purpose enough to support any web app. For a more +detailed guide on using CDSDashboards, see the +[documentation](https://cdsdashboards.readthedocs.io/en/stable/index.html). ![qhub dashboard notebook](../images/qhub_dashboard_notebook.png) -Create a notebook in your jupyterlab environment with the following code in a notebook named `mydashboard.ipynb` in the home directory. +Create a notebook in your jupyterlab environment with the following code in a notebook named `mydashboard.ipynb` in the +home directory. ```python import panel @@ -26,25 +29,30 @@ dashboard = panel.Row(content, png, widget) dashboard.servable() ``` -Once you execute the notebook you should see the output shown above. We will now show how to create a dashboard from this notebook. 
Keep in mind that for other dashboard-based -solutions, for example Voila, the instructions will be slightly different, in which case we recommend visiting the -[cds docs](https://cdsdashboards.readthedocs.io/en/stable/index.html). Visit your hub homepage which is at `https:///hub/dashboards` or click the dashboard tab in the hub -home menu. +Once you execute the notebook you should see the output shown above. We will now show how to create a dashboard from +this notebook. Keep in mind that for other dashboard-based solutions, for example Voila, the instructions will be +slightly different, in which case we recommend visiting the +[cds docs](https://cdsdashboards.readthedocs.io/en/stable/index.html). Visit your hub homepage which is at +`https:///hub/dashboards` or click the dashboard tab in the hub home menu. ![qhub dashboard new](../images/qhub_new_dashboard.png) -Click `New Dashboard` and give the dashboard any name and description. For now, allow `all users` and use the `jupyter tree`. Note that `bokeh` was selected for the framework since -panel uses bokeh under the covers. Choose the conda environment `dashboard`. Finally, supply a relative path to the dashboard you would like to launch. In this case +Click `New Dashboard` and give the dashboard any name and description. For now, allow `all users` and use the +`jupyter tree`. Note that `bokeh` was selected for the framework since panel uses bokeh under the covers. Choose the +conda environment `dashboard`. Finally, supply a relative path to the dashboard you would like to launch. In this case `./mydashboard.ipynb` since this is the name of the dashboard created above in the notebook. ![qhub new dashboard filled in](../images/qhub_new_dashboard_filled_in.png) -Once you have saved the dashboard, you will be taken to the screen to select the resources the dashboard will have available to it. +Once you have saved the dashboard, you will be taken to the screen to select the resources the dashboard will have +available to it. ![qhub dashboard resources](../images/qhub_dashboard_resources.png) -Once the resources have been chosen, click save and the dashboard will launch. This should provide a dedicated url that you can share with other QHub users to view the dashboard. -In the case of this dashboard the url was `https://training.qhub.dev/user/costrouchov@quansight.com/dash-my-awesome-dashboard/`, but your url will be different. The url will force -authentication for the user to view the dashboard. The dashboard for the notebook in this example is shown below. +Once the resources have been chosen, click save and the dashboard will launch. This should provide a dedicated url that +you can share with other QHub users to view the dashboard. In the case of this dashboard the url was +`https://training.qhub.dev/user/costrouchov@quansight.com/dash-my-awesome-dashboard/`, but your url will be different. +The url will force authentication for the user to view the dashboard. The dashboard for the notebook in this example is +shown below. ![qhub dashboard simple](../images/qhub_dashboard_simple.png) diff --git a/docs/source/user_guide/dask_gateway.md b/docs/source/user_guide/dask_gateway.md index 6737b405f8..dd7b7eca02 100644 --- a/docs/source/user_guide/dask_gateway.md +++ b/docs/source/user_guide/dask_gateway.md @@ -1,9 +1,11 @@ # Using Dask Gateway -[Dask Gateway](https://gateway.dask.org/) provides a secure way to managing dask clusters. QHub uses dask-gateway to expose auto-scaling compute clusters automatically configured -for the user. 
For a full guide on dask-gateway please [see the docs](https://gateway.dask.org/usage.html). However here we try to detail the important usage on QHub.
+[Dask Gateway](https://gateway.dask.org/) provides a secure way to manage dask clusters. QHub uses dask-gateway to
+expose auto-scaling compute clusters automatically configured for the user. For a full guide on dask-gateway please
+[see the docs](https://gateway.dask.org/usage.html). However, here we detail the most important usage patterns on QHub.

-QHub already has the connection information pre-configured for the user. If you would like to see the pre-configured settings, run
+QHub already has the connection information pre-configured for the user. If you would like to see the pre-configured
+settings, run

```shell
cat /etc/dask/gateway.yaml
@@ -20,10 +22,11 @@ from dask_gateway import Gateway

gateway = Gateway()
```

-QHub has [a section](https://docs.qhub.dev/en/stable/source/installation/configuration.html#profiles) for configuring the dask profiles that users have access to. These can be
-accessed via Dask Gateway options. Once the [ipywidget](https://ipywidgets.readthedocs.io/en/latest/) shows up the user can select the options they care about. If you are
-interacting in a terminal there are also ways to configure the options. Please see the dask-gateway docs. It's important that the environment used for your notebook matches the
-dask worker environment.
+QHub has [a section](https://docs.qhub.dev/en/stable/source/installation/configuration.html#profiles) for configuring
+the dask profiles that users have access to. These can be accessed via Dask Gateway options. Once the
+[ipywidget](https://ipywidgets.readthedocs.io/en/latest/) shows up, the user can select the options they care about. If
+you are interacting in a terminal, there are also ways to configure the options. Please see the dask-gateway docs. It's
+important that the environment used for your notebook matches the dask worker environment.

![qhub dask options](../images/qhub_dask_cluster_options.png)

@@ -41,16 +44,17 @@ cluster = gateway.new_cluster(options)

cluster
```

-The user is presented with a GUI to scale up the number of workers. At first, users start with `0` workers. In addition you can scale up via Python functions. Additionally the GUI
-has a `dashboard` link that you can click to view [cluster diagnostics](https://docs.dask.org/en/latest/diagnostics-distributed.html). This link is especially useful for debugging
-and benchmarking.
+The user is presented with a GUI to scale up the number of workers. At first, users start with `0` workers. You can
+also scale up via Python functions. Additionally, the GUI has a `dashboard` link that you can click to view
+[cluster diagnostics](https://docs.dask.org/en/latest/diagnostics-distributed.html). This link is especially useful for
+debugging and benchmarking.

```python
cluster.scale(1)
```

-Once you have created a cluster and scaled to an appropriate number of workers we can grab our dask client to start the computation. You may also use the cluster menu with the
-dashboard link to scale the number of workers.
+Once you have created a cluster and scaled it to an appropriate number of workers, you can grab the dask client to start
+the computation. You may also use the cluster menu with the dashboard link to scale the number of workers.

```python
client = cluster.get_client()
@@ -70,7 +74,8 @@ If a result was returned, your cluster is working.
## Cluster Options

-Dask Gateway allows users to configure their clusters via cluster options. Here are some configuration options exposed in QHub's Dask Gateway deployment.
+Dask Gateway allows users to configure their clusters via cluster options. Here are some configuration options exposed
+in QHub's Dask Gateway deployment.

- Get cluster options

@@ -95,42 +100,47 @@ options.environment_vars = {
options.conda_environment = "tensorflow"
```

-Note: The above configuration options are valid for QHub's Dask Gateway deployment, these might be different for a non QHub deployment, like say Pangeo's Dask Gateway deployment.
+Note: The above configuration options are valid for QHub's Dask Gateway deployment; they might be different for a
+non-QHub deployment, such as Pangeo's Dask Gateway deployment.

## Accessing Cluster Outside of QHub

-A long requested feature was the ability to access a dask cluster from outside of the cluster itself. In general this is possible but at the moment can break due to version
-mismatches between [dask](https://dask.org/), [distributed](https://distributed.dask.org/en/latest/), and [dask-gateway](https://gateway.dask.org/). Also we have had issues with
-other libraries not matching so don't consider this check exhaustive. At a minimum, check that your local environment matches. It's possible that it will work if the versions don't
-match exactly, but it's not recommended.
+A long-requested feature was the ability to access a dask cluster from outside of the cluster itself. In general this is
+possible, but at the moment it can break due to version mismatches between [dask](https://dask.org/),
+[distributed](https://distributed.dask.org/en/latest/), and [dask-gateway](https://gateway.dask.org/). We have also had
+issues with other libraries not matching, so don't consider this check exhaustive. At a minimum, check that your local
+environment matches. It's possible that it will work if the versions don't match exactly, but it's not recommended.

```python
import dask, distributed, dask_gateway
print(dask.__version__, distributed.__version__, dask_gateway.__version__)
```

-Next you need to supply a JupyterHub API token to validate with the Dask Gateway API. This was not required within QHub since this is automatically set in JupyterLab sessions.
-There are several ways to get a JupyterHub API token.
+Next you need to supply a JupyterHub API token to authenticate with the Dask Gateway API. This was not required within
+QHub since it is automatically set in JupyterLab sessions. There are several ways to get a JupyterHub API token.

-The easiest way is to visit `https:///hub/token` when you are logged in and click `Request new API token`. This should show a long string to copy as your API token.
+The easiest way is to visit `https:///hub/token` when you are logged in and click `Request new API token`.
+This should show a long string to copy as your API token.

```python
import os
os.environ['JUPYTERHUB_API_TOKEN'] = '9da45d9...................37779f'
```

-Finally you will need to manually configure the `Gateway` connection parameters. The connection parameters can be easily filled in based on the `` for your deployment.
+Finally, you will need to manually configure the `Gateway` connection parameters. The connection parameters can be easily
+filled in based on the `` for your deployment.

```python
gateway = Gateway(address='https:///gateway', auth='jupyterhub', proxy_address='tcp://:8786')
```

-Now your gateway is properly configured. You can follow the usage tutorial above.
If your dask, distributed, and dask-gateway versions don't match, connecting to these APIs may -(most likely will) break in unexpected ways. +Now your gateway is properly configured. You can follow the usage tutorial above. If your dask, distributed, and +dask-gateway versions don't match, connecting to these APIs may (most likely will) break in unexpected ways. ## Common Errors -As mentioned above, version mismatches between dask, dask-gateway, and distributed are extremely common. Here are some common errors and the most likely fixes for them: +As mentioned above, version mismatches between dask, dask-gateway, and distributed are extremely common. Here are some +common errors and the most likely fixes for them: ```python ... @@ -141,5 +151,6 @@ ValueError: 404: Not Found This error is due to a version mismatch between the dask-gateway client and dask-gateway server. -If you get `struct unpack` related errors when using dask this is most likely a mismatch in versions for [Dask](https://pypi.org/project/dask/) or -[distributed](https://pypi.org/project/distributed/). The last issue Quansight has run into was due to the version of bokeh being used for the dask dashboard. +If you get `struct unpack` related errors when using dask this is most likely a mismatch in versions for +[Dask](https://pypi.org/project/dask/) or [distributed](https://pypi.org/project/distributed/). The last issue Quansight +has run into was due to the version of bokeh being used for the dask dashboard. diff --git a/docs/source/user_guide/environments.md b/docs/source/user_guide/environments.md index fa07b1425e..28b239d34d 100644 --- a/docs/source/user_guide/environments.md +++ b/docs/source/user_guide/environments.md @@ -1,7 +1,8 @@ # Managing Conda Environments -QHub has several ways to manage environments for users. The traditional approach, available in older QHub deployments, is still available by editing the `qhub-config.yaml` -`environments:` key within the configuration file. Here's an example: +QHub has several ways to manage environments for users. The traditional approach, available in older QHub deployments, +is still available by editing the `qhub-config.yaml` `environments:` key within the configuration file. Here's an +example: ```yaml environments: @@ -20,19 +21,23 @@ environments: - pandas ``` -When the environments are updated in this file and an automated `qhub deploy` is kicked off, the environments are updated for all users. There is also a way to easily create ad-hoc -environments without modifying the file. Visiting `https:///conda-store/` will take you to [Conda-Store](https://conda-store.readthedocs.io/en/latest/) an open source -tool for managing conda environments within enterprise environments. For now the username is anything with a password of `password`, but soon this will be integrated with central -authentication via keycloak. The [create environment endpoint](https://conda-store.readthedocs.io/en/latest/user_guide.html#create-create-environment) will allow you to easily -create a new environment. Additionally, you can update existing environments by -[visiting the environment](https://conda-store.readthedocs.io/en/latest/user_guide.html#environment-namespace-name-environments) and clicking edit. +When the environments are updated in this file and an automated `qhub deploy` is kicked off, the environments are +updated for all users. There is also a way to easily create ad-hoc environments without modifying the file. 
Visiting
+`https:///conda-store/` will take you to [Conda-Store](https://conda-store.readthedocs.io/en/latest/), an
+open source tool for managing conda environments within enterprise environments. For now, the username is anything with
+a password of `password`, but soon this will be integrated with central authentication via Keycloak. The
+[create environment endpoint](https://conda-store.readthedocs.io/en/latest/user_guide.html#create-create-environment)
+will allow you to easily create a new environment. Additionally, you can update existing environments by
+[visiting the environment](https://conda-store.readthedocs.io/en/latest/user_guide.html#environment-namespace-name-environments)
+and clicking edit.

-> NOTE: Environments, even global ones, created from the `/conda-store` user interface CANNOT be used when running dashboards via the CDSDashboard interface. Only those added via
-> the `qhub-config.yaml`.
+> NOTE: Environments, even global ones, created from the `/conda-store` user interface CANNOT be used when running
+> dashboards via the CDSDashboard interface. Only those added via the `qhub-config.yaml` can be.

-In order for your new environment to be properly visible in the list of available kernels, you will need to include `ipykernel` and `ipywidgets` in your environment's dependency
-list. Also, if using Dask, you will need to include [extra dependencies](./faq.md/#whats-included-in-the-user-environment-if-a-user-wants-to-use-dask) to maintain version
+In order for your new environment to be properly visible in the list of available kernels, you will need to include
+`ipykernel` and `ipywidgets` in your environment's dependency list. Also, if using Dask, you will need to include
+[extra dependencies](./faq.md/#whats-included-in-the-user-environment-if-a-user-wants-to-use-dask) to maintain version
compatibility between the Dask client and server.

-We are working towards developing an extension within JupyterLab for editing these environments, but it is not complete at the moment. Follow
-[gator](/~https://github.com/mamba-org/gator) for progress on this extension.
+We are working towards developing an extension within JupyterLab for editing these environments, but it is not complete
+at the moment. Follow [gator](/~https://github.com/mamba-org/gator) for progress on this extension.
diff --git a/docs/source/user_guide/faq.md b/docs/source/user_guide/faq.md
index ca1460e311..0774c30468 100644
--- a/docs/source/user_guide/faq.md
+++ b/docs/source/user_guide/faq.md
@@ -1,36 +1,40 @@
# Frequently asked questions

-Additional FAQ questions are available in the [GitHub discussions](/~https://github.com/Quansight/qhub/discussions/categories/q-a).
+Additional FAQ questions are available in the
+[GitHub discussions](/~https://github.com/Quansight/qhub/discussions/categories/q-a).

## Environments

### How are QHub conda user environments created? Who creates them?

-The environment specifications are available in `qhub_config.yml` in the deployment repo, which serves to the QHub deployment using
-[conda-store](https://conda-store.readthedocs.io/). When the user manages their environments in this way, they get all of the benefits of environment versioning that QHub does
-under the hood, including future features, such as convenient environment rollback and environment encapsulation in containers.
+The environment specifications are available in `qhub_config.yml` in the deployment repo and are served to the QHub
+deployment using [conda-store](https://conda-store.readthedocs.io/).
When the user manages their environments in this
+way, they get all of the benefits of environment versioning that QHub does under the hood, including future features,
+such as convenient environment rollback and environment encapsulation in containers.

-Anyone with access to the QHub deployment repo can add an environment, and there are no limits to the number of included environments.
+Anyone with access to the QHub deployment repo can add an environment, and there are no limits to the number of included
+environments.

> Be careful of the YAML indentation as it differs from the conda `environment.yml`

### What to do when the user requires `X` package and it's not available in the environment?

-The proper solution is to add the package to the `qhub_config.yml` (See #1). If they don't have access to the deployment repo, the user needs to contact their QHub maintainer to
-get the required package. They *can* do a user install for pip packages if necessary (this is not recommended) but they won't be available to Dask workers.
+The proper solution is to add the package to the `qhub_config.yml` (See #1). If they don't have access to the deployment
+repo, the user needs to contact their QHub maintainer to get the required package. They *can* do a user install for pip
+packages if necessary (this is not recommended), but those packages won't be available to Dask workers.

### What's included in the user environment if a user wants to use Dask?

-The user needs to include the [QHub Dask metapackage](/~https://github.com/conda-forge/qhub-dask-feedstock). Example: `qhub-dask==||QHUB_VERSION||`. This replaces `distributed`,
-`dask`, and `dask-gateway` with the correct pinned versions.
+The user needs to include the [QHub Dask metapackage](/~https://github.com/conda-forge/qhub-dask-feedstock). Example:
+`qhub-dask==||QHUB_VERSION||`. This replaces `distributed`, `dask`, and `dask-gateway` with the correct pinned versions.

### Why can't a user just create their own local conda environment or edit the existing conda environments?

-The version of [conda-store](https://conda-store.readthedocs.io/) used in QHub versions 0.3.11 and earlier is an alpha version. It doesn't support using local conda environments or
-editing pre-exising environments directly.
+The version of [conda-store](https://conda-store.readthedocs.io/) used in QHub versions 0.3.11 and earlier is an alpha
+version. It doesn't support using local conda environments or editing pre-existing environments directly.

-> See the answer to #2 for information on how to modify environments properly. In the near future, the support for user-defined environments via conda-store is going to be
-> implemented.
+> See the answer to #2 for information on how to modify environments properly. In the near future, support for
+> user-defined environments via conda-store will be implemented.

### How can a user install a local package? Is it available to the user's Dask workers?

@@ -50,8 +54,9 @@ These aren't available to the Dask workers.

### How to use .bashrc on QHub?

-Users can use `.bashrc` on QHub, but it's important to note that by default QHub sources `.bash_profile`. The users might need to be sure to source the `.bashrc` inside of the
-`.bash_profile`. It's important to note that if they set environment variables in this way, they aren't available inside the notebooks.
+Users can use `.bashrc` on QHub, but it's important to note that by default QHub sources `.bash_profile`.
The users
+should be sure to source the `.bashrc` inside the `.bash_profile`. Note that if they set
+environment variables in this way, those variables aren't available inside the notebooks.

### How to use environment variables on dask workers which aren't loaded via a package?

@@ -86,22 +91,27 @@ conda config --set changeps1 true

### What if a user wants to use the QHub server to compute a new pinned environment, which the user serves via the `qhub_config.yml`?

-If the user needs to solve a conda env on a QHub server, they need to specify the prefix. For example, `conda env create -f env_test.yml --prefix/tmp/test-env` where `test-env` is
-the env name. It's not recommended, but there are valid use cases of this operation.
+If the user needs to solve a conda env on a QHub server, they need to specify the prefix. For example,
+`conda env create -f env_test.yml --prefix /tmp/test-env` where `test-env` is the env name. It's not recommended, but
+there are valid use cases for this operation.

## Compute

### I want to upgrade the instance size the `general` node group, is this possible?

-The `general` node group / node pool is the node (usually only one) that hosts most of the pods that QHub relies on for its core services, `hub`, `conda-store`, `proxy` and so on.
-We have tried to size it so that the initial deployment will work out of the box but also not set it too large that it incurs unnecessary cloud compute costs.
+The `general` node group / node pool is the node group (usually a single node) that hosts most of the pods that QHub
+relies on for its core services: `hub`, `conda-store`, `proxy`, and so on. We have tried to size it so that the initial
+deployment works out of the box but is not so large that it incurs unnecessary cloud compute costs.

-Although each cloud provider has different names and hourly prices for their compute nodes, the default `general` node group in `qhub-config.yaml` has 2 vCPU and 8 GB of memory.
+Although each cloud provider has different names and hourly prices for their compute nodes, the default `general` node
+group in `qhub-config.yaml` has 2 vCPU and 8 GB of memory.

-> Given the possible destructive nature of resizing this node group, we **highly recommend** [backing up your cluster](../admin_guide/backup.md) before trying.
+> Given the possibly destructive nature of resizing this node group, we **highly recommend**
+> [backing up your cluster](../admin_guide/backup.md) before trying.

-Based on some testing, clusters running on Google Kubernetes Engine (GKE), may have some luck performing in place upgrade. However, this can't be said for the other cloud providers
-and attempting to do so for AWS and Azure will likely result in a catastrophic destruction of your cluster.
+Based on some testing, clusters running on Google Kubernetes Engine (GKE) may have some luck performing an in-place
+upgrade. However, this can't be said for the other cloud providers, and attempting to do so for AWS and Azure will
+likely result in the catastrophic destruction of your cluster.

| Cloud Provider | `general` node upgrade possible? |
| :------------- | :------------------------------- |
@@ -110,7 +120,9 @@ and attempting to do so for AWS and Azure will likely result in a catastrophic d
| Digital Ocean | No |
| GCP | Yes |

-If modifying the resource allocation for the `general` node is ultimately necessary, try increasing the max number of nodes for the `general` node group.
This will mean two nodes -
-reserved for the `general` node group - will likely always be running, increasing the operating cost of the cluster.
+If modifying the resource allocation for the `general` node is ultimately necessary, try increasing the max number of
+nodes for the `general` node group. This will mean two nodes - reserved for the `general` node group - will likely
+always be running, increasing the operating cost of the cluster.

-Alternatively, you can backup your cluster, destroy it and redeploy using the same `qhub-config.yaml` but with an instance size of your liking.
+Alternatively, you can back up your cluster, destroy it, and redeploy using the same `qhub-config.yaml` but with an
+instance size of your liking.
diff --git a/docs/source/user_guide/getting_started.md b/docs/source/user_guide/getting_started.md
index 626815affa..fe9aa15a78 100644
--- a/docs/source/user_guide/getting_started.md
+++ b/docs/source/user_guide/getting_started.md
@@ -1,44 +1,49 @@
# Login to QHub

-This guide aims to give a basic overview of the QHub login process. Your organization's QHub will likely have a slightly different authentication process due to the many
-authentication providers that QHub can integrate with.
+This guide aims to give a basic overview of the QHub login process. Your organization's QHub will likely have a slightly
+different authentication process due to the many authentication providers that QHub can integrate with.

-The first step is to connect with your QHub cluster, for this example we will be using `https://qhub-demo.qhub.dev`. Once on the site, you will be prompted by a login, similar to
-the login page shown in the image below.
+The first step is to connect to your QHub cluster; for this example we will be using `https://qhub-demo.qhub.dev`.
+Once on the site, you will be presented with a login page, similar to the one shown in the image below.

![QHub login screen](../images/qhub_login_screen.png)

-Qhub now uses an open source tool called Keycloak for user management. This makes it a little challenging to detail the exact process as it might differ greatly between
-authentication providers (LDAP, OAuth 2.0, passwordless authentication, password-based authentication and many others). A deeper overview of the QHub authentication process is
-described in the [Authentication Guide](../installation/login.md).
+QHub now uses an open source tool called Keycloak for user management. This makes it a little challenging to detail the
+exact process as it might differ greatly between authentication providers (LDAP, OAuth 2.0, passwordless authentication,
+password-based authentication and many others). A deeper overview of the QHub authentication process is described in the
+[Authentication Guide](../installation/login.md).

For this demonstration we will present the user with password-based or GitHub authentication.

![QHub Keycloak auth screen](../images/keycloak_qhub_login.png)

-Once authenticated, the user will be forwarded to the main hub page where the user will have access to `Token` management, JupyterLab server access, and other features like
-`Dashboards` and `Admin` management.
+Once authenticated, the user will be forwarded to the main hub page where they will have access to `Token`
+management, JupyterLab server access, and other features like `Dashboards` and `Admin` management.
![QHub main hub screen](../images/qhub_main_hub_page.png)

-After `Start My Server` is selected, the user will be prompted with a set of profiles that are available for the authenticated user. Your given selections will likely differ from
-the image shown. The customized profiles will give you access to fixed cloud resources. For example, you could choose a resource with 2 CPUs, 8 GB RAM, and 1 dedicated GPU, all of
-which is configured by your administrator. A more detailed explanation of dedicated profiles can be found in the [Profiles](../installation/configuration.md#profiles) section of
-the advanced configuration page.
+After `Start My Server` is selected, the user will be prompted with the set of profiles available to the
+authenticated user. Your given selections will likely differ from the image shown. The customized profiles will give you
+access to fixed cloud resources. For example, you could choose a resource with 2 CPUs, 8 GB RAM, and 1 dedicated GPU,
+all of which is configured by your administrator. A more detailed explanation of dedicated profiles can be found in the
+[Profiles](../installation/configuration.md#profiles) section of the advanced configuration page.

![QHub select profile](../images/qhub_select_profile.png)

-Once an appropriate profile has been selected, click `start`. At this point, your JupyterHub instance will be launched, a step which may take up to several minutes due to QHub use
-of autoscaling under the hood. Ultimately this autoscaling feature helps reduce costs when the cluster is idle. A successful launch should look similar to the image below.
+Once an appropriate profile has been selected, click `start`. At this point, your JupyterHub instance will be launched,
+a step which may take up to several minutes due to QHub's use of autoscaling under the hood. Ultimately, this autoscaling
+feature helps reduce costs when the cluster is idle. A successful launch should look similar to the image below.

![QHub start server](../images/qhub_server_start.png)

-Once your JupyterHub instance has been launched you will notice a selection of available Python environments. These environments will also represent the different kernel choices
-available for your notebooks. They are created and managed by conda-store and can be easily configured. Learn more at
+Once your JupyterHub instance has been launched, you will notice a selection of available Python environments. These
+environments will also represent the different kernel choices available for your notebooks. They are created and managed
+by conda-store and can be easily configured. Learn more at
 [Managing environments](../installation/configuration.md#environments).

![QHub kernel selection](../images/qhub_kernel_selection.png)

-From the Launcher, you can choose a JupyterLab notebook with a given conda environment. Note that kernels can take several seconds to become responsive. The circle in the top
-right-hand corner is a good indicator of the status of the kernel. A lightning bold means that the kernel has started, but it is not yet ready to run code. An open circle means
+From the Launcher, you can choose a JupyterLab notebook with a given conda environment. Note that kernels can take
+several seconds to become responsive. The circle in the top right-hand corner is a good indicator of the status of the
+kernel. A lightning bolt means that the kernel has started, but it is not yet ready to run code. An open circle means
 it's ready.
diff --git a/docs/source/user_guide/idle_culler.md b/docs/source/user_guide/idle_culler.md
index b423fd7c6e..19d91ffdb2 100644
--- a/docs/source/user_guide/idle_culler.md
+++ b/docs/source/user_guide/idle_culler.md
@@ -1,30 +1,32 @@
# Culling idle notebook servers

-Qhub uses a mix of the `idle culler `\_ extension and internal Jupyterlab server configuration to periodically check for idle
-notebook servers and shut them down.
+QHub uses a mix of the `jupyterhub-idle-culler` extension and internal
+JupyterLab server configuration to periodically check for idle notebook servers and shut them down.

-JupyterHub pings the user's notebook server at certain time intervals. If no response is received from the server during this checks and the timeout expires, the server is
-considered to be *inactive (idle)* and will be culled.
+JupyterHub pings the user's notebook server at certain time intervals. If no response is received from the server during
+these checks and the timeout expires, the server is considered to be *inactive (idle)* and will be culled.

-To help jupyterhub-idle-culler cull user servers, we configure the kernel manager to cull idle kernels that would otherwise make the user servers report themselves as active which
-is part of what jupyterhub-idle-culler considers.
+To help jupyterhub-idle-culler cull user servers, we configure the kernel manager to cull idle kernels that would
+otherwise make the user servers report themselves as active, which is part of what jupyterhub-idle-culler considers.

______________________________________________________________________

-The expected behavior is that the server will be shut down and removed from the Qhub namespace once all Terminals and Kernels are considered idle or terminated, as well as any
-remaining connection is closed.
+The expected behavior is that the server will be shut down and removed from the QHub namespace once all Terminals and
+Kernels are considered idle or terminated and any remaining connections are closed.

______________________________________________________________________

## Default settings

-By default, JupyterHub will ping the user notebook servers every 5 minutes to check their status. Every server found to be idle for more than 30 minutes will be terminated.
+By default, JupyterHub will ping the user notebook servers every 5 minutes to check their status. Every server found to
+be idle for more than 30 minutes will be terminated.

-Because the servers don't have a maximum age set, an active (has any open connection, terminal or kernel in execution ) server will not be shut down regardless of how long it has
-been up and running.
+Because the servers don't have a maximum age set, an active server (one with any open connection, terminal, or kernel in
+execution) will not be shut down regardless of how long it has been up and running.

The process for culling and terminating follows these steps:

- Check if the Terminal and Notebooks kernels are idle for more than 15 minutes. With periodically culling checks of 5m.
- If the kernel is idle for more than 15 minutes, terminate the kernel and the server.
-- Once no connections remains, after another 15m of no API calls from the user pod, the server is considered idle, and will be terminated.
+- Once no connections remain, after another 15m of no API calls from the user pod, the server is considered idle and
+  will be terminated.
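For reference, the defaults described above roughly correspond to the settings sketched below. This is illustrative
only: QHub manages this configuration internally, and the exact keys and values may differ between QHub releases and
JupyterHub/Jupyter Server versions.

```python
# Sketch of the idle-culling knobs described above (illustrative only; QHub
# manages these internally and the exact keys may differ between releases).

# jupyterhub_config.py -- register jupyterhub-idle-culler as a JupyterHub service
c.JupyterHub.services = [
    {
        "name": "idle-culler",
        "command": [
            "python", "-m", "jupyterhub_idle_culler",
            "--cull-every=300",  # check every 5 minutes
            "--timeout=1800",    # cull servers idle for more than 30 minutes
        ],
    }
]

# jupyter_server_config.py -- cull idle kernels and terminals so the user server
# can report itself as idle and eventually shut itself down
c.MappingKernelManager.cull_interval = 300       # check every 5 minutes
c.MappingKernelManager.cull_idle_timeout = 900   # cull kernels idle for 15 minutes
c.TerminalManager.cull_inactive_timeout = 900    # cull idle terminals as well
c.ServerApp.shutdown_no_activity_timeout = 900   # shut down after another 15 idle minutes
```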
diff --git a/docs/source/user_guide/ssh.md b/docs/source/user_guide/ssh.md
index 1cc7ed437e..92103a7fbb 100644
--- a/docs/source/user_guide/ssh.md
+++ b/docs/source/user_guide/ssh.md
@@ -1,25 +1,28 @@
# SSH/SFTP Access

QHub provides a secure method for users to login while also providing additional ways to connect to the cluster through
-[`jupyterhub-ssh`](/~https://github.com/yuvipanda/jupyterhub-ssh). This allows users to access a cluster and a JupyterLab environment via
-[`ssh`](https://en.wikipedia.org/wiki/Secure_Shell). In addition, users can easily transfer files back and forth via
-[`sftp`](https://en.wikipedia.org/wiki/SSH_File_Transfer_Protocol). And for users who prefer terminal based editors, such as emacs or vim, they can log in and automate tasks on the
-cluster without browser access. For more detailed information on using `jupyterhub-ssh`, please refer to the
+[`jupyterhub-ssh`](/~https://github.com/yuvipanda/jupyterhub-ssh). This allows users to access a cluster and a JupyterLab
+environment via [`ssh`](https://en.wikipedia.org/wiki/Secure_Shell). In addition, users can easily transfer files back
+and forth via [`sftp`](https://en.wikipedia.org/wiki/SSH_File_Transfer_Protocol). And users who prefer terminal-based
+editors, such as emacs or vim, can log in and automate tasks on the cluster without browser access. For more
+detailed information on using `jupyterhub-ssh`, please refer to the
 [documentation](https://jupyterhub-ssh.readthedocs.io/en/latest/index.html).

-In order to login via `ssh` a user needs to generate an API token. Visit `https:///hub/token`. Where `` is the domain name of your QHub cluster. You will be
-shown a screen similar to the one below. You need only generate the API token once; it can be reused going forward. To revoke the API token, simply return to this page and click
-`revoke`.
+In order to log in via `ssh`, a user needs to generate an API token. Visit `https:///hub/token`, where
+`` is the domain name of your QHub cluster. You will be shown a screen similar to the one below. You need only
+generate the API token once; it can be reused going forward. To revoke the API token, simply return to this page and
+click `revoke`.

![qhub api token](../images/qhub_api_token.png)

-To request a new token, add a short description, such as `ssh login token`, and click on `Request new API token`. Copy and save the generated api token (in this case
-`f0b80688484a4ac79a21b38ec277ca08`).
+To request a new token, add a short description, such as `ssh login token`, and click on `Request new API token`. Copy
+and save the generated API token (in this case `f0b80688484a4ac79a21b38ec277ca08`).

![qhub api token generated](../images/qhub_api_token_generated.png)

-You can now log into the QHub cluster via the terminal using `ssh`. Note that you will use your QHub username, shown in the top right-hand corner of the screen. You will need to
-provide this username explicitly when connecting via `ssh`. See the example below on using the `-o` option with `ssh`, and notice the ports used by QHub for `ssh` and `sftp`.
+You can now log into the QHub cluster via the terminal using `ssh`. Note that you will use your QHub username, shown in
+the top right-hand corner of the screen. You will need to provide this username explicitly when connecting via `ssh`.
+See the example below on using the `-o` option with `ssh`, and notice the ports used by QHub for `ssh` and `sftp`.
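As a rough sketch, a login and a file transfer could look like the following; the domain `qhub.example.com` and the
username are placeholders, the ports are the QHub defaults noted below, and the API token generated above is entered at
the password prompt.

```shell
# SSH into your JupyterLab environment (QHub's default ssh port is 8022);
# enter the API token generated above when prompted for a password
ssh -o User=your-qhub-username -p 8022 qhub.example.com

# Transfer files back and forth over SFTP (QHub's default sftp port is 8023)
sftp -o User=your-qhub-username -P 8023 qhub.example.com
```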
> - `ssh` uses port `8022`
> - `sftp` uses port `8023`
diff --git a/docs/source/user_guide/training.md b/docs/source/user_guide/training.md
index c5f572c2fe..c55dc62a53 100644
--- a/docs/source/user_guide/training.md
+++ b/docs/source/user_guide/training.md
@@ -1,11 +1,13 @@
# QHub training cluster

-Quansight provides training around the [PyData](https://pydata.org/) ecosystem. For this training material, a QHub based training cluster is utilized. This guide explains to
-newcomers how to log into Quansight's own QHub Training server. If you are looking for instructions on logging into your own organization's QHub see the user
+Quansight provides training around the [PyData](https://pydata.org/) ecosystem. For this training material, a QHub-based
+training cluster is used. This guide explains to newcomers how to log into Quansight's own QHub Training server. If
+you are looking for instructions on logging into your own organization's QHub, see the user
 [getting started docs](getting_started.md).

-The person in charge of organizing the training on non-Quansight side will provide a list of names and email addresses. In preparation of the upcomng training session, the
-Quansight training team will add those users to the aforementioned QHub instance. The instructions that follow assume your user has been added.
+The person in charge of organizing the training on the non-Quansight side will provide a list of names and email addresses.
+In preparation for the upcoming training session, the Quansight training team will add those users to the aforementioned
+QHub instance. The instructions that follow assume your user has been added.

To access the training platform:

@@ -13,7 +15,8 @@ To access the training platform:

![Screenshot of the training platform](../images/training_login_1.png)

-2. Enter your email address, the one associated with your invitation and enter the password. At the start of the training session, your trainer will share this password with you.
+2. Enter your email address (the one associated with your invitation) and enter the password. At the start of the
+   training session, your trainer will share this password with you.

![Login screen using Keycloak](../images/training_login_2.png)

@@ -25,7 +28,8 @@ To access the training platform:

![Screenshot of initial QHub page once user logged in](../images/training_instances.png)

-5. Click on the `Training Default` option, or whichever option your instructor has directed you to, and wait to access your new JupyterHub instance, similar to the image below.
+5. Click on the `Training Default` option, or whichever option your instructor has directed you to, and wait to access
+   your new JupyterHub instance, similar to the image below.

![QHub instance started and populated with many environments](../images/training_env.png)
diff --git a/docs/source/user_guide/troubleshooting.md b/docs/source/user_guide/troubleshooting.md
index b05122a99e..35bbb93ef8 100644
--- a/docs/source/user_guide/troubleshooting.md
+++ b/docs/source/user_guide/troubleshooting.md
@@ -4,43 +4,49 @@

### Issue

-During your initial QHub deployment, at the end of the `04-kubernetes-ingress` stage, you receive an output message stating that the DNS record for `your_qhub_domain` "appears not
-to exist, has recently been updated, or has yet to fully propagate."
+During your initial QHub deployment, at the end of the `04-kubernetes-ingress` stage, you receive an output message
+stating that the DNS record for `your_qhub_domain` "appears not to exist, has recently been updated, or has yet to fully
+propagate."

### Reason for observed behavior

As the output message mentions, this is likely the result of the non-deterministic behavior of DNS.

-Without going into a deep dive of what DNS is or how it works, the issue encountered here is that the when QHub tries to lookup the IP address associated with the DNS record,
-`your_qhub_domain`, nothing is returned. Unfortunately, this "lookup" is not as straight-forward as it sounds. To lookup the correct IP associated with this domain, many
-intermediate servers (root, top level domain, and authoritative nameservers) are checked, each with their own cache which was updated an unknown time ago (usually on the order of
-minutes but not always).
+Without going into a deep dive of what DNS is or how it works, the issue encountered here is that when QHub tries to
+look up the IP address associated with the DNS record, `your_qhub_domain`, nothing is returned. Unfortunately, this
+"lookup" is not as straightforward as it sounds. To look up the correct IP associated with this domain, many
+intermediate servers (root, top-level domain, and authoritative nameservers) are checked, each with its own cache
+which was updated an unknown time ago (usually on the order of minutes, but not always).

For those interested to learn more about DNS, [see this interesting comic](https://howdns.works/)).

### Troubleshooting

-Again, as the output message mentions, it will ask if you want it to retry this DNS lookup again after another wait period; this wait period keeps increasing after each retry.
-However, it's still possible that after waiting 15 or more minutes that the DNS still won't resolve.
+Again, as the output message mentions, it will ask if you want it to retry this DNS lookup after another wait
+period; this wait period keeps increasing after each retry. However, it's still possible that after waiting 15 or more
+minutes the DNS still won't resolve.

-At this point, feel free to cancel the deployment and rerun the same deployment command again in an hour or two. Although not guaranteed, it's extremely likely that the DNS will
-resolve correctly after this prolonged wait period.
+At this point, feel free to cancel the deployment and rerun the same deployment command in an hour or two.
+Although not guaranteed, it's extremely likely that the DNS will resolve correctly after this prolonged wait period.

## A Conda-Store environment fails to build

### Issue

-One of the two (`dashboard` or `dask`) [Conda-Store](/~https://github.com/Quansight/conda-store) environments created during the initial QHub deployment fails to appear as options
-when logged into JupyterHub.
+One of the two (`dashboard` or `dask`) [Conda-Store](/~https://github.com/Quansight/conda-store) environments created
+during the initial QHub deployment fails to appear as an option when logged into JupyterHub.

-If your user has access to Conda-Store, you can verify this by visiting `.com/conda-store` and having a look at the build status of the missing environment.
+If your user has access to Conda-Store, you can verify this by visiting `.com/conda-store` and having
+a look at the build status of the missing environment.

### Reason for observed behavior

-The reason for this issue is due to how these environments are simultaneously built.
Under the hood, Conda-Store relies on Mamba/Conda to resolve and download the specific packages
-listed in the environment YAML. If they both environment builds try to download the same package with different versions, the build that started first will have their package
+This issue is due to how these environments are built simultaneously. Under the hood, Conda-Store relies
+on Mamba/Conda to resolve and download the specific packages listed in the environment YAML. If both environment
+builds try to download the same package with different versions, the build that started first will have its package
 overwritten by the second build. This causes the first build to fail.

### Troubleshooting

-To resolve this issue, navigate to `.com/conda-store`, find the environment build that failed and trigger it to re-build.
+To resolve this issue, navigate to `.com/conda-store`, find the environment build that failed, and
+trigger it to rebuild.
diff --git a/nebari/provider/cicd/github.py b/nebari/provider/cicd/github.py
index 3801740922..67041ac39d 100644
--- a/nebari/provider/cicd/github.py
+++ b/nebari/provider/cicd/github.py
@@ -219,7 +219,7 @@ class NebariLinter(GHA):
 def checkout_image_step():
     return GHA_job_step(
         name="Checkout Image",
-        uses="actions/checkout@master",
+        uses="actions/checkout@v3",
         with_={
             "token": GHA_job_steps_extras(
                 __root__="${{ secrets.REPOSITORY_ACCESS_TOKEN }}"
@@ -231,7 +231,7 @@ def checkout_image_step():
 def setup_python_step():
     return GHA_job_step(
         name="Set up Python",
-        uses="actions/setup-python@v2",
+        uses="actions/setup-python@v4",
         with_={
             "python-version": GHA_job_steps_extras(
                 __root__=LATEST_SUPPORTED_PYTHON_VERSION