diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterhub/03-profiles.py b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterhub/03-profiles.py index 22193e79dc..26e10c648b 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterhub/03-profiles.py +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterhub/03-profiles.py @@ -82,11 +82,11 @@ def base_profile_home_mounts(username): } -def base_profile_shared_mounts(groups): +def base_profile_shared_mounts(groups_to_volume_mount): """Configure the group directory mounts for user. - Ensure that {shared}/{group} directory exists and user has - permissions to read/write/execute. Kubernetes does not allow the + Ensure that {shared}/{group} directory exists based on the scope availability + and if user has permissions to read/write/execute. Kubernetes does not allow the same pvc to be a volume thus we must check that the home and share pvc are not the same for some operation. 
@@ -103,40 +103,42 @@ def base_profile_shared_mounts(groups): {"name": "shared", "persistentVolumeClaim": {"claimName": shared_pvc_name}} ) - extra_container_config = { - "volumeMounts": [ - { - "mountPath": pod_shared_mount_path.format(group=group), - "name": "shared" if home_pvc_name != shared_pvc_name else "home", - "subPath": pvc_shared_mount_path.format(group=group), - } - for group in groups - ] - } + extra_container_config = {"volumeMounts": []} MKDIR_OWN_DIRECTORY = "mkdir -p /mnt/{path} && chmod 777 /mnt/{path}" command = " && ".join( [ MKDIR_OWN_DIRECTORY.format(path=pvc_shared_mount_path.format(group=group)) - for group in groups + for group in groups_to_volume_mount ] ) + init_containers = [ { "name": "initialize-shared-mounts", "image": "busybox:1.31", "command": ["sh", "-c", command], "securityContext": {"runAsUser": 0}, - "volumeMounts": [ - { - "mountPath": f"/mnt/{pvc_shared_mount_path.format(group=group)}", - "name": "shared" if home_pvc_name != shared_pvc_name else "home", - "subPath": pvc_shared_mount_path.format(group=group), - } - for group in groups - ], + "volumeMounts": [], } ] + + for group in groups_to_volume_mount: + extra_container_config["volumeMounts"].append( + { + "mountPath": pod_shared_mount_path.format(group=group), + "name": "shared" if home_pvc_name != shared_pvc_name else "home", + "subPath": pvc_shared_mount_path.format(group=group), + } + ) + init_containers[0]["volumeMounts"].append( + { + "mountPath": f"/mnt/{pvc_shared_mount_path.format(group=group)}", + "name": "shared" if home_pvc_name != shared_pvc_name else "home", + "subPath": pvc_shared_mount_path.format(group=group), + } + ) + return { "extra_pod_config": extra_pod_config, "extra_container_config": extra_container_config, @@ -475,7 +477,9 @@ def profile_conda_store_viewer_token(): } -def render_profile(profile, username, groups, keycloak_profilenames): +def render_profile( + profile, username, groups, keycloak_profilenames, groups_to_volume_mount +): """Render each 
profile for user. If profile is not available for given username, groups returns @@ -513,7 +517,7 @@ def render_profile(profile, username, groups, keycloak_profilenames): deep_merge, [ base_profile_home_mounts(username), - base_profile_shared_mounts(groups), + base_profile_shared_mounts(groups_to_volume_mount), profile_conda_store_mounts(username, groups), base_profile_extra_mounts(), configure_user(username, groups), @@ -552,21 +556,31 @@ def render_profiles(spawner): auth_state = yield spawner.user.get_auth_state() username = auth_state["oauth_user"]["preferred_username"] + # only return the lowest level group name # e.g. /projects/myproj -> myproj # and /developers -> developers groups = [Path(group).name for group in auth_state["oauth_user"]["groups"]] - spawner.log.info(f"user info: {username} {groups}") keycloak_profilenames = auth_state["oauth_user"].get("jupyterlab_profiles", []) + groups_with_permission_to_mount = auth_state.get( + "groups_with_permission_to_mount", [] + ) + # fetch available profiles and render additional attributes profile_list = z2jh.get_config("custom.profiles") return list( filter( None, [ - render_profile(p, username, groups, keycloak_profilenames) + render_profile( + p, + username, + groups, + keycloak_profilenames, + groups_with_permission_to_mount, + ) for p in profile_list ], ) diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterhub/04-auth.py b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterhub/04-auth.py index cbd20a4418..2694b2a34e 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterhub/04-auth.py +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/files/jupyterhub/04-auth.py @@ -1,3 +1,4 @@ +import asyncio import json import os import time @@ -55,13 +56,27 @@ async def update_auth_model(self, auth_model): 
user_roles_rich = await self._get_roles_with_attributes( roles=user_roles, client_id=jupyterhub_client_id, token=token ) + + # Include which groups have permission to mount shared directories (used by + # profiles.py) + auth_model["auth_state"]["groups_with_permission_to_mount"] = ( + await self.get_client_groups_with_mount_permissions( + user_groups=auth_model["auth_state"]["oauth_user"]["groups"], + user_roles=user_roles_rich, + client_id=jupyterhub_client_id, + token=token, + ) + ) + keycloak_api_call_time_taken = time.time() - keycloak_api_call_start user_roles_rich_names = {role["name"] for role in user_roles_rich} + user_roles_non_jhub_client = [ {"name": role} for role in user_roles_from_claims if role in (user_roles_from_claims - user_roles_rich_names) ] + auth_model["roles"] = [ { "name": role["name"], @@ -70,12 +85,16 @@ async def update_auth_model(self, auth_model): } for role in [*user_roles_rich, *user_roles_non_jhub_client] ] + # note: because the roles check is comprehensive, we need to re-add the admin and user roles if auth_model["admin"]: auth_model["roles"].append({"name": "admin"}) + if await self.check_allowed(auth_model["name"], auth_model): auth_model["roles"].append({"name": "user"}) + execution_time = time.time() - start + self.log.info( f"Auth model update complete, time taken: {execution_time}s " f"time taken for keycloak api call: {keycloak_api_call_time_taken}s " @@ -116,6 +135,7 @@ async def load_managed_roles(self): client_roles_rich = await self._get_jupyterhub_client_roles( jupyterhub_client_id=jupyterhub_client_id, token=token ) + # Includes roles like "default-roles-nebari", "offline_access", "uma_authorization" realm_roles = await self._fetch_api(endpoint="roles", token=token) roles = { @@ -126,38 +146,117 @@ async def load_managed_roles(self): } for role in [*realm_roles, *client_roles_rich] } + # we could use either `name` (e.g. 
"developer") or `path` ("/developer"); # since the default claim key returns `path`, it seems preferable. - group_name_key = "path" for realm_role in realm_roles: role_name = realm_role["name"] role = roles[role_name] # fetch role assignments to groups - groups = await self._fetch_api(f"roles/{role_name}/groups", token=token) - role["groups"] = [group[group_name_key] for group in groups] - # fetch role assignments to users - users = await self._fetch_api(f"roles/{role_name}/users", token=token) - role["users"] = [user["username"] for user in users] + role.update( + await self._get_users_and_groups_for_role( + role_name, + token=token, + ) + ) + for client_role in client_roles_rich: role_name = client_role["name"] role = roles[role_name] # fetch role assignments to groups - groups = await self._fetch_api( - f"clients/{jupyterhub_client_id}/roles/{role_name}/groups", token=token - ) - role["groups"] = [group[group_name_key] for group in groups] - # fetch role assignments to users - users = await self._fetch_api( - f"clients/{jupyterhub_client_id}/roles/{role_name}/users", token=token + role.update( + await self._get_users_and_groups_for_role( + role_name, + token=token, + client_id=jupyterhub_client_id, + ) ) - role["users"] = [user["username"] for user in users] return list(roles.values()) + async def get_client_groups_with_mount_permissions( + self, user_groups, user_roles, client_id, token + ): + """ + Asynchronously retrieves the list of client groups with mount permissions + that the user belongs to. 
+ """ + + roles_with_permission = [] + groups_with_permission_to_mount = set() + + # Filter roles with the shared-directory component and scope + for role in user_roles: + attributes = role.get("attributes", {}) + + role_component = attributes.get("component", [None])[0] + role_scopes = attributes.get("scopes", [None])[0] + + if ( + role_component == "shared-directory" + and role_scopes == "write:shared-mount" + ): + role_name = role.get("name") + roles_with_permission.append(role_name) + + # Fetch groups for all relevant roles concurrently + group_fetch_tasks = [ + self._fetch_api( + endpoint=f"clients/{client_id}/roles/{role_name}/groups", + token=token, + ) + for role_name in roles_with_permission + ] + + all_role_groups = await asyncio.gather(*group_fetch_tasks) + + # Collect group names with permissions + for role_groups in all_role_groups: + groups_with_permission_to_mount |= set( + [group["path"] for group in role_groups] + ) + + return list(groups_with_permission_to_mount & set(user_groups)) + + async def _get_users_and_groups_for_role( + self, role_name, token, client_id=None, group_name_key="path" + ): + """ + Asynchronously fetches and maps groups and users to a specified role. + + Returns: + dict: A dictionary with groups (path or name) and users mapped to the role. 
+ { + "groups": ["/group1", "/group2"], + "users": ["user1", "user2"], + }, + """ + # Prepare endpoints + group_endpoint = f"roles/{role_name}/groups" + user_endpoint = f"roles/{role_name}/users" + + if client_id: + group_endpoint = f"clients/{client_id}/roles/{role_name}/groups" + user_endpoint = f"clients/{client_id}/roles/{role_name}/users" + + # fetch role assignments to groups (Fetch data concurrently) + groups, users = await asyncio.gather( + *[ + self._fetch_api(endpoint=group_endpoint, token=token), + self._fetch_api(endpoint=user_endpoint, token=token), + ] + ) + + # Process results + return { + "groups": [group[group_name_key] for group in groups], + "users": [user["username"] for user in users], + } + def _get_scope_from_role(self, role): """Return scopes from role if the component is jupyterhub""" role_scopes = role.get("attributes", {}).get("scopes", []) - component = role.get("attributes", {}).get("component") + component = role.get("attributes", {}).get("component", []) # Attributes are returned as a single-element array, unless `##` delimiter is used in Keycloak # See this: https://stackoverflow.com/questions/68954733/keycloak-client-role-attribute-array if component == ["jupyterhub"] and role_scopes: diff --git a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf index 570fda80c0..06cd4d6dd1 100644 --- a/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf +++ b/src/_nebari/stages/kubernetes_services/template/modules/kubernetes/services/jupyterhub/main.tf @@ -321,6 +321,16 @@ module "jupyterhub-openid-client" { "component" : "jupyterhub" } }, + { + "name" : "allow-group-directory-creation-role", + "description" : "Grants a group the ability to manage the creation of its corresponding mounted directory.", + "groups" : ["admin", "analyst", "developer"], + "attributes" : 
{ + # grants permissions to mount group folder to shared dir + "scopes" : "write:shared-mount", + "component" : "shared-directory" + } + }, ] callback-url-paths = [ "https://${var.external-url}/hub/oauth_callback", diff --git a/tests/tests_deployment/keycloak_utils.py b/tests/tests_deployment/keycloak_utils.py index 991f62857d..96b302108f 100644 --- a/tests/tests_deployment/keycloak_utils.py +++ b/tests/tests_deployment/keycloak_utils.py @@ -81,6 +81,16 @@ def create_keycloak_role(client_name: str, role_name: str, scopes: str, componen ) +def get_keycloak_client_role(client_name, role_name): + keycloak_admin = get_keycloak_admin() + client_details = get_keycloak_client_details_by_name( + client_name=client_name, keycloak_admin=keycloak_admin + ) + return keycloak_admin.get_client_role( + client_id=client_details["id"], role_name=role_name + ) + + def get_keycloak_client_roles(client_name): keycloak_admin = get_keycloak_admin() client_details = get_keycloak_client_details_by_name( @@ -89,6 +99,13 @@ def get_keycloak_client_roles(client_name): return keycloak_admin.get_client_roles(client_id=client_details["id"]) +def get_keycloak_role_groups(client_id, role_name): + keycloak_admin = get_keycloak_admin() + return keycloak_admin.get_client_role_groups( + client_id=client_id, role_name=role_name + ) + + def delete_client_keycloak_test_roles(client_name): keycloak_admin = get_keycloak_admin() client_details = get_keycloak_client_details_by_name( diff --git a/tests/tests_deployment/test_jupyterhub_api.py b/tests/tests_deployment/test_jupyterhub_api.py index f7bc33637b..aaeaf535ac 100644 --- a/tests/tests_deployment/test_jupyterhub_api.py +++ b/tests/tests_deployment/test_jupyterhub_api.py @@ -6,7 +6,10 @@ from tests.tests_deployment.keycloak_utils import ( assign_keycloak_client_role_to_user, create_keycloak_role, + get_keycloak_client_details_by_name, + get_keycloak_client_role, get_keycloak_client_roles, + get_keycloak_role_groups, ) from tests.tests_deployment.utils 
import get_refresh_jupyterhub_token @@ -33,6 +36,7 @@ def test_jupyterhub_loads_roles_from_keycloak(jupyterhub_access_token): "view-profile", # default roles "allow-read-access-to-services-role", + "allow-group-directory-creation-role", } @@ -52,6 +56,40 @@ def test_check_default_roles_added_in_keycloak(): role_names = [role["name"] for role in client_roles] assert "allow-app-sharing-role" in role_names assert "allow-read-access-to-services-role" in role_names + assert "allow-group-directory-creation-role" in role_names + + +@pytest.mark.filterwarnings( + "ignore:.*auto_refresh_token is deprecated:DeprecationWarning" +) +@pytest.mark.filterwarnings("ignore::urllib3.exceptions.InsecureRequestWarning") +def test_check_directory_creation_scope_attributes(): + client_role = get_keycloak_client_role( + client_name="jupyterhub", role_name="allow-group-directory-creation-role" + ) + assert client_role["attributes"]["component"][0] == "shared-directory" + assert client_role["attributes"]["scopes"][0] == "write:shared-mount" + + +@pytest.mark.filterwarnings( + "ignore:.*auto_refresh_token is deprecated:DeprecationWarning" +) +@pytest.mark.filterwarnings("ignore::urllib3.exceptions.InsecureRequestWarning") +def test_groups_with_mount_permissions(): + client_role = get_keycloak_client_role( + client_name="jupyterhub", role_name="allow-group-directory-creation-role" + ) + client_details = get_keycloak_client_details_by_name(client_name="jupyterhub") + role_groups = get_keycloak_role_groups( + client_id=client_details["id"], role_name=client_role["name"] + ) + assert set([group["path"] for group in role_groups]) == set( + [ + "/developer", + "/admin", + "/analyst", + ] + ) @token_parameterized(note="before-role-creation-and-assignment")