feat(ci): release concrete-python to new zama public pypi #818

Merged · 1 commit · May 7, 2024
17 changes: 17 additions & 0 deletions .github/workflows/concrete_python_release.yml
@@ -193,6 +193,8 @@ jobs:
needs: [build-linux-x86, build-macos]
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3

- uses: actions/download-artifact@v4
with:
path: wheels
@@ -216,6 +218,21 @@ jobs:
-p "${{ secrets.PUBLIC_PYPI_PASSWORD }}" \
-r pypi

- name: Upload wheels to S3
if: ${{ env.RELEASE_TYPE == 'public' }}
env:
AWS_ACCESS_KEY_ID: ${{ secrets.AWS_IAM_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_IAM_KEY }}
AWS_DEFAULT_REGION: ${{ secrets.AWS_REGION }}
S3_BUCKET_NAME: ${{ secrets.AWS_S3_PYPI_BUCKET_NAME }}
CLOUDFRONT_DISTRIBUTION_ID: ${{ secrets.AWS_CLOUDFRONT_PYPI_DISTRIBUTION_ID }}
run: |
pip install boto3 bigtree
# upload wheels
aws s3 sync ./wheels/ s3://${S3_BUCKET_NAME}/concrete-python
# update indexes and invalidate cloudfront cache
python .github/workflows/scripts/s3_update_html_indexes.py

- name: Start pushing Docker images
if: ${{ env.RELEASE_TYPE == 'public' }}
run: |
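
To exercise the new step outside CI, here is a minimal sketch; the bucket name and distribution id below are placeholders, and configured AWS credentials are assumed:

import os
import subprocess

# placeholders, not the real Zama values
os.environ["S3_BUCKET_NAME"] = "zama-test-pypi-bucket"
os.environ["CLOUDFRONT_DISTRIBUTION_ID"] = "E1234EXAMPLE"

# mirror the workflow step: sync the wheels, then rebuild the HTML indexes
subprocess.run(
    ["aws", "s3", "sync", "./wheels/", f"s3://{os.environ['S3_BUCKET_NAME']}/concrete-python"],
    check=True,
)
subprocess.run(
    ["python", ".github/workflows/scripts/s3_update_html_indexes.py"],
    check=True,
)
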
162 changes: 162 additions & 0 deletions .github/workflows/scripts/s3_update_html_indexes.py
@@ -0,0 +1,162 @@
import base64
import hashlib
import os
import pprint
from time import time

import boto3
from bigtree import list_to_tree


# templates used to generate an HTML index
html_a_format = '<a href="{}">{}</a><br/>\n'
html_index_format = """
<!DOCTYPE html>
<html>
<body>
{}
</body>
</html>
"""


def get_s3_bucket_by_name(name: str):
"""Get the s3 bucket with the given name.

The function assumes there is a bucket with the given name, and will fail otherwise.

Args:
name (str): bucket name

Returns:
s3 bucket with the given name
"""
s3 = boto3.resource("s3")
buckets = s3.buckets.all()
filtered = list(filter(lambda b: b.name == name, buckets))
assert len(filtered) == 1, f"Expected a single bucket, but found {len(filtered)}"
return filtered[0]
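
# Illustrative call (bucket name is a placeholder):
#   bucket = get_s3_bucket_by_name("zama-test-pypi-bucket")
# boto3 resolves credentials from the AWS_ACCESS_KEY_ID / AWS_SECRET_ACCESS_KEY /
# AWS_DEFAULT_REGION variables exported by the workflow step above.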


def objects_to_file_tree(objects):
"""Get the file tree given objects in an s3 bucket.

It assumes object keys encode directories (e.g. all concrete-python/* keys sit under a concrete-python directory).

Args:
objects (list of s3 objects): objects from an s3 bucket

Returns:
file tree of the given s3 objects
"""
paths = []
for obj in objects:
# we prefix every object key with 'root/' so that the whole file tree sits under a single root directory.
# this prefix is accounted for later and stripped when computing object keys
paths.append(f"root/{obj.key}")
return list_to_tree(paths)
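
# Illustrative example (not executed): two bucket objects with keys
#   concrete-python/v2.6.0/pkg_a.whl
#   concrete-python/v2.6.0/pkg_b.whl
# become the tree
#   root
#   └── concrete-python
#       └── v2.6.0
#           ├── pkg_a.whl
#           └── pkg_b.whl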


def build_indexes(file_tree):
"""Build an html index for every directory in the file tree.

Args:
file_tree: the file tree we build indexes for

Returns:
dict: html index per object key (e.g. {"concrete-python/index.html": "HTML INDEX"})
"""
index_per_path = {}
files = []
for child in file_tree.children:
# if it's a directory, recurse to build the indexes for that subtree
if not child.is_leaf:
child_index_per_path = build_indexes(child)
index_per_path.update(child_index_per_path)
# we build a link relative to the current directory
link = f"{child.name}/index.html"
files.append((link, child.name))
# if it's a file, add it to the current directory's file list so it gets indexed
else:
# index files themselves don't need to be indexed
if child.name == "index.html":
continue
# remove "/root" and build link from root '/'
assert child.path_name.startswith("/root")
link = child.path_name.removeprefix("/root")
files.append((link, child.name))

# remove "/root" and append the index filename
if file_tree.is_root:
index_path = "index.html"
else:
assert file_tree.path_name.startswith("/root/")
index_path = file_tree.path_name.removeprefix("/root/") + "/index.html"

# Build the html index of the current directory
refs = ""
for f in files:
html_a = html_a_format.format(f[0], f[1])
refs = refs + html_a
index_per_path[index_path] = html_index_format.format(refs)
return index_per_path
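
# Illustrative example (not executed): for the tree above, build_indexes
# returns one HTML page per directory, keyed by object key:
#   {
#       "index.html": ...,                         # links to concrete-python/index.html
#       "concrete-python/index.html": ...,         # links to v2.6.0/index.html
#       "concrete-python/v2.6.0/index.html": ...,  # links to /concrete-python/v2.6.0/pkg_a.whl, ...
#   }
# note that directory links are relative while file links are absolute from the bucket root.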


def invalidate_cloudfront_cache(distribution_id, items_to_invalidate):
"""Invalidate CloudFront cache for a list of items.

Args:
distribution_id (str): CloudFront distribution id
items_to_invalidate (List[str]): list of items to invalidate

Returns:
dict: invalidation response
"""
client = boto3.client("cloudfront")
return client.create_invalidation(
DistributionId=distribution_id,
InvalidationBatch={
"Paths": {
"Quantity": len(items_to_invalidate),
"Items": items_to_invalidate,
},
"CallerReference": str(time()).replace(".", ""),
},
)
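
# Illustrative call (distribution id is a placeholder):
#   invalidate_cloudfront_cache("E1234EXAMPLE", ["/index.html", "/concrete-python/index.html"])
# CloudFront invalidation paths must start with "/", hence the slash prefix added
# in the main block below; CallerReference just needs to be unique per request.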


if __name__ == "__main__":
# retrieve bucket
s3_bucket_name = os.environ.get("S3_BUCKET_NAME")
if s3_bucket_name is None:
raise RuntimeError("S3_BUCKET_NAME env variable should be set")
bucket = get_s3_bucket_by_name(s3_bucket_name)
# get all objects in the bucket
objects = list(bucket.objects.all())
# build a file_tree from the list of objects
file_tree = objects_to_file_tree(objects)
# build html indexes for every directory in the file_tree
index_per_path = build_indexes(file_tree)
# upload indexes to the appropriate location
for path, index in index_per_path.items():
# log each HTML index together with its key (location) in the bucket
print(f"Writing index to {path}:\n{index}\n")
# body has to be bytes
body = index.encode()
# checksum is the base64-encoded MD5 digest that ContentMD5 expects
body_checksum = base64.b64encode(hashlib.md5(body).digest()).decode()
# ContentType isn't inferred automatically, so we set it explicitly to make sure
# the browser renders the page instead of downloading it
bucket.put_object(
Key=path, Body=body, ContentMD5=body_checksum, ContentType="text/html"
)
# invalidate cache for the indexes
cloudfront_distribution_id = os.environ.get("CLOUDFRONT_DISTRIBUTION_ID")
if cloudfront_distribution_id is None:
raise RuntimeError("CLOUDFRONT_DISTRIBUTION_ID env variable should be set")
keys_to_invalidate = ["/" + k for k in index_per_path.keys()]
print("Invalidating CloudFront cache for the following keys:", keys_to_invalidate)
response = invalidate_cloudfront_cache(
cloudfront_distribution_id, keys_to_invalidate
)
print("CloudFront invalidation response:")
pprint.pprint(response)
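
Once the step has run, a quick way to check that an index is served as a page rather than a download (the URL is a placeholder for wherever the CloudFront distribution is exposed):

import urllib.request

# placeholder URL; substitute the real distribution's domain
with urllib.request.urlopen("https://pypi.example.com/index.html") as resp:
    assert resp.headers.get("Content-Type", "").startswith("text/html")
    print(resp.read().decode()[:200])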