From 96b9c27cdff0c3c946b22a1f6156c2cff7136995 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 22 Aug 2024 14:26:41 -0700 Subject: [PATCH 01/10] Add programmable Google Search to pytorch tutorials site (#2820) * Add programmable Google Search to pytorch tutorials site --- _static/css/custom.css | 9 +++++++++ _templates/layout.html | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/_static/css/custom.css b/_static/css/custom.css index a467a08815..09aba28f25 100755 --- a/_static/css/custom.css +++ b/_static/css/custom.css @@ -91,3 +91,12 @@ transition: none; transform-origin: none; } + +.pytorch-left-menu-search input[type=text] { + background-image: none; +} + +.gsc-control-cse { + padding-left: 0px !important; + padding-bottom: 0px !important; +} diff --git a/_templates/layout.html b/_templates/layout.html index 22129040e4..1c632de63f 100644 --- a/_templates/layout.html +++ b/_templates/layout.html @@ -11,6 +11,23 @@ {%- endblock %} +{% block sidebartitle %} + {% if theme_display_version %} + {%- set nav_version = version %} + {% if READTHEDOCS and current_version %} + {%- set nav_version = current_version %} + {% endif %} + {% if nav_version %} +
+ {{ nav_version }} +
+ {% endif %} + {% endif %} + +{% endblock %} {% block footer %} {{ super() }} From ea2dfc67b42c24d999edff0235702a314aab466a Mon Sep 17 00:00:00 2001 From: Ankith Gunapal Date: Fri, 23 Aug 2024 10:58:18 -0700 Subject: [PATCH 02/10] Tutorial for AOTI Python runtime (#2997) * Tutorial for AOTI Python runtime --------- Co-authored-by: Svetlana Karslioglu Co-authored-by: Angela Yi --- .ci/docker/build.sh | 3 +- .ci/docker/common/common_utils.sh | 2 +- .ci/docker/requirements.txt | 6 +- .jenkins/metadata.json | 3 + en-wordlist.txt | 3 +- recipes_source/recipes_index.rst | 6 + recipes_source/torch_export_aoti_python.py | 220 +++++++++++++++++++++ 7 files changed, 237 insertions(+), 6 deletions(-) create mode 100644 recipes_source/torch_export_aoti_python.py diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 31f42fdbd8..c646b8f9a8 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -11,8 +11,9 @@ IMAGE_NAME="$1" shift export UBUNTU_VERSION="20.04" +export CUDA_VERSION="12.4.1" -export BASE_IMAGE="ubuntu:${UBUNTU_VERSION}" +export BASE_IMAGE="nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}" echo "Building ${IMAGE_NAME} Docker image" docker build \ diff --git a/.ci/docker/common/common_utils.sh b/.ci/docker/common/common_utils.sh index b20286a409..c7eabda555 100644 --- a/.ci/docker/common/common_utils.sh +++ b/.ci/docker/common/common_utils.sh @@ -22,5 +22,5 @@ conda_run() { } pip_install() { - as_ci_user conda run -n py_$ANACONDA_PYTHON_VERSION pip install --progress-bar off $* + as_ci_user conda run -n py_$ANACONDA_PYTHON_VERSION pip3 install --progress-bar off $* } diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index 00cf2f2103..9668b17fc3 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -30,8 +30,8 @@ pytorch-lightning torchx torchrl==0.5.0 tensordict==0.5.0 -ax-platform>==0.4.0 -nbformat>==5.9.2 +ax-platform>=0.4.0 +nbformat>=5.9.2 datasets transformers torchmultimodal-nightly # needs to be updated to stable as soon as it's avaialable @@ -68,4 +68,4 @@ pygame==2.1.2 pycocotools semilearn==0.3.2 torchao==0.0.3 -segment_anything==1.0 \ No newline at end of file +segment_anything==1.0 diff --git a/.jenkins/metadata.json b/.jenkins/metadata.json index 4814f9a7d2..2f1a9933aa 100644 --- a/.jenkins/metadata.json +++ b/.jenkins/metadata.json @@ -28,6 +28,9 @@ "intermediate_source/model_parallel_tutorial.py": { "needs": "linux.16xlarge.nvidia.gpu" }, + "recipes_source/torch_export_aoti_python.py": { + "needs": "linux.g5.4xlarge.nvidia.gpu" + }, "advanced_source/pendulum.py": { "needs": "linux.g5.4xlarge.nvidia.gpu", "_comment": "need to be here for the compiling_optimizer_lr_scheduler.py to run." diff --git a/en-wordlist.txt b/en-wordlist.txt index 62762ab69c..e69cbaa1a5 100644 --- a/en-wordlist.txt +++ b/en-wordlist.txt @@ -2,6 +2,7 @@ ACL ADI AOT +AOTInductor APIs ATen AVX @@ -617,4 +618,4 @@ warmstarting warmup webp wsi -wsis \ No newline at end of file +wsis diff --git a/recipes_source/recipes_index.rst b/recipes_source/recipes_index.rst index d94d7d5c22..caccdcc28f 100644 --- a/recipes_source/recipes_index.rst +++ b/recipes_source/recipes_index.rst @@ -150,6 +150,12 @@ Recipes are bite-sized, actionable examples of how to use specific PyTorch featu :link: ../recipes/recipes/swap_tensors.html :tags: Basics +.. customcarditem:: + :header: torch.export AOTInductor Tutorial for Python runtime + :card_description: Learn an end-to-end example of how to use AOTInductor for python runtime. 
:image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png
   :link: ../recipes/torch_export_aoti_python.html
   :tags: Basics

.. Interpretability

diff --git a/recipes_source/torch_export_aoti_python.py b/recipes_source/torch_export_aoti_python.py
new file mode 100644
index 0000000000..136862078c
--- /dev/null
+++ b/recipes_source/torch_export_aoti_python.py
@@ -0,0 +1,220 @@
# -*- coding: utf-8 -*-

"""
(Beta) ``torch.export`` AOTInductor Tutorial for Python runtime
===============================================================
**Authors:** Ankith Gunapal, Bin Bao, Angela Yi
"""

######################################################################
#
# .. warning::
#
#     ``torch._inductor.aot_compile`` and ``torch._export.aot_load`` are in Beta status and may change in
#     backward-incompatible ways. This tutorial provides an example of how to use these APIs for model deployment using the Python runtime.
#
# It has been shown `previously `__ how AOTInductor can be used
# to do Ahead-of-Time compilation of PyTorch exported models by creating
# a shared library that can be run in a non-Python environment.
#
# In this tutorial, you will learn an end-to-end example of how to use AOTInductor for the Python runtime.
# We will look at how to use :func:`torch._inductor.aot_compile` along with :func:`torch.export.export` to generate a
# shared library. Additionally, we will examine how to execute the shared library in the Python runtime using :func:`torch._export.aot_load`.
# You will learn about the speedup in first inference time when using AOTInductor, especially when using
# ``max-autotune`` mode, which can take some time to execute.
#
# **Contents**
#
# .. contents::
#     :local:

######################################################################
# Prerequisites
# -------------
# * PyTorch 2.4 or later
# * Basic understanding of ``torch.export`` and AOTInductor
# * Complete the `AOTInductor: Ahead-Of-Time Compilation for Torch.Export-ed Models `_ tutorial

######################################################################
# What you will learn
# ----------------------
# * How to use AOTInductor for the Python runtime.
# * How to use :func:`torch._inductor.aot_compile` along with :func:`torch.export.export` to generate a shared library.
# * How to run a shared library in the Python runtime using :func:`torch._export.aot_load`.
# * When to use AOTInductor for the Python runtime.

######################################################################
# Model Compilation
# -----------------
#
# We will use the TorchVision pretrained ``ResNet18`` model and run TorchInductor on the
# exported PyTorch program using :func:`torch._inductor.aot_compile`.
#
# .. note::
#
#       This API also supports :func:`torch.compile` options, such as ``mode``.
#       For example, on a CUDA-enabled device you can set ``"max_autotune": True``,
#       which leverages Triton-based matrix multiplications and convolutions, and enables CUDA graphs by default.
#
# We also specify ``dynamic_shapes`` for the batch dimension. In this example, ``min=2`` is not a bug; it is
# explained in `The 0/1 Specialization Problem `__.


import os
import torch
from torchvision.models import ResNet18_Weights, resnet18

model = resnet18(weights=ResNet18_Weights.DEFAULT)
model.eval()

with torch.inference_mode():

    # Specify the generated shared library path
    aot_compile_options = {
        "aot_inductor.output_path": os.path.join(os.getcwd(), "resnet18_pt2.so"),
    }
    if torch.cuda.is_available():
        device = "cuda"
        aot_compile_options.update({"max_autotune": True})
    else:
        device = "cpu"

    model = model.to(device=device)
    example_inputs = (torch.randn(2, 3, 224, 224, device=device),)

    # min=2 is not a bug and is explained in the 0/1 Specialization Problem
    batch_dim = torch.export.Dim("batch", min=2, max=32)
    exported_program = torch.export.export(
        model,
        example_inputs,
        # Specify the first dimension of the input x as dynamic
        dynamic_shapes={"x": {0: batch_dim}},
    )
    so_path = torch._inductor.aot_compile(
        exported_program.module(),
        example_inputs,
        # Specify the generated shared library path
        options=aot_compile_options
    )


######################################################################
# Model Inference in Python
# -------------------------
#
# Typically, the shared object generated above is used in a non-Python environment. In PyTorch 2.3,
# we added a new API called :func:`torch._export.aot_load` to load the shared library in the Python runtime.
# The API follows a structure similar to the :func:`torch.jit.load` API. You need to specify the path
# of the shared library and the device where it should be loaded.
#
# .. note::
#      In the example below, we specify ``batch_size=1`` for inference, and it still functions correctly even though we specified ``min=2`` in
#      :func:`torch.export.export`.


import os
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
model_so_path = os.path.join(os.getcwd(), "resnet18_pt2.so")

model = torch._export.aot_load(model_so_path, device)
example_inputs = torch.randn(1, 3, 224, 224, device=device)

with torch.inference_mode():
    output = model(example_inputs)

######################################################################
# When to use AOTInductor for Python Runtime
# ------------------------------------------
#
# One of the requirements for using AOTInductor is that the model shouldn't have any graph breaks.
# Once this requirement is met, the primary use case for the AOTInductor Python runtime is
# model deployment using Python.
# There are two main reasons why you would use the AOTInductor Python runtime:
#
# - ``torch._inductor.aot_compile`` generates a shared library. This is useful for model
#   versioning for deployments and tracking model performance over time.
# - With :func:`torch.compile` being a JIT compiler, there is a warmup
#   cost associated with the first compilation. Your deployment needs to account for the
#   compilation time taken for the first inference. With AOTInductor, the compilation is
#   done offline using ``torch.export.export`` and ``torch._inductor.aot_compile``. The deployment
#   would only load the shared library using ``torch._export.aot_load`` and run inference, as shown
#   in the quick check below.
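######################################################################
# As a quick sanity check of the dynamic batch dimension, the snippet below
# runs the loaded shared library across several batch sizes. This is a
# minimal sketch: it assumes the ``resnet18_pt2.so`` generated above and
# PyTorch 2.4 or later. For ResNet18, each output should have shape
# ``(batch_size, 1000)``.

with torch.inference_mode():
    # 32 is the max of Dim("batch", min=2, max=32); batch size 1 works via
    # the 0/1 specialization behavior discussed above.
    for batch_size in (1, 4, 32):
        x = torch.randn(batch_size, 3, 224, 224, device=device)
        out = model(x)
        print(f"batch_size={batch_size} -> output shape {tuple(out.shape)}")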
######################################################################
# The section below shows the speedup achieved with AOTInductor for the first inference.
#
# We define a utility function ``timed`` to measure the time taken for inference.
#

import time
def timed(fn):
    # Returns the result of running `fn()` and the time it took for `fn()` to run,
    # in milliseconds. We use CUDA events and synchronization for accurate
    # measurement on CUDA-enabled devices.
    if torch.cuda.is_available():
        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        start.record()
    else:
        start = time.time()

    result = fn()
    if torch.cuda.is_available():
        end.record()
        torch.cuda.synchronize()
    else:
        end = time.time()

    # Measure time taken to execute the function in milliseconds
    if torch.cuda.is_available():
        duration = start.elapsed_time(end)
    else:
        duration = (end - start) * 1000

    return result, duration


######################################################################
# Let's measure the time for first inference using AOTInductor.

torch._dynamo.reset()

model = torch._export.aot_load(model_so_path, device)
example_inputs = torch.randn(1, 3, 224, 224, device=device)

with torch.inference_mode():
    _, time_taken = timed(lambda: model(example_inputs))
    print(f"Time taken for first inference for AOTInductor is {time_taken:.2f} ms")


######################################################################
# Let's measure the time for first inference using ``torch.compile``.

torch._dynamo.reset()

model = resnet18(weights=ResNet18_Weights.DEFAULT).to(device)
model.eval()

model = torch.compile(model)
example_inputs = torch.randn(1, 3, 224, 224, device=device)

with torch.inference_mode():
    _, time_taken = timed(lambda: model(example_inputs))
    print(f"Time taken for first inference for torch.compile is {time_taken:.2f} ms")

######################################################################
# We see that there is a drastic speedup in first inference time using AOTInductor compared
# to ``torch.compile``.

######################################################################
# Conclusion
# ----------
#
# In this recipe, we have learned how to effectively use AOTInductor for the Python runtime by
# compiling and loading a pretrained ``ResNet18`` model using the ``torch._inductor.aot_compile``
# and ``torch._export.aot_load`` APIs. This process demonstrates the practical application of
# generating a shared library and running it within a Python environment, even with dynamic shape
# considerations and device-specific optimizations. We also looked at the advantage of using
# AOTInductor in model deployments, with regard to the speedup in first inference time.
From ec867b2f0a0764fb5139277247d8d07c76a82fe4 Mon Sep 17 00:00:00 2001
From: Svetlana Karslioglu
Date: Sat, 24 Aug 2024 11:14:07 -0700
Subject: [PATCH 03/10] Create tutorial_submission_policy.md (#2995)

- Add a policy for new tutorial submissions

---------

Co-authored-by: albanD
Co-authored-by: Nikita Shulga <2453524+malfet@users.noreply.github.com>
---
 README.md                     |   2 +
 tutorial_submission_policy.md | 107 ++++++++++++++++++++++++++++++++++
 2 files changed, 109 insertions(+)
 create mode 100644 tutorial_submission_policy.md

diff --git a/README.md b/README.md
index 0c961afd26..fe4b4b6edd 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,8 @@ We use sphinx-gallery's [notebook styled examples](https://sphinx-gallery.github

 Here is how you can create a new tutorial (for a detailed description, see [CONTRIBUTING.md](./CONTRIBUTING.md)):

+NOTE: Before submitting a new tutorial, read [PyTorch Tutorial Submission Policy](./tutorial_submission_policy.md).
+
 1. Create a Python file. If you want it executed while inserted into documentation, save the file with the suffix `tutorial` so that the file name is `your_tutorial.py`.
 2. Put it in one of the `beginner_source`, `intermediate_source`, `advanced_source` directory based on the level of difficulty. If it is a recipe, add it to `recipes_source`. For tutorials demonstrating unstable prototype features, add to the `prototype_source`.
 3. For Tutorials (except if it is a prototype feature), include it in the `toctree` directive and create a `customcarditem` in [index.rst](./index.rst).
diff --git a/tutorial_submission_policy.md b/tutorial_submission_policy.md
new file mode 100644
index 0000000000..c5c3a80087
--- /dev/null
+++ b/tutorial_submission_policy.md
@@ -0,0 +1,107 @@
# PyTorch Tutorial Submission Policy

This policy outlines the criteria and process for submitting new
tutorials to the PyTorch community.
Our goal is to ensure that all tutorials are high quality,
relevant, and up-to-date, supporting both the growth of the PyTorch
user base and the evolution of the PyTorch framework itself. By following
these guidelines, contributors can help us maintain a robust and
informative educational environment.

## Acceptance Criteria For New Tutorials

We accept new tutorials that adhere to one of the following use cases:

* **Demonstrate New PyTorch Features:** Tutorials that support new features
  for upcoming PyTorch releases are typically authored by the engineers who
  are developing these features. These tutorials are crucial for showcasing
  the latest advancements in PyTorch. We typically do not require more than
  one tutorial per feature.

* **Tutorials showcasing PyTorch usage with other tools and libraries:** We
  accept community-contributed tutorials that illustrate innovative uses of
  PyTorch alongside other open-source projects, models, and tools. Please
  ensure that your tutorial remains neutral and does not promote or endorse
  proprietary technologies over others.

The first use case does not require going through the submission
process outlined below. If your tutorial falls under the second category,
please read and follow the instructions in the
**Submission Process For Community-Contributed Tutorials** section.

## Submission Process For Community-Contributed Tutorials

To maintain the quality and relevance of tutorials, we request that
community-contributed tutorials undergo a review process. If you are
interested in contributing a tutorial, please follow these steps:

1. **Create an issue:**
   * Open an issue in the pytorch/tutorials repository proposing the
     new tutorial. Clearly explain the importance of the tutorial and
     confirm that there is no existing tutorial covering the same or
     a similar topic. A tutorial should not disproportionately endorse
     one technology over another. Please consult with Core Maintainers
     to ensure your content adheres to these guidelines.
     Use the provided [ISSUE_TEMPLATE](/~https://github.com/pytorch/tutorials/blob/main/.github/ISSUE_TEMPLATE/feature-request.yml) for the new tutorial request, and select **Feature request** when submitting the issue.

   * If there is an existing tutorial on the topic that you would
     like to significantly refactor, you can submit a PR. In the
     description of the PR, explain why the changes are needed and
     how they improve the tutorial.

   * These issues will be triaged by PyTorch maintainers on a case-by-case basis.
   * Link any supporting materials, including discussions in other repositories.

1. **Await Approval:**
   * Wait for a response from the PyTorch Tutorials maintainers. A PyTorch
     tutorials maintainer will review your proposal and
     determine whether a tutorial on the proposed topic is desirable.
     A comment and an **approved** label will be added to your issue
     by a maintainer. The review process for new tutorial PRs submitted
     without the corresponding issue may take longer.

1. **Adhere to writing and styling guidelines:**
   * Once approved, follow the guidelines outlined in [CONTRIBUTING.md](/~https://github.com/pytorch/tutorials/blob/main/CONTRIBUTING.md)
     and use the provided [template](/~https://github.com/pytorch/tutorials/blob/main/beginner_source/template_tutorial.py) for creating your tutorial, as sketched below.
   * Link the issue in which you received approval for your tutorial
     in the PR.
   * We accept tutorials in both ``.rst`` (ReStructuredText) and ``.py``
     (Python) formats. However, unless your tutorial involves using
     multiple GPUs, parallel/distributed training, or requires extended
     execution time (25 minutes or more), we prefer submissions
     in the Python file format.

## Maintaining Tutorials

When you submit a new tutorial, we encourage you to keep it in sync
with the latest PyTorch updates and features. Additionally, we may
contact you to review any PRs, issues, and other related matters to
ensure the tutorial remains a valuable resource.

Please note the following:

* If a tutorial breaks against the main branch, it will
  be excluded from the build and an issue will be filed against it,
  with the author/maintainer notified. If the issue is not resolved
  within 90 days, the tutorial might be deleted from the repository.

* We recommend that each tutorial is reviewed at least once a year to
  ensure its relevance.

## Deleting Stale Tutorials

A tutorial might be considered stale when it no longer aligns with
the latest PyTorch updates, features, or best practices:

* The tutorial is no longer functional due to changes in PyTorch or
  its dependencies.
* The tutorial has been superseded by a newer, more comprehensive, or
  more accurate tutorial.
* The tutorial does not run successfully in CI, indicating
  potential compatibility or dependency issues.

If a tutorial is deemed stale, we will attempt to contact the code owner,
or someone from the tutorials maintainers might attempt to update it.
However, if despite those attempts we fail to fix it, the tutorial
might be removed from the repository.
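For orientation, tutorials in this repository follow the sphinx-gallery layout used by the template referenced in the guidelines above. The skeleton below is a minimal sketch of that shape; the file name, section titles, and code are illustrative placeholders, and the linked `template_tutorial.py` remains the authoritative starting point.

```python
# -*- coding: utf-8 -*-
"""
Your Tutorial Title
===================
**Author:** Your Name

One or two sentences summarizing what the reader will learn.
"""

######################################################################
# Setup
# -----
# Narrative text lives in comment blocks like this one; sphinx-gallery
# renders it as prose, and the code between blocks is executed when the
# tutorial is built.

import torch

x = torch.rand(2, 3)
print(x)

######################################################################
# Conclusion
# ----------
# Recap what was learned and link to related tutorials or recipes.
```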
From 4c1dd9d3fa1e6b41ef53dfc49ae5338fd6ce28cd Mon Sep 17 00:00:00 2001 From: Tim Statler Date: Mon, 26 Aug 2024 19:02:14 -0700 Subject: [PATCH 04/10] Removed upper-case letter/made 'download' the link text instead of 'here'/identified zip file (#3015) --- beginner_source/chatbot_tutorial.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/beginner_source/chatbot_tutorial.py b/beginner_source/chatbot_tutorial.py index 44310cc362..f902f8cd71 100644 --- a/beginner_source/chatbot_tutorial.py +++ b/beginner_source/chatbot_tutorial.py @@ -84,8 +84,7 @@ # Preparations # ------------ # -# To start, Download the data ZIP file -# `here `__ +# To get started, `download `__ the Movie-Dialogs Corpus zip file. # and put in a ``data/`` directory under the current directory. # From afcd669d56554c2ef46f2da92edbfaf8c54043ec Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Tue, 27 Aug 2024 12:57:19 -0700 Subject: [PATCH 05/10] Add weights_only=True to torch.load (#3012) * Add weights_only=True to torch.load --- advanced_source/dynamic_quantization_tutorial.py | 3 ++- advanced_source/static_quantization_tutorial.rst | 2 +- beginner_source/basics/quickstart_tutorial.py | 2 +- beginner_source/basics/saveloadrun_tutorial.py | 16 +++++++++++++--- beginner_source/blitz/cifar10_tutorial.py | 2 +- beginner_source/fgsm_tutorial.py | 2 +- beginner_source/saving_loading_models.py | 16 ++++++++-------- beginner_source/transfer_learning_tutorial.py | 2 +- .../autograd_saved_tensors_hooks_tutorial.py | 6 +++--- intermediate_source/ddp_tutorial.rst | 2 +- intermediate_source/tiatoolbox_tutorial.rst | 2 +- prototype_source/fx_graph_mode_ptq_dynamic.py | 3 ++- prototype_source/fx_graph_mode_ptq_static.rst | 6 +++--- prototype_source/pt2e_quant_ptq.rst | 2 +- prototype_source/pt2e_quant_qat.rst | 2 +- .../intel_neural_compressor_for_pytorch.rst | 2 +- .../recipes/module_load_state_dict_tips.py | 8 ++++---- .../recipes/save_load_across_devices.py | 2 +- .../saving_and_loading_a_general_checkpoint.py | 2 +- .../saving_and_loading_models_for_inference.py | 2 +- .../saving_multiple_models_in_one_file.py | 2 +- ...el_using_parameters_from_a_different_model.py | 2 +- 22 files changed, 50 insertions(+), 38 deletions(-) diff --git a/advanced_source/dynamic_quantization_tutorial.py b/advanced_source/dynamic_quantization_tutorial.py index 9cc07a1d95..c8d94789d5 100644 --- a/advanced_source/dynamic_quantization_tutorial.py +++ b/advanced_source/dynamic_quantization_tutorial.py @@ -151,7 +151,8 @@ def tokenize(self, path): model.load_state_dict( torch.load( model_data_filepath + 'word_language_model_quantize.pth', - map_location=torch.device('cpu') + map_location=torch.device('cpu'), + weights_only=True ) ) diff --git a/advanced_source/static_quantization_tutorial.rst b/advanced_source/static_quantization_tutorial.rst index 3b818aa03a..efb171c0df 100644 --- a/advanced_source/static_quantization_tutorial.rst +++ b/advanced_source/static_quantization_tutorial.rst @@ -286,7 +286,7 @@ We next define several helper functions to help with model evaluation. 
These mos
     def load_model(model_file):
         model = MobileNetV2()
-        state_dict = torch.load(model_file)
+        state_dict = torch.load(model_file, weights_only=True)
         model.load_state_dict(state_dict)
         model.to('cpu')
         return model
diff --git a/beginner_source/basics/quickstart_tutorial.py b/beginner_source/basics/quickstart_tutorial.py
index 07a1be517d..df7628081b 100644
--- a/beginner_source/basics/quickstart_tutorial.py
+++ b/beginner_source/basics/quickstart_tutorial.py
@@ -216,7 +216,7 @@ def test(dataloader, model, loss_fn):
 # the state dictionary into it.

 model = NeuralNetwork().to(device)
-model.load_state_dict(torch.load("model.pth"))
+model.load_state_dict(torch.load("model.pth", weights_only=True))

 #############################################################
 # This model can now be used to make predictions.
diff --git a/beginner_source/basics/saveloadrun_tutorial.py b/beginner_source/basics/saveloadrun_tutorial.py
index 16a9f03741..5b3aef124b 100644
--- a/beginner_source/basics/saveloadrun_tutorial.py
+++ b/beginner_source/basics/saveloadrun_tutorial.py
@@ -32,9 +32,14 @@
 ##########################
 # To load model weights, you need to create an instance of the same model first, and then load the parameters
 # using ``load_state_dict()`` method.
+#
+# In the code below, we set ``weights_only=True`` to limit the
+# functions executed during unpickling to only those necessary for
+# loading weights. Using ``weights_only=True`` is considered
+# a best practice when loading weights.

 model = models.vgg16() # we do not specify ``weights``, i.e. create untrained model
-model.load_state_dict(torch.load('model_weights.pth'))
+model.load_state_dict(torch.load('model_weights.pth', weights_only=True))
 model.eval()

@@ -50,9 +55,14 @@
 torch.save(model, 'model.pth')

 ########################
-# We can then load the model like this:
+# We can then load the model as demonstrated below.
+#
+# As described in `Saving and loading torch.nn.Modules `__,
+# saving ``state_dict``s is considered the best practice. However,
+# below we use ``weights_only=False`` because this involves loading the
+# model, which is a legacy use case for ``torch.save``.

-model = torch.load('model.pth')
+model = torch.load('model.pth', weights_only=False)

 ########################
 # .. note:: This approach uses Python `pickle `_ module when serializing the model, thus it relies on the actual class definition to be available when loading the model.
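The ``weights_only`` distinction this patch standardizes is easy to verify empirically. The snippet below is a minimal sketch, assuming PyTorch 2.4 or later, where ``torch.load(..., weights_only=True)`` refuses to unpickle arbitrary objects such as a full ``nn.Module``:

```python
import torch
import torch.nn as nn

m = nn.Linear(4, 2)

# A plain state dict contains only tensors, so weights_only=True can load it.
torch.save(m.state_dict(), "weights.pth")
m.load_state_dict(torch.load("weights.pth", weights_only=True))

# A pickled module requires reconstructing the nn.Linear class itself,
# which the restricted unpickler rejects.
torch.save(m, "full_model.pth")
try:
    torch.load("full_model.pth", weights_only=True)
except Exception as err:
    print(f"weights_only=True refused the full module: {err}")

# The legacy path still works when you opt out explicitly.
m2 = torch.load("full_model.pth", weights_only=False)
```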
diff --git a/beginner_source/blitz/cifar10_tutorial.py b/beginner_source/blitz/cifar10_tutorial.py
index 8e3f325292..f38abdd566 100644
--- a/beginner_source/blitz/cifar10_tutorial.py
+++ b/beginner_source/blitz/cifar10_tutorial.py
@@ -221,7 +221,7 @@ def forward(self, x):
 # wasn't necessary here, we only did it to illustrate how to do so):

 net = Net()
-net.load_state_dict(torch.load(PATH))
+net.load_state_dict(torch.load(PATH, weights_only=True))

 ########################################################################
 # Okay, now let us see what the neural network thinks these examples above are:
diff --git a/beginner_source/fgsm_tutorial.py b/beginner_source/fgsm_tutorial.py
index 007ad3fd95..9bdf52d84b 100644
--- a/beginner_source/fgsm_tutorial.py
+++ b/beginner_source/fgsm_tutorial.py
@@ -192,7 +192,7 @@ def forward(self, x):
 model = Net().to(device)

 # Load the pretrained model
-model.load_state_dict(torch.load(pretrained_model, map_location=device))
+model.load_state_dict(torch.load(pretrained_model, map_location=device, weights_only=True))

 # Set the model in evaluation mode. In this case this is for the Dropout layers
 model.eval()
diff --git a/beginner_source/saving_loading_models.py b/beginner_source/saving_loading_models.py
index fcd33be253..6c9b6b1fd7 100644
--- a/beginner_source/saving_loading_models.py
+++ b/beginner_source/saving_loading_models.py
@@ -153,7 +153,7 @@
 # .. code:: python
 #
 #    model = TheModelClass(*args, **kwargs)
-#    model.load_state_dict(torch.load(PATH))
+#    model.load_state_dict(torch.load(PATH, weights_only=True))
 #    model.eval()
 #
 # .. note::
@@ -206,7 +206,7 @@
 # .. code:: python
 #
 #    # Model class must be defined somewhere
-#    model = torch.load(PATH)
+#    model = torch.load(PATH, weights_only=False)
 #    model.eval()
 #
 # This save/load process uses the most intuitive syntax and involves the
@@ -290,7 +290,7 @@
 #    model = TheModelClass(*args, **kwargs)
 #    optimizer = TheOptimizerClass(*args, **kwargs)
 #
-#    checkpoint = torch.load(PATH)
+#    checkpoint = torch.load(PATH, weights_only=True)
 #    model.load_state_dict(checkpoint['model_state_dict'])
 #    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
 #    epoch = checkpoint['epoch']
@@ -354,7 +354,7 @@
 #    optimizerA = TheOptimizerAClass(*args, **kwargs)
 #    optimizerB = TheOptimizerBClass(*args, **kwargs)
 #
-#    checkpoint = torch.load(PATH)
+#    checkpoint = torch.load(PATH, weights_only=True)
 #    modelA.load_state_dict(checkpoint['modelA_state_dict'])
 #    modelB.load_state_dict(checkpoint['modelB_state_dict'])
 #    optimizerA.load_state_dict(checkpoint['optimizerA_state_dict'])
@@ -407,7 +407,7 @@
 # .. code:: python
 #
 #    modelB = TheModelBClass(*args, **kwargs)
-#    modelB.load_state_dict(torch.load(PATH), strict=False)
+#    modelB.load_state_dict(torch.load(PATH, weights_only=True), strict=False)
 #
 # Partially loading a model or loading a partial model are common
 # scenarios when transfer learning or training a new complex model.
@@ -446,7 +446,7 @@ # # device = torch.device('cpu') # model = TheModelClass(*args, **kwargs) -# model.load_state_dict(torch.load(PATH, map_location=device)) +# model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True)) # # When loading a model on a CPU that was trained with a GPU, pass # ``torch.device('cpu')`` to the ``map_location`` argument in the @@ -469,7 +469,7 @@ # # device = torch.device("cuda") # model = TheModelClass(*args, **kwargs) -# model.load_state_dict(torch.load(PATH)) +# model.load_state_dict(torch.load(PATH, weights_only=True)) # model.to(device) # # Make sure to call input = input.to(device) on any input tensors that you feed to the model # @@ -497,7 +497,7 @@ # # device = torch.device("cuda") # model = TheModelClass(*args, **kwargs) -# model.load_state_dict(torch.load(PATH, map_location="cuda:0")) # Choose whatever GPU device number you want +# model.load_state_dict(torch.load(PATH, weights_only=True, map_location="cuda:0")) # Choose whatever GPU device number you want # model.to(device) # # Make sure to call input = input.to(device) on any input tensors that you feed to the model # diff --git a/beginner_source/transfer_learning_tutorial.py b/beginner_source/transfer_learning_tutorial.py index 7a2b053763..de7a178bd7 100644 --- a/beginner_source/transfer_learning_tutorial.py +++ b/beginner_source/transfer_learning_tutorial.py @@ -209,7 +209,7 @@ def train_model(model, criterion, optimizer, scheduler, num_epochs=25): print(f'Best val Acc: {best_acc:4f}') # load best model weights - model.load_state_dict(torch.load(best_model_params_path)) + model.load_state_dict(torch.load(best_model_params_path, weights_only=True)) return model diff --git a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py index f16b170ee6..ed581426c2 100644 --- a/intermediate_source/autograd_saved_tensors_hooks_tutorial.py +++ b/intermediate_source/autograd_saved_tensors_hooks_tutorial.py @@ -397,7 +397,7 @@ def pack_hook(tensor): return name def unpack_hook(name): - return torch.load(name) + return torch.load(name, weights_only=True) ###################################################################### @@ -420,7 +420,7 @@ def pack_hook(tensor): return name def unpack_hook(name): - tensor = torch.load(name) + tensor = torch.load(name, weights_only=True) os.remove(name) return tensor @@ -462,7 +462,7 @@ def pack_hook(tensor): return temp_file def unpack_hook(temp_file): - return torch.load(temp_file.name) + return torch.load(temp_file.name, weights_only=True) ###################################################################### diff --git a/intermediate_source/ddp_tutorial.rst b/intermediate_source/ddp_tutorial.rst index 13297fb2a1..cff5105fa5 100644 --- a/intermediate_source/ddp_tutorial.rst +++ b/intermediate_source/ddp_tutorial.rst @@ -214,7 +214,7 @@ and elasticity support, please refer to `TorchElastic `_. diff --git a/recipes_source/recipes/module_load_state_dict_tips.py b/recipes_source/recipes/module_load_state_dict_tips.py index 17c812b016..70e9830cb3 100644 --- a/recipes_source/recipes/module_load_state_dict_tips.py +++ b/recipes_source/recipes/module_load_state_dict_tips.py @@ -39,7 +39,7 @@ def forward(self, x): # to ``torch.load``, the ``torch.device()`` context manager and the ``assign`` # keyword argument to ``nn.Module.load_state_dict()``. 
-state_dict = torch.load('checkpoint.pth', mmap=True) +state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True) with torch.device('meta'): meta_m = SomeModule(1000) meta_m.load_state_dict(state_dict, assign=True) @@ -47,7 +47,7 @@ def forward(self, x): ############################################################################# # Compare the snippet below to the one above: -state_dict = torch.load('checkpoint.pth') +state_dict = torch.load('checkpoint.pth', weights_only=True) m = SomeModule(1000) m.load_state_dict(state_dict) @@ -71,7 +71,7 @@ def forward(self, x): # * Waiting for the entire checkpoint to be loaded into RAM before performing, for example, some per-tensor processing. start_time = time.time() -state_dict = torch.load('checkpoint.pth') +state_dict = torch.load('checkpoint.pth', weights_only=True) end_time = time.time() print(f"loading time without mmap={end_time - start_time}") @@ -84,7 +84,7 @@ def forward(self, x): # storages will be memory-mapped. start_time = time.time() -state_dict = torch.load('checkpoint.pth', mmap=True) +state_dict = torch.load('checkpoint.pth', mmap=True, weights_only=True) end_time = time.time() print(f"loading time with mmap={end_time - start_time}") diff --git a/recipes_source/recipes/save_load_across_devices.py b/recipes_source/recipes/save_load_across_devices.py index be950e15b1..c59af8821e 100644 --- a/recipes_source/recipes/save_load_across_devices.py +++ b/recipes_source/recipes/save_load_across_devices.py @@ -97,7 +97,7 @@ def forward(self, x): # Load device = torch.device('cpu') model = Net() -model.load_state_dict(torch.load(PATH, map_location=device)) +model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True)) ###################################################################### diff --git a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py index 31b14f3a28..8c773a1490 100644 --- a/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py +++ b/recipes_source/recipes/saving_and_loading_a_general_checkpoint.py @@ -131,7 +131,7 @@ def forward(self, x): model = Net() optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9) -checkpoint = torch.load(PATH) +checkpoint = torch.load(PATH, weights_only=True) model.load_state_dict(checkpoint['model_state_dict']) optimizer.load_state_dict(checkpoint['optimizer_state_dict']) epoch = checkpoint['epoch'] diff --git a/recipes_source/recipes/saving_and_loading_models_for_inference.py b/recipes_source/recipes/saving_and_loading_models_for_inference.py index cd24b77c1d..7adce2a90b 100644 --- a/recipes_source/recipes/saving_and_loading_models_for_inference.py +++ b/recipes_source/recipes/saving_and_loading_models_for_inference.py @@ -117,7 +117,7 @@ def forward(self, x): # Load model = Net() -model.load_state_dict(torch.load(PATH)) +model.load_state_dict(torch.load(PATH, weights_only=True)) model.eval() diff --git a/recipes_source/recipes/saving_multiple_models_in_one_file.py b/recipes_source/recipes/saving_multiple_models_in_one_file.py index f468d7ac6a..e938be03b4 100644 --- a/recipes_source/recipes/saving_multiple_models_in_one_file.py +++ b/recipes_source/recipes/saving_multiple_models_in_one_file.py @@ -128,7 +128,7 @@ def forward(self, x): optimModelA = optim.SGD(modelA.parameters(), lr=0.001, momentum=0.9) optimModelB = optim.SGD(modelB.parameters(), lr=0.001, momentum=0.9) -checkpoint = torch.load(PATH) +checkpoint = torch.load(PATH, weights_only=True) 
modelA.load_state_dict(checkpoint['modelA_state_dict']) modelB.load_state_dict(checkpoint['modelB_state_dict']) optimizerA.load_state_dict(checkpoint['optimizerA_state_dict']) diff --git a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py index 40aeeea9db..a0752bfc67 100644 --- a/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py +++ b/recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model.py @@ -124,7 +124,7 @@ def forward(self, x): # are loading into. # -netB.load_state_dict(torch.load(PATH), strict=False) +netB.load_state_dict(torch.load(PATH, weights_only=True), strict=False) ###################################################################### From fc016bdc4b10cdbe3437114279dcdcb73fadd82f Mon Sep 17 00:00:00 2001 From: dev_thomas <36235705+hadh93@users.noreply.github.com> Date: Thu, 29 Aug 2024 01:41:22 +0900 Subject: [PATCH 06/10] Fix typos in dynamic_quantization_bert_tutorial.rst (#3019) --- intermediate_source/dynamic_quantization_bert_tutorial.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/intermediate_source/dynamic_quantization_bert_tutorial.rst b/intermediate_source/dynamic_quantization_bert_tutorial.rst index 1ea6ea46dd..e515f53a1d 100644 --- a/intermediate_source/dynamic_quantization_bert_tutorial.rst +++ b/intermediate_source/dynamic_quantization_bert_tutorial.rst @@ -79,7 +79,7 @@ Mac: .. code:: shell - yes y | pip uninstall torch tochvision + yes y | pip uninstall torch torchvision yes y | pip install --pre torch -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html @@ -206,7 +206,7 @@ in `examples `_. +We provide the fine-tuned BERT model for MRPC task `here `_. To save time, you can download the model file (~400 MB) directly into your local folder ``$OUT_DIR``. 2.1 Set global configurations @@ -273,7 +273,7 @@ We load the tokenizer and fine-tuned BERT sequence classifier model 2.3 Define the tokenize and evaluation function ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -We reuse the tokenize and evaluation function from `Huggingface `_. +We reuse the tokenize and evaluation function from `HuggingFace `_. .. code:: python From 4cb73cabeeae16d52d35e3098312da123ed5b431 Mon Sep 17 00:00:00 2001 From: Richard Zou Date: Thu, 29 Aug 2024 10:05:07 -0400 Subject: [PATCH 07/10] Improve custom ops tutorials (#3020) Co-authored-by: Svetlana Karslioglu --- advanced_source/cpp_custom_ops.rst | 2 ++ advanced_source/python_custom_ops.py | 15 ++++++++++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/advanced_source/cpp_custom_ops.rst b/advanced_source/cpp_custom_ops.rst index 435ff088bc..ffabd6eff7 100644 --- a/advanced_source/cpp_custom_ops.rst +++ b/advanced_source/cpp_custom_ops.rst @@ -174,6 +174,8 @@ To add ``torch.compile`` support for an operator, we must add a FakeTensor kerne known as a "meta kernel" or "abstract impl"). FakeTensors are Tensors that have metadata (such as shape, dtype, device) but no data: the FakeTensor kernel for an operator specifies how to compute the metadata of output tensors given the metadata of input tensors. +The FakeTensor kernel should return dummy Tensors of your choice with +the correct Tensor metadata (shape/strides/``dtype``/device). 
We recommend that this be done from Python via the `torch.library.register_fake` API, though it is possible to do this from C++ as well (see diff --git a/advanced_source/python_custom_ops.py b/advanced_source/python_custom_ops.py index 1e429b76b3..0b3bf6e474 100644 --- a/advanced_source/python_custom_ops.py +++ b/advanced_source/python_custom_ops.py @@ -66,7 +66,7 @@ def display(img): ###################################################################### # ``crop`` is not handled effectively out-of-the-box by # ``torch.compile``: ``torch.compile`` induces a -# `"graph break" `_ +# `"graph break" `_ # on functions it is unable to handle and graph breaks are bad for performance. # The following code demonstrates this by raising an error # (``torch.compile`` with ``fullgraph=True`` raises an error if a @@ -85,9 +85,9 @@ def f(img): # # 1. wrap the function into a PyTorch custom operator. # 2. add a "``FakeTensor`` kernel" (aka "meta kernel") to the operator. -# Given the metadata (e.g. shapes) -# of the input Tensors, this function says how to compute the metadata -# of the output Tensor(s). +# Given some ``FakeTensors`` inputs (dummy Tensors that don't have storage), +# this function should return dummy Tensors of your choice with the correct +# Tensor metadata (shape/strides/``dtype``/device). from typing import Sequence @@ -130,6 +130,11 @@ def f(img): # ``autograd.Function`` with PyTorch operator registration APIs can lead to (and # has led to) silent incorrectness when composed with ``torch.compile``. # +# If you don't need training support, there is no need to use +# ``torch.library.register_autograd``. +# If you end up training with a ``custom_op`` that doesn't have an autograd +# registration, we'll raise an error message. +# # The gradient formula for ``crop`` is essentially ``PIL.paste`` (we'll leave the # derivation as an exercise to the reader). Let's first wrap ``paste`` into a # custom operator: @@ -203,7 +208,7 @@ def setup_context(ctx, inputs, output): ###################################################################### # Mutable Python Custom operators # ------------------------------- -# You can also wrap a Python function that mutates its inputs into a custom +# You can also wrap a Python function that mutates its inputs into a custom # operator. # Functions that mutate inputs are common because that is how many low-level # kernels are written; for example, a kernel that computes ``sin`` may take in From 2bdd6420081f40c3d467adb147ee22fb74c6e828 Mon Sep 17 00:00:00 2001 From: Tim Statler Date: Thu, 29 Aug 2024 08:13:14 -0700 Subject: [PATCH 08/10] Removed outdated steps in README about running about setup.py (#3014) Co-authored-by: Svetlana Karslioglu --- README.md | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index fe4b4b6edd..af84d9ebe7 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,7 @@ If you are starting off with a Jupyter notebook, you can use [this script](https ## Building locally -The tutorial build is very large and requires a GPU. If your machine does not have a GPU device, you can preview your HTML build without actually downloading the data and running the tutorial code: +The tutorial build is very large and requires a GPU. If your machine does not have a GPU device, you can preview your HTML build without actually downloading the data and running the tutorial code: 1. Install required dependencies by running: `pip install -r requirements.txt`. 
@@ -42,8 +42,6 @@ The tutorial build is very large and requires a GPU. If your machine does not ha - If you have a GPU-powered laptop, you can build using `make docs`. This will download the data, execute the tutorials and build the documentation to `docs/` directory. This might take about 60-120 min for systems with GPUs. If you do not have a GPU installed on your system, then see next step. - You can skip the computationally intensive graph generation by running `make html-noplot` to build basic html documentation to `_build/html`. This way, you can quickly preview your tutorial. -> If you get **ModuleNotFoundError: No module named 'pytorch_sphinx_theme' make: *** [html-noplot] Error 2** from /tutorials/src/pytorch-sphinx-theme or /venv/src/pytorch-sphinx-theme (while using virtualenv), run `python setup.py install`. - ## Building a single tutorial You can build a single tutorial by using the `GALLERY_PATTERN` environment variable. For example to run only `neural_style_transfer_tutorial.py`, run: @@ -61,8 +59,8 @@ The `GALLERY_PATTERN` variable respects regular expressions. ## About contributing to PyTorch Documentation and Tutorials -* You can find information about contributing to PyTorch documentation in the -PyTorch Repo [README.md](/~https://github.com/pytorch/pytorch/blob/master/README.md) file. +* You can find information about contributing to PyTorch documentation in the +PyTorch Repo [README.md](/~https://github.com/pytorch/pytorch/blob/master/README.md) file. * Additional information can be found in [PyTorch CONTRIBUTING.md](/~https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md). From b3c49a3f5e42e22db8982a9ecb3e1d5b30b83ec0 Mon Sep 17 00:00:00 2001 From: Svetlana Karslioglu Date: Thu, 29 Aug 2024 09:56:59 -0700 Subject: [PATCH 09/10] Fix hovering over the GCS search button (#3005) * Fix hovering over the GCS search button --- _static/css/custom.css | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/_static/css/custom.css b/_static/css/custom.css index 09aba28f25..cc195d9906 100755 --- a/_static/css/custom.css +++ b/_static/css/custom.css @@ -100,3 +100,15 @@ padding-left: 0px !important; padding-bottom: 0px !important; } + +.gsc-search-button .gsc-search-button-v2:focus { + border: transparent !important; + outline: none; + box-shadow: none; +} +.gsc-search-button-v2:active { + border: none !important; +} +.gsc-search-button-v2 { + border: none !important; +} From e07d43bb7b9b3e84c5682eb5b0cb8c07e2d35051 Mon Sep 17 00:00:00 2001 From: Tim Statler Date: Fri, 30 Aug 2024 14:36:59 -0700 Subject: [PATCH 10/10] Added warnings to select Pytorch mobile tutorials directing users to ExecuTorch (#3016) * Added warnings to select ExecuTorch tutorials/recipes/prototypes * Added warnings to select ExecuTorch tutorials/recipes/prototypes * Added redirect for renamed prototype * Update deeplabv3_on_android.rst Fixed misplaced info tag. 
* Apply suggestions from code review --------- Co-authored-by: Svetlana Karslioglu --- beginner_source/deeplabv3_on_android.rst | 4 ++++ prototype_source/lite_interpreter.rst | 9 +++++++++ recipes_source/mobile_interpreter.rst | 3 +++ recipes_source/mobile_perf.rst | 5 ++++- recipes_source/ptmobile_recipes_summary.rst | 3 +++ 5 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 prototype_source/lite_interpreter.rst diff --git a/beginner_source/deeplabv3_on_android.rst b/beginner_source/deeplabv3_on_android.rst index f2fe0e48f1..5ca7f01ad0 100644 --- a/beginner_source/deeplabv3_on_android.rst +++ b/beginner_source/deeplabv3_on_android.rst @@ -5,6 +5,10 @@ Image Segmentation DeepLabV3 on Android **Reviewed by**: `Jeremiah Chung `_ +.. warning:: + PyTorch Mobile is no longer actively supported. Please check out `ExecuTorch `_, PyTorch’s all-new on-device inference library. You can also review our `end-to-end workflows `_ and review the `source code for DeepLabV3 `_. + + Introduction ------------ diff --git a/prototype_source/lite_interpreter.rst b/prototype_source/lite_interpreter.rst new file mode 100644 index 0000000000..73e950d72e --- /dev/null +++ b/prototype_source/lite_interpreter.rst @@ -0,0 +1,9 @@ +(Prototype) Introduce lite interpreter workflow in Android and iOS +======================= + +This tutorial has been moved to https://pytorch.org/tutorials/recipes/mobile_interpreter.html + + +.. raw:: html + + diff --git a/recipes_source/mobile_interpreter.rst b/recipes_source/mobile_interpreter.rst index dda1dd9243..44036e74ff 100644 --- a/recipes_source/mobile_interpreter.rst +++ b/recipes_source/mobile_interpreter.rst @@ -3,6 +3,9 @@ **Author**: `Chen Lai `_, `Martin Yuan `_ +.. warning:: + PyTorch Mobile is no longer actively supported. Please check out `ExecuTorch `_, PyTorch’s all-new on-device inference library. You can also review our new documentation to learn more about how to build `iOS `_ and `Android `_ apps with ExecuTorch. + Introduction ------------ diff --git a/recipes_source/mobile_perf.rst b/recipes_source/mobile_perf.rst index aae1447cbf..14f183ab69 100644 --- a/recipes_source/mobile_perf.rst +++ b/recipes_source/mobile_perf.rst @@ -1,6 +1,9 @@ Pytorch Mobile Performance Recipes ================================== +.. warning:: + PyTorch Mobile is no longer actively supported. Please check out `ExecuTorch `_, PyTorch’s all-new on-device inference library. You can also learn more about `quantization `_, `Hardware acceleration (op fusion using hw) `_, and `benchmarking `_ on ExecuTorch’s documentation pages. + Introduction ---------------- Performance (aka latency) is crucial to most, if not all, @@ -245,7 +248,7 @@ For example, using ResNet-50 and running the following script: -you would get the following result: +you would get the following result: :: diff --git a/recipes_source/ptmobile_recipes_summary.rst b/recipes_source/ptmobile_recipes_summary.rst index cddee940f2..6cc8f6f751 100644 --- a/recipes_source/ptmobile_recipes_summary.rst +++ b/recipes_source/ptmobile_recipes_summary.rst @@ -1,6 +1,9 @@ Summary of PyTorch Mobile Recipes ===================================== +.. warning:: + Note: PyTorch Mobile is no longer actively supported. Please check out `ExecuTorch `_, PyTorch’s all-new on-device inference library. You can also review these `ExecuTorch examples `_. + This summary provides a top level overview of recipes for PyTorch Mobile to help developers choose which recipes to follow for their PyTorch-powered mobile app development. Introduction
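Each of the warnings added above points readers to ExecuTorch as the replacement for PyTorch Mobile. As a rough orientation for migrating, the following is a minimal sketch of the ExecuTorch export flow. It assumes the ``executorch`` package is installed, and the API names used here (``to_edge``, ``to_executorch``, ``.buffer``) reflect early ExecuTorch releases; verify them against the current ExecuTorch documentation, as they may change between versions.

.. code:: python

    # A minimal sketch, assuming the `executorch` package is installed.
    import torch
    from torch.export import export
    from executorch.exir import to_edge

    model = torch.nn.Sequential(torch.nn.Linear(4, 8), torch.nn.ReLU()).eval()
    example_inputs = (torch.randn(1, 4),)

    exported_program = export(model, example_inputs)  # standard torch.export step
    edge_program = to_edge(exported_program)          # lower to the Edge dialect
    et_program = edge_program.to_executorch()         # produce an ExecuTorch program

    # Serialize to a .pte file that the on-device ExecuTorch runtime can load.
    with open("model.pte", "wb") as f:
        f.write(et_program.buffer)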