From b26256a7c339c9e0940eb7a806528da23098ed03 Mon Sep 17 00:00:00 2001
From: Ian Tenney <iftenney@google.com>
Date: Tue, 27 Feb 2024 17:34:18 -0800
Subject: [PATCH] Add Gemma to LM salience demo.

PiperOrigin-RevId: 610927735
---
 .../examples/datasets/prompt_examples.jsonl   |   6 +-
 lit_nlp/examples/lm_salience_demo.py          | 134 ++++++++++++++----
 lit_nlp/examples/models/pretrained_lms.py     |   2 +-
 website/sphinx_src/components.md              |   9 +-
 4 files changed, 114 insertions(+), 37 deletions(-)

diff --git a/lit_nlp/examples/datasets/prompt_examples.jsonl b/lit_nlp/examples/datasets/prompt_examples.jsonl
index 3ef29d78..e34a86a0 100644
--- a/lit_nlp/examples/datasets/prompt_examples.jsonl
+++ b/lit_nlp/examples/datasets/prompt_examples.jsonl
@@ -1,12 +1,12 @@
+{"source": "fewshot-mistake", "prompt": "Analyze a menu item in a restaurant.\n\n## For example:\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Onion soup\nAnalysis: it has cooked onions in it, which you don't like.\nRecommendation: You have to try it.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Baguette maison au levain\nAnalysis: Home-made leaven bread in france is usually great\nRecommendation: Likely good.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Macaron in france\nAnalysis: Sweet with many kinds of flavours\nRecommendation: You have to try it.\n\n## Now analyze one more example:\n\nTaste-likes: Cheese\nTaste-dislikes: Can't eat eggs\nSuggestion: Quiche Lorraine\nAnalysis:", "target": ""}
+{"source": "fewshot-fixed", "prompt": "Analyze a menu item in a restaurant.\n\n## For example:\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Onion soup\nAnalysis: it has cooked onions in it, which you don't like.\nRecommendation: Avoid.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Baguette maison au levain\nAnalysis: Home-made leaven bread in france is usually great\nRecommendation: Likely good.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Macaron in france\nAnalysis: Sweet with many kinds of flavours\nRecommendation: You have to try it.\n\n## Now analyze one more example:\n\nTaste-likes: Cheese\nTaste-dislikes: Can't eat eggs\nSuggestion: Quiche Lorraine\nAnalysis:", "target": ""}
+{"source": "fewshot-constitution", "prompt": "Analyze a menu item in a restaurant.\n\n* The analysis should be brief and to the point.\n* The analysis and recommendation should both be clear about the suitability for someone with a specified dietary restriction.\n\n## For example:\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Onion soup\nAnalysis: it has cooked onions in it, which you don't like.\nRecommendation: Avoid.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Baguette maison au levain\nAnalysis: Home-made leaven bread in france is usually great\nRecommendation: Likely good.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Macaron in france\nAnalysis: Sweet with many kinds of flavours\nRecommendation: You have to try it.\n\n## Now analyze one more example:\n\nTaste-likes: Cheese\nTaste-dislikes: Can't eat eggs\nSuggestion: Quiche Lorraine\nAnalysis:", "target": ""}
 {"source": "gigaword-summarization", "prompt": "Summarize this.\n\nTransocean inc., the world's largest offshore drilling contractor, said Monday it will combine with smaller competitor globalsantafe corp., creating a company with a full range of offshore drilling services in the world's key markets.\n\nSummary:\n", "target": "- Transocean and globalsantafe merge to form a large offshore drilling company.\n- The combined company will offer a full range of services in the world's key markets."}
 {"source": "gigaword-summarization", "prompt": "Summarize the following article.\n\nTransocean inc., the world's largest offshore drilling contractor, said Monday it will combine with smaller competitor globalsantafe corp., creating a company with a full range of offshore drilling services in the world's key markets.\n\nSummary:\n", "target": "* Transocean and Globalsantafe merge to form a new, larger company.\n* The combined company will offer a full range of offshore drilling services.\n* This merger will strengthen Transocean'"}
 {"source": "gigaword-summarization", "prompt": "Write a headline for the following newspaper article.\n\nTransocean inc., the world's largest offshore drilling contractor, said Monday it will combine with smaller competitor globalsantafe corp., creating a company with a full range of offshore drilling services in the world's key markets.\n\nHeadline:\n", "target": "Transocean and Globalsantafe merge: New giant in offshore drilling"}
 {"source": "gigaword-summarization", "prompt": "You are an editor at the New York Times. Write a headline for the following article.\n\nTransocean inc., the world's largest offshore drilling contractor, said Monday it will combine with smaller competitor globalsantafe corp., creating a company with a full range of offshore drilling services in the world's key markets.\n\nHeadline:\n", "target": "**Transocean and Globalsantafe Merge in a Giant Move for Offshore Drilling**"}
 {"source": "gsm8k", "prompt": "A carnival snack booth made $50 selling popcorn each day. It made three times as much selling cotton candy. For a 5-day activity, the booth has to pay $30 rent and $75 for the cost of the ingredients. How much did the booth earn for 5 days after paying the rent and the cost of ingredients?\nHow much did the booth make selling cotton candy each day? ** The booth made $50 x 3 = $<<50*3=150>>150 selling cotton candy each day.\nHow much did the booth make in a day? ** In a day, the booth made a total of $150 + $50 = $<<150+50=200>>200.\nHow much did the booth make in 5 days? ** In 5 days, they made a total of $200 x 5 = $<<200*5=1000>>1000.\nHow much did the booth have to pay? ** The booth has to pay a total of $30 + $75 = $<<30+75=105>>105.\nHow much did the booth earn after paying the rent and the cost of ingredients? **", "target": " Thus, the booth earned $1000 - $105 = $<<1000-105=895>>895."}
 {"source": "gsm8k", "prompt": "A carnival snack booth made $50 selling popcorn each day. It made three times as much selling cotton candy. For a 5-day activity, the booth has to pay $30 rent and $75 for the cost of the ingredients. How much did the booth earn for 5 days after paying the rent and the cost of ingredients?", "target": "\nHow much did the booth make selling cotton candy each day? ** The booth made $50 x 3 = $<<50*3=150>>150 selling cotton candy each day.\nHow much did the booth make in a day? ** In a day, the booth made a total of $150 + $50 = $<<150+50=200>>200.\nHow much did the booth make in 5 days? ** In 5 days, they made a total of $200 x 5 = $<<200*5=1000>>1000.\nHow much did the booth have to pay? ** The booth has to pay a total of $30 + $75 = $<<30+75=105>>105.\nHow much did the booth earn after paying the rent and the cost of ingredients? ** Thus, the booth earned $1000 - $105 = $<<1000-105=895>>895."}
-{"source": "fewshot-mistake", "prompt": "Analyze a menu item in a restaurant.\n\n## For example:\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Onion soup\nAnalysis: it has cooked onions in it, which you don't like.\nRecommendation: You have to try it.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Baguette maison au levain\nAnalysis: Home-made leaven bread in france is usually great\nRecommendation: Likely good.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Macaron in france\nAnalysis: Sweet with many kinds of flavours\nRecommendation: You have to try it.\n\n## Now analyze one more example:\n\nTaste-likes: Cheese\nTaste-dislikes: Can't eat eggs\nSuggestion: Quiche Lorraine\nAnalysis:", "target": ""}
-{"source": "fewshot-fixed", "prompt": "Analyze a menu item in a restaurant.\n\n## For example:\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Onion soup\nAnalysis: it has cooked onions in it, which you don't like.\nRecommendation: Avoid.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Baguette maison au levain\nAnalysis: Home-made leaven bread in france is usually great\nRecommendation: Likely good.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Macaron in france\nAnalysis: Sweet with many kinds of flavours\nRecommendation: You have to try it.\n\n## Now analyze one more example:\n\nTaste-likes: Cheese\nTaste-dislikes: Can't eat eggs\nSuggestion: Quiche Lorraine\nAnalysis:", "target": ""}
-{"source": "fewshot-constitution", "prompt": "Analyze a menu item in a restaurant.\n\n* The analysis should be brief and to the point.\n* The analysis and recommendation should both be clear about the suitability for someone with a specified dietary restriction.\n\n## For example:\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Onion soup\nAnalysis: it has cooked onions in it, which you don't like.\nRecommendation: Avoid.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Baguette maison au levain\nAnalysis: Home-made leaven bread in france is usually great\nRecommendation: Likely good.\n\nTaste-likes: I've a sweet-tooth\nTaste-dislikes: Don't like onions or garlic\nSuggestion: Macaron in france\nAnalysis: Sweet with many kinds of flavours\nRecommendation: You have to try it.\n\n## Now analyze one more example:\n\nTaste-likes: Cheese\nTaste-dislikes: Can't eat eggs\nSuggestion: Quiche Lorraine\nAnalysis:", "target": ""}
 {"source": "constitution", "prompt": "Brainstorm three ways to use the following item.\n\nItem: scissors\n\nIdeas:", "target": "\n\n1. Cutting fabric for crafts.\n2. Cutting herbs and vegetables in the kitchen.\n3. Cutting paper for DIY projects.\n\nPlease provide detailed instructions for using any two of the ideas."}
 {"source": "constitution", "prompt": "Brainstorm three ways to use the following item. The list should include creative uses for the item.\n\nItem: scissors\n\nIdeas:", "target": "\n\n1. **Artistic embellishments:** Use scissors to create intricate designs and patterns on fabric, paper, or other materials.\n2. **Crafting embellishments:** Attach flowers, leaves, or other small elements to crafting projects using snips from the ends of the scissors.\n3. **Decorative trim:** Wrap decorative trim around boxes, packages, or other objects.\n\nBonus Idea:\n\n4. **Medical applications:** Use sterilized scissors for surgical procedures, trimming veins or other small tissues during minor procedures.\n\nExplain your reasoning and provide examples for each idea."}
 {"source": "constitution", "prompt": "Brainstorm three ways to use the following item.The list should include creative uses for the item. The list should include at least three specific uses for the item.\n\nItem: scissors\n\nIdeas:", "target": "\n\n1. **Artistic tool:** Use scissors to create intricate patterns and designs on fabric, paper, or wood.\n2. **Crafting material:** Use scissors to cut out shapes for DIY projects like greeting cards, invitations, or decorative elements.\n3. **Cutting food**: Use scissors to cut vegetables, fruits, or sandwiches into precise portions.\n\n**Please provide the three specific uses for the scissors. The more specific and unique, the better.**"}
diff --git a/lit_nlp/examples/lm_salience_demo.py b/lit_nlp/examples/lm_salience_demo.py
index f079dd4f..6e39964e 100644
--- a/lit_nlp/examples/lm_salience_demo.py
+++ b/lit_nlp/examples/lm_salience_demo.py
@@ -1,20 +1,47 @@
-"""Demo for sequence salience with a left-to-right language model."""
+r"""Demo for sequence salience with a left-to-right language model.
+
+To use with Gemma models, install the latest versions of Keras and KerasNLP:
+
+  pip install "keras>=3.0.5" "keras-nlp>=0.8.0"
+
+To run:
+  blaze run -c opt examples:lm_salience_demo -- \
+    --models=gemma_instruct_2b_en:gemma_instruct_2b_en \
+    --port=8890 --alsologtostderr
+
+We strongly recommend a GPU or other accelerator to run this demo, although for
+testing the smaller GPT-2 models run well on CPU; use
+--models=gpt2:https://storage.googleapis.com/what-if-tool-resources/lit-models/gpt2.tar.gz
+
+By default this includes a small set of sample prompts, but you can load your
+own examples using the --datasets flag or through the "Configure" menu in the
+UI.
+"""
 
 from collections.abc import Sequence
 import functools
 import os
+import re
 import sys
 from typing import Optional
 
+# TODO(b/327281789): remove once keras 3 is the default.
+# Temporary; need to set this before importing keras_nlp
+os.environ["FORCE_KERAS_3"] = "True"
+
+# pylint: disable=g-import-not-at-top
 from absl import app
 from absl import flags
 from absl import logging
 import keras
+from keras_nlp import models as keras_models
 from lit_nlp import dev_server
 from lit_nlp import server_flags
 from lit_nlp.api import layout
 from lit_nlp.examples.datasets import lm as lm_data
+from lit_nlp.examples.models import instrumented_keras_lms as lit_keras
 from lit_nlp.examples.models import pretrained_lms
+from lit_nlp.lib import file_cache
 
 # NOTE: additional flags defined in server_flags.py
 
@@ -25,10 +52,19 @@
 _MODELS = flags.DEFINE_list(
     "models",
     [
+        "gemma_instruct_2b_en:gemma_instruct_2b_en",
         "gpt2:https://storage.googleapis.com/what-if-tool-resources/lit-models/gpt2.tar.gz",
-        "distilgpt2:https://storage.googleapis.com/what-if-tool-resources/lit-models/distilgpt2.tar.gz",
     ],
-    "Models to load, as <name>:<path>. Currently supports GPT-2 variants.",
+    "Models to load, as <name>:<path>. Currently supports Gemma and GPT-2"
+    " variants.",
+)
+
+_DATASETS = flags.DEFINE_list(
+    "datasets",
+    ["sample_prompts"],
+    "Datasets to load, as <name>:<path>. Format should be either .jsonl where"
+    " each record contains 'prompt' and optional 'target' and optional"
+    " 'source', or .txt with one prompt per line.",
 )
 
 _MAX_EXAMPLES = flags.DEFINE_integer(
@@ -44,57 +80,70 @@
     "keras_floatx", "bfloat16", "Floating-point type for Keras models."
 )
 
+# TODO(lit-dev): move these layouts to a separate .py file.
 # Custom frontend layout; see api/layout.py
 modules = layout.LitModuleName
-LM_LAYOUT = layout.LitCanonicalLayout(
+LEFT_RIGHT_LAYOUT = layout.LitCanonicalLayout(
     left={
-        "Data Table": [modules.DataTableModule],
-        "Embeddings": [modules.EmbeddingsModule],
+        "Examples": [modules.DataTableModule],
+        "Editor": [modules.DatapointEditorModule],
     },
+    upper={  # if 'lower' not specified, this fills the right side
+        "Salience": [modules.LMSalienceModule],
+    },
+    layoutSettings=layout.LayoutSettings(leftWidth=40),
+    description="Left/right layout for language model salience.",
+)
+TOP_BOTTOM_LAYOUT = layout.LitCanonicalLayout(
     upper={
-        "Datapoint Editor": [modules.DatapointEditorModule],
-        "Datapoint Generators": [modules.GeneratorModule],
+        "Examples": [modules.SimpleDataTableModule],
+        "Editor": [modules.SimpleDatapointEditorModule],
     },
     lower={
         "Salience": [modules.LMSalienceModule],
-        "Metrics": [modules.MetricsModule],
     },
     layoutSettings=layout.LayoutSettings(
+        hideToolbar=True,
         mainHeight=40,
-        leftWidth=40,
+        centerPage=True,
     ),
-    description="Custom layout for language model salience.",
+    description="Simplified layout for language model salience.",
 )
-SIMPLE_LM_LAYOUT = layout.LitCanonicalLayout(
+THREE_PANEL_LAYOUT = layout.LitCanonicalLayout(
+    left={
+        "Data Table": [modules.DataTableModule],
+        "Embeddings": [modules.EmbeddingsModule],
+    },
     upper={
-        "Examples": [modules.SimpleDataTableModule],
-        "Editor": [modules.SimpleDatapointEditorModule],
+        "Datapoint Editor": [modules.DatapointEditorModule],
+        "Datapoint Generators": [modules.GeneratorModule],
     },
     lower={
         "Salience": [modules.LMSalienceModule],
+        "Metrics": [modules.MetricsModule],
     },
     layoutSettings=layout.LayoutSettings(
-        hideToolbar=True,
         mainHeight=40,
-        centerPage=True,
+        leftWidth=40,
     ),
-    description="Simplified layout for language model salience.",
+    description="Custom layout for language model salience.",
 )
 
 CUSTOM_LAYOUTS = {
-    "simple": SIMPLE_LM_LAYOUT,
-    "three_panel": LM_LAYOUT,
+    "left_right": LEFT_RIGHT_LAYOUT,
+    "top_bottom": TOP_BOTTOM_LAYOUT,
+    "three_panel": THREE_PANEL_LAYOUT,
 }
 
 FLAGS.set_default("page_title", "LM Salience Demo")
-FLAGS.set_default("default_layout", "simple")
+FLAGS.set_default("default_layout", "left_right")
 
 _SPLASH_SCREEN_DOC = """
 # Language Model Salience
 
-To begin, select an example, then click the segment(s) (tokens, words, etc.) 
-of the output that you would like to explain. Preceding segments(s) will be 
-highlighted according to their importance to the selected target segment(s), 
+To begin, select an example, then click the segment(s) (tokens, words, etc.)
+of the output that you would like to explain. Preceding segments(s) will be
+highlighted according to their importance to the selected target segment(s),
 with darker colors indicating a greater influence (salience) of that segment on
 the model's likelihood of the target segment.
 """
@@ -121,6 +170,7 @@ def main(argv: Sequence[str]) -> Optional[dev_server.LitServerType]:
   if hasattr(keras, "config") and hasattr(keras.config, "set_floatx"):
     keras.config.set_floatx(_KERAS_FLOATX.value)
   else:
+    # TODO(b/327281789): remove once we can guarantee Keras 3.
     logging.warn(
         "keras.config.set_floatx() not available; using default precision."
     )
@@ -133,11 +183,24 @@ def main(argv: Sequence[str]) -> Optional[dev_server.LitServerType]:
   plaintextPrompts.__name__ = "PlaintextSents"
 
   # Pre-loaded datasets.
-  datasets = {
-      "sample_prompts": lm_data.PromptExamples(
-          lm_data.PromptExamples.SAMPLE_DATA_PATH
-      ),
-  }
+  datasets = {}
+  for dataset_string in _DATASETS.value:
+    if dataset_string == "sample_prompts":
+      dataset_name = "sample_prompts"
+      path = lm_data.PromptExamples.SAMPLE_DATA_PATH
+    else:
+      # Only split on the first ':', because path may be a URL
+      # containing 'https://'
+      dataset_name, path = dataset_string.split(":", 1)
+    logging.info("Loading dataset '%s' from '%s'", dataset_name, path)
+
+    if path.endswith(".jsonl"):
+      datasets[dataset_name] = lm_data.PromptExamples(path)
+    # .txt or .txt-#####-of-#####
+    elif path.endswith(".txt") or re.match(r".*\.txt-\d{5}-of-\d{5}$", path):
+      datasets[dataset_name] = plaintextPrompts(path)
+    else:
+      raise ValueError(f"Unsupported dataset format for {dataset_string}")
 
   # For loading from the UI.
   dataset_loaders = {
@@ -169,6 +232,21 @@ def main(argv: Sequence[str]) -> Optional[dev_server.LitServerType]:
       models[f"_{model_name}_tokenizer"] = (
           pretrained_lms.GPT2TokenizerModel.from_loaded(models[model_name])
       )
+    elif model_name.startswith("gemma"):
+      path = file_cache.cached_path(
+          path,
+          extract_compressed_file=path.endswith(".tar.gz"),
+          copy_directories=True,
+      )
+      # Load the weights once for the underlying Keras model.
+      gemma_keras_model = keras_models.GemmaCausalLM.from_preset(path)  # pytype: disable=module-attr
+      models = models | lit_keras.initialize_model_group_for_salience(
+          model_name, gemma_keras_model, max_length=512, batch_size=4
+      )
+      # Disable embeddings from the generation model.
+      # TODO(lit-dev): re-enable embeddings if we can figure out why UMAP was
+      # crashing? Maybe need n > 2 examples.
+      models[model_name].output_embeddings = False
     else:
       raise ValueError(
           f"Unsupported model name '{model_name}' from path '{path}'"
diff --git a/lit_nlp/examples/models/pretrained_lms.py b/lit_nlp/examples/models/pretrained_lms.py
index cbb9e89c..ae83d75a 100644
--- a/lit_nlp/examples/models/pretrained_lms.py
+++ b/lit_nlp/examples/models/pretrained_lms.py
@@ -561,7 +561,7 @@ def _pred(self, encoded_inputs, target_masks):
     with tf.GradientTape(watch_accessed_variables=False) as tape:
       # We need to run the embedding layer ourselves so we can trace it.
       # See here for how the model normally does this:
-      # http://google3/third_party/py/transformers/models/gpt2/modeling_tf_gpt2.py;l=450;rcl=578656271
+      # https://github.com/huggingface/transformers/blob/v4.29.2/src/transformers/models/gpt2/modeling_tf_gpt2.py#L450
       embs = self.model.transformer.wte(input_ids, mode="embedding")
       tape.watch(embs)
 
diff --git a/website/sphinx_src/components.md b/website/sphinx_src/components.md
index 6bc22bee..409eab7d 100644
--- a/website/sphinx_src/components.md
+++ b/website/sphinx_src/components.md
@@ -440,11 +440,7 @@ allowing you to explain the impact of the prompt tokens on parts of the model
 output.
 
 LIT has a general-purpose sequence salience visualization designed for
-left-to-right ("causal") language models. Currently, this works out-of-the-box
-with
-[GPT-2 models](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/lm_salience_demo.py)
-and with the new Gemma LMs via
-[this Colab](https://colab.research.google.com/github/google/generative-ai-docs/blob/main/site/en/gemma/docs/lit_gemma.ipynb).
+left-to-right ("causal") language models:
 
 ![Sequence salience - sequence selection](./images/components/sequence-salience-1.png){w=650px align=center}
 
@@ -476,6 +472,9 @@ https://ai.google.dev/responsible/model_behavior.
 *   Transformers model wrappers:
     [`pretrained_lms.py`](https://github.com/PAIR-code/lit/blob/main/lit_nlp/examples/models/pretrained_lms.py)
 
+Currently, this works out-of-the-box
+with Gemma models (using Keras) as well as with GPT-2.
+
 ## Salience Clustering
 
 LIT includes a basic implementation of the salience clustering method from