From 1874be7f6f32e5453a0516dc9d26e6482462aff2 Mon Sep 17 00:00:00 2001 From: qihqi Date: Mon, 13 Jan 2025 14:13:52 -0800 Subject: [PATCH] Update README.md (#207) * Update README.md add example for one request * Update README.md Co-authored-by: Siyuan Liu --------- Co-authored-by: Siyuan Liu --- README.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/README.md b/README.md index 79e4ccc..37da373 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,42 @@ of the `checkpoints///hf_original` dir (or the corresponding subdir Llama3 checkpoints will be at `checkpoints/meta-llama/Llama-2-7b-hf/hf_original/*.safetensors`. You can replace these files with modified weights in HuggingFace format. +## Send one request + +Jetstream-pytorch uses gRPC for handling requests; the script below demonstrates how to +send gRPC requests in Python. You can also use other gRPC clients. + +```python +import requests +import os +import grpc + +from jetstream.core.proto import jetstream_pb2 +from jetstream.core.proto import jetstream_pb2_grpc + +prompt = "What are the top 5 languages?" + +channel = grpc.insecure_channel("localhost:8888") +stub = jetstream_pb2_grpc.OrchestratorStub(channel) + +request = jetstream_pb2.DecodeRequest( + text_content=jetstream_pb2.DecodeRequest.TextContent( + text=prompt + ), + priority=0, + max_tokens=2000, +) + +response = stub.Decode(request) +output = [] +for resp in response: + output.extend(resp.stream_content.samples[0].text) + +text_output = "".join(output) +print(f"Prompt: {prompt}") +print(f"Response: {text_output}") +``` + # Run the server with ray Below are steps run server with ray: