From 1874be7f6f32e5453a0516dc9d26e6482462aff2 Mon Sep 17 00:00:00 2001 From: qihqi Date: Mon, 13 Jan 2025 14:13:52 -0800 Subject: [PATCH] Update README.md (#207) * Update README.md add example for one request * Update README.md Co-authored-by: Siyuan Liu --------- Co-authored-by: Siyuan Liu --- README.md | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/README.md b/README.md index 79e4ccc..37da373 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,42 @@ of the `checkpoints///hf_original` dir (or the corresponding subdir Llama3 checkpoints will be at `checkpoints/meta-llama/Llama-2-7b-hf/hf_original/*.safetensors`. You can replace these files with modified weights in HuggingFace format. +## Send one request + +Jetstream-pytorch uses gRPC for handling requests; the script below demonstrates how to +send gRPC requests in Python. You can also use other gRPC clients. + +```python +import requests +import os +import grpc + +from jetstream.core.proto import jetstream_pb2 +from jetstream.core.proto import jetstream_pb2_grpc + +prompt = "What are the top 5 languages?" + +channel = grpc.insecure_channel("localhost:8888") +stub = jetstream_pb2_grpc.OrchestratorStub(channel) + +request = jetstream_pb2.DecodeRequest( + text_content=jetstream_pb2.DecodeRequest.TextContent( + text=prompt + ), + priority=0, + max_tokens=2000, +) + +response = stub.Decode(request) +output = [] +for resp in response: + output.extend(resp.stream_content.samples[0].text) + +text_output = "".join(output) +print(f"Prompt: {prompt}") +print(f"Response: {text_output}") +``` + # Run the server with ray Below are steps run server with ray: