From 5f7f4233807a41a344676c522ddf17d9a09d3a66 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Thu, 27 Feb 2025 12:12:00 +0100 Subject: [PATCH 1/8] ollama: fix permission for automatically creating PR (#1233) Ref discussion: https://huggingface.slack.com/archives/CTKK32GE8/p1740478906515669 The CI token does not have permission to create PR automatically, therefore we replace it with a PAT provided via `secrets.HUGGINGFACE_JS_AUTOMATIC_PR` CC @paulinebm for viz --- .github/workflows/ollama-template-update.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ollama-template-update.yml b/.github/workflows/ollama-template-update.yml index f1f463a5c..e5f835da9 100644 --- a/.github/workflows/ollama-template-update.yml +++ b/.github/workflows/ollama-template-update.yml @@ -81,6 +81,7 @@ jobs: CURRENT_DATE: ${{ steps.prepare.outputs.CURRENT_DATE }} NEW_BRANCH: ${{ steps.changes.outputs.NEW_BRANCH }} with: + github-token: ${{ secrets.HUGGINGFACE_JS_AUTOMATIC_PR }} script: | const { repo, owner } = context.repo; const currDate = process.env.CURRENT_DATE; From 11e07c0ac352d656c7daceceb270800cfc2f5178 Mon Sep 17 00:00:00 2001 From: Ashwin Kumar <40177448+ashwinkumargb@users.noreply.github.com> Date: Thu, 27 Feb 2025 05:58:20 -0800 Subject: [PATCH 2/8] Adding Merlin for accurate download count (#1232) I don't think we are getting an accurate download count for our hugging face repo: https://huggingface.co/stanfordmimi/Merlin, since the downloads page counts the queries to the config.json file and in our case the config.json file is empty. I was looking through the hugging face documentation and it seems like I had to add the file to the hugging face repo to allow it to have an accurate download count? Please let me know if I'm mistaken. --------- Co-authored-by: Lucain Co-authored-by: Lucain --- packages/tasks/src/model-libraries.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/packages/tasks/src/model-libraries.ts b/packages/tasks/src/model-libraries.ts index a1a0f67dd..e050353c5 100644 --- a/packages/tasks/src/model-libraries.ts +++ b/packages/tasks/src/model-libraries.ts @@ -478,6 +478,13 @@ export const MODEL_LIBRARIES_UI_ELEMENTS = { countDownloads: `path:"MeshAnything_350m.pth"`, snippets: snippets.mesh_anything, }, + merlin: { + prettyLabel: "Merlin", + repoName: "Merlin", + repoUrl: "/~https://github.com/StanfordMIMI/Merlin", + filter: false, + countDownloads: `path_extension:"pt"`, + }, mitie: { prettyLabel: "MITIE", repoName: "MITIE", From e6332abff1a7b619a35a9b60ecf5bdfc2e159dd2 Mon Sep 17 00:00:00 2001 From: Mishig Date: Thu, 27 Feb 2025 14:58:30 +0100 Subject: [PATCH 3/8] featureExtraction should use FeatureExtractionInput (#1235) Read the comment /~https://github.com/huggingface/huggingface.js/pull/1235/files#r1973270449 --- packages/inference/src/tasks/nlp/featureExtraction.ts | 11 ++--------- packages/inference/src/types.ts | 3 ++- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/packages/inference/src/tasks/nlp/featureExtraction.ts b/packages/inference/src/tasks/nlp/featureExtraction.ts index 2b6ff411d..25a6695a2 100644 --- a/packages/inference/src/tasks/nlp/featureExtraction.ts +++ b/packages/inference/src/tasks/nlp/featureExtraction.ts @@ -1,16 +1,9 @@ +import type { FeatureExtractionInput } from "@huggingface/tasks"; import { InferenceOutputError } from "../../lib/InferenceOutputError"; import type { BaseArgs, Options } from "../../types"; import { request } from "../custom/request"; -export type FeatureExtractionArgs = BaseArgs & { - 
/** - * The inputs is a string or a list of strings to get the features from. - * - * inputs: "That is a happy person", - * - */ - inputs: string | string[]; -}; +export type FeatureExtractionArgs = BaseArgs & FeatureExtractionInput; /** * Returned values are a multidimensional array of floats (dimension depending on if you sent a string or a list of string, and if the automatic reduction, usually mean_pooling for instance was applied for you or not. This should be explained on the model's README). diff --git a/packages/inference/src/types.ts b/packages/inference/src/types.ts index 6c9937679..c4680a2d7 100644 --- a/packages/inference/src/types.ts +++ b/packages/inference/src/types.ts @@ -1,4 +1,4 @@ -import type { ChatCompletionInput, PipelineType } from "@huggingface/tasks"; +import type { ChatCompletionInput, FeatureExtractionInput, PipelineType } from "@huggingface/tasks"; /** * HF model id, like "meta-llama/Llama-3.3-70B-Instruct" @@ -86,6 +86,7 @@ export type RequestArgs = BaseArgs & | { text: string } | { audio_url: string } | ChatCompletionInput + | FeatureExtractionInput ) & { parameters?: Record; }; From a24aa449a26a856d174fc5067254e8958b4e7529 Mon Sep 17 00:00:00 2001 From: machineuser Date: Thu, 27 Feb 2025 14:00:23 +0000 Subject: [PATCH 4/8] =?UTF-8?q?=F0=9F=94=96=20@huggingface/tasks=200.16.3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- packages/tasks/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/tasks/package.json b/packages/tasks/package.json index 3329ee5e9..627e3ecc6 100644 --- a/packages/tasks/package.json +++ b/packages/tasks/package.json @@ -1,7 +1,7 @@ { "name": "@huggingface/tasks", "packageManager": "pnpm@8.10.5", - "version": "0.16.2", + "version": "0.16.3", "description": "List of ML tasks for huggingface.co/tasks", "repository": "/~https://github.com/huggingface/huggingface.js.git", "publishConfig": { From 7dbbfd664e465b471ab2a45d101cca46de4c36ee Mon Sep 17 00:00:00 2001 From: machineuser Date: Thu, 27 Feb 2025 14:01:51 +0000 Subject: [PATCH 5/8] =?UTF-8?q?=F0=9F=94=96=20@huggingface/inference=203.3?= =?UTF-8?q?.7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 2 +- packages/inference/package.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index eeda91985..c12a01ef2 100644 --- a/README.md +++ b/README.md @@ -96,7 +96,7 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN or ```html ``` diff --git a/packages/inference/package.json b/packages/inference/package.json index 23c80e2fc..73ba19cad 100644 --- a/packages/inference/package.json +++ b/packages/inference/package.json @@ -1,6 +1,6 @@ { "name": "@huggingface/inference", - "version": "3.3.6", + "version": "3.3.7", "packageManager": "pnpm@8.10.5", "license": "MIT", "author": "Tim Mikeladze ", From 3857938703d7e9c8e8b3be39ade0102f09bb0fce Mon Sep 17 00:00:00 2001 From: HuggingFaceInfra <148469759+HuggingFaceInfra@users.noreply.github.com> Date: Thu, 27 Feb 2025 15:48:18 +0100 Subject: [PATCH 6/8] =?UTF-8?q?[ollama-utils]=20=F0=9F=A4=96=20Auto-update?= =?UTF-8?q?=20chat=20templates=20(2025-02-27)=20(#1236)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR is auto-generated by [generate-automap.ts](/~https://github.com/huggingface/huggingface.js/blob/main/packages/ollama-utils/scripts/generate-automap.ts). 
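Each generated entry added below appears to follow roughly this shape (a sketch inferred from the entries in this diff; the authoritative `OllamaChatTemplateMapEntry` definition lives in `./types` and may carry additional fields):

```ts
// Inferred sketch only — field names are taken from the generated entries below, not from ./types.
interface OllamaChatTemplateMapEntry {
	model: string; // e.g. "library/granite3.2:8b"
	gguf: string; // Jinja chat template embedded in the GGUF metadata
	ollama: {
		template: string; // equivalent Ollama (Go text/template) chat template
		tokens: string[]; // special tokens the template relies on
		params?: { stop?: string[] }; // optional generation parameters such as stop sequences
	};
}
```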
Co-authored-by: machineuser --- .../ollama-utils/src/chat-template-automap.ts | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/packages/ollama-utils/src/chat-template-automap.ts b/packages/ollama-utils/src/chat-template-automap.ts index 9bcc81785..e71f27a6c 100644 --- a/packages/ollama-utils/src/chat-template-automap.ts +++ b/packages/ollama-utils/src/chat-template-automap.ts @@ -1,7 +1,17 @@ // This file is auto generated, please do not modify manually // To update it, run "pnpm run build:automap" -import type { OllamaChatTemplateMapEntry } from "./types"; +import { OllamaChatTemplateMapEntry } from "./types"; + +/** + * Skipped these models due to error: + * - library/codegemma:7b + * - library/hermes3:405b + * - library/mistral-small:24b + * - library/granite3.1-moe:1b + * - library/reader-lm:1.5b + * - library/llama-pro:latest + */ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [ { @@ -367,6 +377,15 @@ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [ tokens: ["<|tool_call|>", "", "<|start_of_role|>", "<|end_of_role|>", "<|end_of_text|>"], }, }, + { + model: "library/granite3.2:8b", + gguf: "{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content'] %}\n {%- set loop_messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"Knowledge Cutoff Date: April 2024.\nToday's Date: \" + strftime_now('%B %d, %Y') + \".\nYou are Granite, developed by IBM.\" %}\n {%- if tools and documents %}\n {%- set system_message = system_message + \" You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\n\nWrite the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif tools %}\n {%- set system_message = system_message + \" You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user's query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request.\" %}\n {%- elif documents %}\n {%- set system_message = system_message + \" Write the response to the user's input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data.\" %}\n {%- elif thinking %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\nRespond to every user query in a comprehensive and detailed way. You can write down your thoughts and reasoning process before responding. In the thought process, engage in a comprehensive cycle of analysis, summarization, exploration, reassessment, reflection, backtracing, and iteration to develop well-considered thinking process. In the response section, based on various attempts, explorations, and reflections from the thoughts section, systematically present the final solution that you deem correct. 
The response should summarize the thought process. Write your thoughts after 'Here is my thought process:' and write your response after 'Here is my response:' for each user query.\" %}\n {%- else %}\n {%- set system_message = system_message + \" You are a helpful AI assistant.\" %} \n {%- endif %}\n {%- if 'citations' in controls and documents %}\n {%- set system_message = system_message + '\n\nIn your response, use the symbols and to indicate when a fact comes from a document in the search result, e.g 0 for a fact from document 0. Afterwards, list all the citations with their corresponding documents in an ordered list.' %}\n {%- endif %}\n {%- if 'hallucinations' in controls and documents %}\n {%- set system_message = system_message + '\n\nFinally, after the response is written, include a numbered list of sentences from the response that are potentially hallucinated and not based in the documents.' %}\n {%- endif %}\n {%- set loop_messages = messages %}\n{%- endif %}\n{{- '<|start_of_role|>system<|end_of_role|>' + system_message + '<|end_of_text|>\n' }}\n{%- if tools %}\n {{- '<|start_of_role|>tools<|end_of_role|>' }}\n {{- tools | tojson(indent=4) }}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- if documents %}\n {{- '<|start_of_role|>documents<|end_of_role|>' }}\n {%- for document in documents %}\n {{- 'Document ' + loop.index0 | string + '\n' }}\n {{- document['text'] }}\n {%- if not loop.last %}\n {{- '\n\n'}}\n {%- endif%}\n {%- endfor %}\n {{- '<|end_of_text|>\n' }}\n{%- endif %}\n{%- for message in loop_messages %}\n {{- '<|start_of_role|>' + message['role'] + '<|end_of_role|>' + message['content'] + '<|end_of_text|>\n' }}\n {%- if loop.last and add_generation_prompt %}\n {{- '<|start_of_role|>assistant' }}\n {%- if controls %}\n {{- ' ' + controls | tojson()}}\n {%- endif %}\n {{- '<|end_of_role|>' }}\n {%- endif %}\n{%- endfor %}", + ollama: { + template: + '{{- /*\n\n------ MESSAGE PARSING ------\n\n*/}}\n{{- /*\nDeclare the prompt structure variables to be filled in from messages\n*/}}\n{{- $system := "" }}\n{{- $documents := "" }}\n{{- $documentCounter := 0 }}\n{{- $thinking := false }}\n{{- $citations := false }}\n{{- $hallucinations := false }}\n{{- $length := "" }}\n\n{{- /*\nLoop over messages and look for a user-provided system message and documents\n*/ -}}\n{{- range .Messages }}\n\n {{- /* User defined system prompt(s) */}}\n {{- if (eq .Role "system")}}\n {{- if (ne $system "") }}\n {{- $system = print $system "\\n\\n" }}\n {{- end}}\n {{- $system = print $system .Content }}\n {{- end}}\n\n {{- /*\n NOTE: Since Ollama collates consecutive roles, for control and documents, we\n work around this by allowing the role to contain an qualifier after the\n role string.\n */ -}}\n\n {{- /* Role specified thinking */ -}}\n {{- if (and (ge (len .Role) 7) (eq (slice .Role 0 7) "control")) }}\n {{- if (eq .Content "thinking")}}{{- $thinking = true }}{{- end}}\n {{- if (eq .Content "citations")}}{{- $citations = true }}{{- end}}\n {{- if (eq .Content "hallucinations")}}{{- $hallucinations = true }}{{- end}}\n {{- if (and (ge (len .Content) 7) (eq (slice .Content 0 7) "length "))}}\n {{- $length = print " {\\"length\\": \\"" (slice .Content 7) "\\"}" }}\n {{- end}}\n {{- end}}\n\n {{- /* Role specified document */ -}}\n {{- if (and (ge (len .Role) 8) (eq (slice .Role 0 8) "document")) }}\n {{- if (ne $documentCounter 0)}}\n {{- $documents = print $documents "\\n\\n"}}\n {{- end}}\n {{- $identifier := $documentCounter}}\n {{- if (ge (len .Role) 9) }}\n {{- $identifier = (slice 
.Role 8)}}\n {{- end}}\n {{- $documents = print $documents "Document " $identifier "\\n" .Content}}\n {{- $documentCounter = len (printf "a%*s" $documentCounter "")}}\n {{- end}}\n{{- end}}\n\n{{- /*\nIf no user message provided, build the default system message\n*/ -}}\n{{- if eq $system "" }}\n {{- $system = "Knowledge Cutoff Date: April 2024.\\nYou are Granite, developed by IBM."}}\n\n {{- /* Add Tools prompt */}}\n {{- if .Tools }}\n {{- $system = print $system " You are a helpful AI assistant with access to the following tools. When a tool is required to answer the user\'s query, respond with <|tool_call|> followed by a JSON list of tools used. If a tool does not exist in the provided list of tools, notify the user that you do not have the ability to fulfill the request." }}\n {{- end}}\n\n {{- /* Add documents prompt */}}\n {{- if $documents }}\n {{- if .Tools }}\n {{- $system = print $system "\\n\\n"}}\n {{- else }}\n {{- $system = print $system " "}}\n {{- end}}\n {{- $system = print $system "Write the response to the user\'s input by strictly aligning with the facts in the provided documents. If the information needed to answer the question is not available in the documents, inform the user that the question cannot be answered based on the available data." }}\n {{- if $citations}}\n {{- $system = print $system "\\n\\nIn your response, use the symbols and to indicate when a fact comes from a document in the search result, e.g 0 for a fact from document 0. Afterwards, list all the citations with their corresponding documents in an ordered list."}}\n {{- end}}\n {{- if $hallucinations}}\n {{- $system = print $system "\\n\\nFinally, after the response is written, include a numbered list of sentences from the response that are potentially hallucinated and not based in the documents."}}\n {{- end}}\n {{- end}}\n\n {{- /* Prompt without tools or documents */}}\n {{- if (and (not .Tools) (not $documents)) }}\n {{- $system = print $system " You are a helpful AI assistant."}}\n {{- if $thinking}}\n {{- $system = print $system "\\nRespond to every user query in a comprehensive and detailed way. You can write down your thought process before responding. Write your thoughts after \'Here is my thought process:\' and write your response after \'Here is my response:\' for each user query."}}\n {{- end}}\n {{- end}}\n\n {{- /* Add thinking prompt if no tools or documents */}}\n {{- if (and $thinking (not .Tools) (not $documents)) }}\n {{- $system = print $system " You are a helpful AI assistant.\\nRespond to every user query in a comprehensive and detailed way. You can write down your thought process before responding. Write your thoughts after \'Here is my thought process:\' and write your response after \'Here is my response:\' for each user query."}}\n {{- end}}\n\n{{- end}}\n{{- /*\n\n------ TEMPLATE EXPANSION ------\n\n*/}}\n{{- /* System Prompt */ -}}\n<|start_of_role|>system<|end_of_role|>{{- $system }}<|end_of_text|>\n\n{{- /* Tools */ -}}\n{{- if .Tools }}\n<|start_of_role|>tools<|end_of_role|>[\n{{- range $index, $_ := .Tools }}\n{{ . 
}}\n{{- if and (ne (len (slice $.Tools $index)) 1) (gt (len $.Tools) 1) }},\n{{- end}}\n{{- end }}\n]\n{{- end}}\n\n{{- /* Documents */ -}}\n{{- if $documents }}\n<|start_of_role|>documents<|end_of_role|>\n{{ $documents }}<|end_of_text|>\n{{- end}}\n\n{{- /* Standard Messages */}}\n{{- range $index, $_ := .Messages }}\n{{- if (and\n (ne .Role "system")\n (or (lt (len .Role) 7) (ne (slice .Role 0 7) "control"))\n (or (lt (len .Role) 8) (ne (slice .Role 0 8) "document"))\n)}}\n<|start_of_role|>\n{{- if eq .Role "tool" }}tool_response\n{{- else }}{{ .Role }}\n{{- end }}<|end_of_role|>\n{{- if .Content }}{{ .Content }}\n{{- else if .ToolCalls }}<|tool_call|>\n{{- range .ToolCalls }}{"name": "{{ .Function.Name }}", "arguments": {{ .Function.Arguments }}}\n{{- end }}\n{{- end }}\n{{- if eq (len (slice $.Messages $index)) 1 }}\n{{- if eq .Role "assistant" }}\n{{- else }}<|end_of_text|>\n<|start_of_role|>assistant<|end_of_role|>\n{{- end -}}\n{{- else }}<|end_of_text|>\n{{- end }}\n{{- end }}\n{{- end }}', + tokens: ["<|tool_call|>", "", "<|start_of_role|>", "<|end_of_role|>", "<|end_of_text|>"], + }, + }, { model: "library/hermes3:70b", gguf: "{{bos_token}}{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", @@ -747,6 +766,31 @@ export const OLLAMA_CHAT_TEMPLATE_MAPPING: OllamaChatTemplateMapEntry[] = [ }, }, }, + { + model: "library/r1-1776:671b", + gguf: "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% set ns = namespace(is_first=false, is_tool=false, is_output_first=true, system_prompt='', is_first_sp=true) %}{%- for message in messages %}{%- if message['role'] == 'system' %}{%- if ns.is_first_sp %}{% set ns.system_prompt = ns.system_prompt + message['content'] %}{% set ns.is_first_sp = false %}{%- else %}{% set ns.system_prompt = ns.system_prompt + '\\n\\n' + message['content'] %}{%- endif %}{%- endif %}{%- endfor %}{{ bos_token }}{{ ns.system_prompt }}{%- for message in messages %}{%- if message['role'] == 'user' %}{%- set ns.is_tool = false -%}{{'<|User|>' + message['content']}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' in message %}{%- set ns.is_tool = false -%}{%- for tool in message['tool_calls'] %}{%- if not ns.is_first %}{%- if message['content'] is none %}{{'<|Assistant|><|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- else %}{{'<|Assistant|>' + message['content'] + '<|tool▁calls▁begin|><|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- set ns.is_first = true -%}{%- else %}{{'\\n' + '<|tool▁call▁begin|>' + tool['type'] + '<|tool▁sep|>' + tool['function']['name'] + '\\n' + '```json' + '\\n' + tool['function']['arguments'] + '\\n' + '```' + '<|tool▁call▁end|>'}}{%- endif %}{%- endfor %}{{'<|tool▁calls▁end|><|end▁of▁sentence|>'}}{%- endif %}{%- if message['role'] == 'assistant' and 'tool_calls' not in message %}{%- if ns.is_tool %}{{'<|tool▁outputs▁end|>' + message['content'] + '<|end▁of▁sentence|>'}}{%- set ns.is_tool = false -%}{%- else %}{% 
set content = message['content'] %}{% if '' in content %}{% set content = content.split('')[-1] %}{% endif %}{{'<|Assistant|>' + content + '<|end▁of▁sentence|>'}}{%- endif %}{%- endif %}{%- if message['role'] == 'tool' %}{%- set ns.is_tool = true -%}{%- if ns.is_output_first %}{{'<|tool▁outputs▁begin|><|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- set ns.is_output_first = false %}{%- else %}{{'<|tool▁output▁begin|>' + message['content'] + '<|tool▁output▁end|>'}}{%- endif %}{%- endif %}{%- endfor -%}{% if ns.is_tool %}{{'<|tool▁outputs▁end|>'}}{% endif %}{% if add_generation_prompt and not ns.is_tool %}{{'<|Assistant|>'}}{% endif %}", + ollama: { + template: + '{{- if .System }}{{ .System }}{{ end }}\n{{- range $i, $_ := .Messages }}\n{{- $last := eq (len (slice $.Messages $i)) 1}}\n{{- if eq .Role "user" }}<|User|>{{ .Content }}\n{{- else if eq .Role "assistant" }}<|Assistant|>{{ .Content }}{{- if not $last }}<|end▁of▁sentence|>{{- end }}\n{{- end }}\n{{- if and $last (ne .Role "assistant") }}<|Assistant|>{{- end }}\n{{- end }}', + tokens: [ + "<|User|>", + "<|Assistant|>", + "<|tool▁calls▁begin|>", + "<|tool▁call▁begin|>", + "<|tool▁sep|>", + "<|tool▁call▁end|>", + "<|tool▁calls▁end|>", + "<|end▁of▁sentence|>", + "<|tool▁outputs▁end|>", + "<|tool▁outputs▁begin|>", + "<|tool▁output▁begin|>", + "<|tool▁output▁end|>", + ], + params: { + stop: ["<|begin▁of▁sentence|>", "<|end▁of▁sentence|>", "<|User|>", "<|Assistant|>"], + }, + }, + }, { model: "library/sailor2:1b", gguf: "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are an AI assistant named Sailor2, created by Sea AI Lab. As an AI assistant, you can answer questions in English, Chinese, and Southeast Asian languages such as Burmese, Cebuano, Ilocano, Indonesian, Javanese, Khmer, Lao, Malay, Sundanese, Tagalog, Thai, Vietnamese, and Waray. Your responses should be friendly, unbiased, informative, detailed, and faithful.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", From 2f029684aa67285078c180125803661ed89d4dbd Mon Sep 17 00:00:00 2001 From: alexrs-cohere Date: Thu, 27 Feb 2025 16:02:06 +0100 Subject: [PATCH 7/8] Add Cohere provider (#1202) ### What Adds Cohere as an inference provider. ### Test Plan Added new tests for Cohere both with and without streaming. ### What Should Reviewers Focus On? Is the implementation correct? Anything important that I missed? Also happy to get feedback on the code, I am a bit rusty with my JS! 
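For reviewers, here is a minimal usage sketch of the new provider (it mirrors the tests added below; the HF-model-to-Cohere-model mapping is assumed to already be registered on the Hub):

```ts
import { HfInference } from "@huggingface/inference";

// Any valid HF access token (or a Cohere key routed through HF) is assumed here.
const client = new HfInference(process.env.HF_TOKEN);

const res = await client.chatCompletion({
	model: "CohereForAI/c4ai-command-r7b-12-2024", // mapped to Cohere's "command-r7b-12-2024"
	provider: "cohere",
	messages: [{ role: "user", content: "Hello, nice to meet you!" }],
	max_tokens: 512,
});

console.log(res.choices[0].message?.content);
```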
--------- Co-authored-by: SBrandeis --- .github/workflows/test.yml | 3 ++ README.md | 12 ++--- packages/inference/README.md | 2 + .../inference/src/lib/makeRequestOptions.ts | 2 + packages/inference/src/providers/cohere.ts | 42 +++++++++++++++ packages/inference/src/providers/consts.ts | 1 + packages/inference/src/types.ts | 1 + packages/inference/test/HfInference.spec.ts | 47 ++++++++++++++++ packages/inference/test/tapes.json | 53 +++++++++++++++++++ packages/tasks/src/inference-providers.ts | 1 + 10 files changed, 158 insertions(+), 6 deletions(-) create mode 100644 packages/inference/src/providers/cohere.ts diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 357b34b51..cb6195ab0 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -42,6 +42,7 @@ jobs: env: HF_TOKEN: ${{ secrets.HF_TOKEN }} HF_BLACK_FOREST_LABS_KEY: dummy + HF_COHERE_KEY: dummy HF_FAL_KEY: dummy HF_FIREWORKS_KEY: dummy HF_HYPERBOLIC_KEY: dummy @@ -87,6 +88,7 @@ jobs: env: HF_TOKEN: ${{ secrets.HF_TOKEN }} HF_BLACK_FOREST_LABS_KEY: dummy + HF_COHERE_KEY: dummy HF_FAL_KEY: dummy HF_FIREWORKS_KEY: dummy HF_HYPERBOLIC_KEY: dummy @@ -159,6 +161,7 @@ jobs: NPM_CONFIG_REGISTRY: http://localhost:4874/ HF_TOKEN: ${{ secrets.HF_TOKEN }} HF_BLACK_FOREST_LABS_KEY: dummy + HF_COHERE_KEY: dummy HF_FAL_KEY: dummy HF_FIREWORKS_KEY: dummy HF_HYPERBOLIC_KEY: dummy diff --git a/README.md b/README.md index c12a01ef2..62c76a1a9 100644 --- a/README.md +++ b/README.md @@ -23,7 +23,7 @@ await uploadFile({ // Can work with native File in browsers file: { path: "pytorch_model.bin", - content: new Blob(...) + content: new Blob(...) } }); @@ -39,7 +39,7 @@ await inference.chatCompletion({ ], max_tokens: 512, temperature: 0.5, - provider: "sambanova", // or together, fal-ai, replicate, … + provider: "sambanova", // or together, fal-ai, replicate, cohere … }); await inference.textToImage({ @@ -146,12 +146,12 @@ for await (const chunk of inference.chatCompletionStream({ console.log(chunk.choices[0].delta.content); } -/// Using a third-party provider: +/// Using a third-party provider: await inference.chatCompletion({ model: "meta-llama/Llama-3.1-8B-Instruct", messages: [{ role: "user", content: "Hello, nice to meet you!" }], max_tokens: 512, - provider: "sambanova", // or together, fal-ai, replicate, … + provider: "sambanova", // or together, fal-ai, replicate, cohere … }) await inference.textToImage({ @@ -211,7 +211,7 @@ await uploadFile({ // Can work with native File in browsers file: { path: "pytorch_model.bin", - content: new Blob(...) + content: new Blob(...) } }); @@ -244,7 +244,7 @@ console.log(messages); // contains the data // or you can run the code directly, however you can't check that the code is safe to execute this way, use at your own risk. const messages = await agent.run("Draw a picture of a cat wearing a top hat. Then caption the picture and read it out loud.") -console.log(messages); +console.log(messages); ``` There are more features of course, check each library's README! diff --git a/packages/inference/README.md b/packages/inference/README.md index 3289fc674..a6c0bc4a5 100644 --- a/packages/inference/README.md +++ b/packages/inference/README.md @@ -56,6 +56,7 @@ Currently, we support the following providers: - [Sambanova](https://sambanova.ai) - [Together](https://together.xyz) - [Blackforestlabs](https://blackforestlabs.ai) +- [Cohere](https://cohere.com) To send requests to a third-party provider, you have to pass the `provider` parameter to the inference function. 
Make sure your request is authenticated with an access token. ```ts @@ -80,6 +81,7 @@ Only a subset of models are supported when requesting third-party providers. You - [Replicate supported models](https://huggingface.co/api/partners/replicate/models) - [Sambanova supported models](https://huggingface.co/api/partners/sambanova/models) - [Together supported models](https://huggingface.co/api/partners/together/models) +- [Cohere supported models](https://huggingface.co/api/partners/cohere/models) - [HF Inference API (serverless)](https://huggingface.co/models?inference=warm&sort=trending) ❗**Important note:** To be compatible, the third-party API must adhere to the "standard" shape API we expect on HF model pages for each pipeline task type. diff --git a/packages/inference/src/lib/makeRequestOptions.ts b/packages/inference/src/lib/makeRequestOptions.ts index 98fc277d7..8121938e3 100644 --- a/packages/inference/src/lib/makeRequestOptions.ts +++ b/packages/inference/src/lib/makeRequestOptions.ts @@ -1,5 +1,6 @@ import { HF_HUB_URL, HF_ROUTER_URL } from "../config"; import { BLACK_FOREST_LABS_CONFIG } from "../providers/black-forest-labs"; +import { COHERE_CONFIG } from "../providers/cohere"; import { FAL_AI_CONFIG } from "../providers/fal-ai"; import { FIREWORKS_AI_CONFIG } from "../providers/fireworks-ai"; import { HF_INFERENCE_CONFIG } from "../providers/hf-inference"; @@ -27,6 +28,7 @@ let tasks: Record | null = null; */ const providerConfigs: Record = { "black-forest-labs": BLACK_FOREST_LABS_CONFIG, + cohere: COHERE_CONFIG, "fal-ai": FAL_AI_CONFIG, "fireworks-ai": FIREWORKS_AI_CONFIG, "hf-inference": HF_INFERENCE_CONFIG, diff --git a/packages/inference/src/providers/cohere.ts b/packages/inference/src/providers/cohere.ts new file mode 100644 index 000000000..b48e07346 --- /dev/null +++ b/packages/inference/src/providers/cohere.ts @@ -0,0 +1,42 @@ +/** + * See the registered mapping of HF model ID => Cohere model ID here: + * + * https://huggingface.co/api/partners/cohere/models + * + * This is a publicly available mapping. + * + * If you want to try to run inference for a new model locally before it's registered on huggingface.co, + * you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes. + * + * - If you work at Cohere and want to update this mapping, please use the model mapping API we provide on huggingface.co + * - If you're a community member and want to add a new supported HF model to Cohere, please open an issue on the present repo + * and we will tag Cohere team members. + * + * Thanks! 
+ */ +import type { ProviderConfig, UrlParams, HeaderParams, BodyParams } from "../types"; + +const COHERE_API_BASE_URL = "https://api.cohere.com"; + + +const makeBody = (params: BodyParams): Record => { + return { + ...params.args, + model: params.model, + }; +}; + +const makeHeaders = (params: HeaderParams): Record => { + return { Authorization: `Bearer ${params.accessToken}` }; +}; + +const makeUrl = (params: UrlParams): string => { + return `${params.baseUrl}/compatibility/v1/chat/completions`; +}; + +export const COHERE_CONFIG: ProviderConfig = { + baseUrl: COHERE_API_BASE_URL, + makeBody, + makeHeaders, + makeUrl, +}; diff --git a/packages/inference/src/providers/consts.ts b/packages/inference/src/providers/consts.ts index b782767a1..6089a14c5 100644 --- a/packages/inference/src/providers/consts.ts +++ b/packages/inference/src/providers/consts.ts @@ -17,6 +17,7 @@ export const HARDCODED_MODEL_ID_MAPPING: Record; export const INFERENCE_PROVIDERS = [ "black-forest-labs", + "cohere", "fal-ai", "fireworks-ai", "hf-inference", diff --git a/packages/inference/test/HfInference.spec.ts b/packages/inference/test/HfInference.spec.ts index ec10bdc1d..69bbb83f0 100644 --- a/packages/inference/test/HfInference.spec.ts +++ b/packages/inference/test/HfInference.spec.ts @@ -1350,4 +1350,51 @@ describe.concurrent("HfInference", () => { }, TIMEOUT ); + describe.concurrent( + "Cohere", + () => { + const client = new HfInference(env.HF_COHERE_KEY); + + HARDCODED_MODEL_ID_MAPPING["cohere"] = { + "CohereForAI/c4ai-command-r7b-12-2024": "command-r7b-12-2024", + "CohereForAI/aya-expanse-8b": "c4ai-aya-expanse-8b", + }; + + it("chatCompletion", async () => { + const res = await client.chatCompletion({ + model: "CohereForAI/c4ai-command-r7b-12-2024", + provider: "cohere", + messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }], + }); + if (res.choices && res.choices.length > 0) { + const completion = res.choices[0].message?.content; + expect(completion).toContain("two"); + } + }); + + it("chatCompletion stream", async () => { + const stream = client.chatCompletionStream({ + model: "CohereForAI/c4ai-command-r7b-12-2024", + provider: "cohere", + messages: [{ role: "user", content: "Say 'this is a test'" }], + stream: true, + }) as AsyncGenerator; + + let fullResponse = ""; + for await (const chunk of stream) { + if (chunk.choices && chunk.choices.length > 0) { + const content = chunk.choices[0].delta?.content; + if (content) { + fullResponse += content; + } + } + } + + // Verify we got a meaningful response + expect(fullResponse).toBeTruthy(); + expect(fullResponse.length).toBeGreaterThan(0); + }); + }, + TIMEOUT + ); }); diff --git a/packages/inference/test/tapes.json b/packages/inference/test/tapes.json index 658ebdcc2..9479c41ce 100644 --- a/packages/inference/test/tapes.json +++ b/packages/inference/test/tapes.json @@ -7386,5 +7386,58 @@ "content-type": "image/jpeg" } } + }, + "cb34d07934bd210fd64da207415c49fc6e2870d3564164a2a5d541f713227fbf": { + "url": "https://api.cohere.com/compatibility/v1/chat/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Say 'this is a test'\"}],\"stream\":true,\"model\":\"command-r7b-12-2024\"}" + }, + "response": { + "body": "data: 
{\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\"\",\"role\":\"assistant\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\"This\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\" is\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\" a\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\" test\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":null,\"delta\":{\"content\":\".\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\"}\n\ndata: {\"id\":\"3178eb0c-d523-4504-bb82-01b8f02da6da\",\"choices\":[{\"index\":0,\"finish_reason\":\"stop\",\"delta\":{}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion.chunk\",\"usage\":{\"prompt_tokens\":7,\"completion_tokens\":5,\"total_tokens\":12}}\n\ndata: [DONE]\n\n", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-expose-headers": "X-Debug-Trace-ID", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "cache-control": "no-cache, no-store, no-transform, must-revalidate, private, max-age=0", + "content-type": "text/event-stream", + "expires": "Thu, 01 Jan 1970 00:00:00 UTC", + "pragma": "no-cache", + "server": "envoy", + "transfer-encoding": "chunked", + "vary": "Origin" + } + } + }, + "8c6ffbc794573c463ed5666e3b560e5966cd975c2893c901c18adb696ba54a6a": { + "url": "https://api.cohere.com/compatibility/v1/chat/completions", + "init": { + "headers": { + "Content-Type": "application/json" + }, + "method": "POST", + "body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}],\"model\":\"command-r7b-12-2024\"}" + }, + "response": { + "body": "{\"id\":\"f8bf661b-c600-44e5-8412-df37c9dcd985\",\"choices\":[{\"index\":0,\"finish_reason\":\"stop\",\"message\":{\"role\":\"assistant\",\"content\":\"One plus one is equal to two.\"}}],\"created\":1740652112,\"model\":\"command-r7b-12-2024\",\"object\":\"chat.completion\",\"usage\":{\"prompt_tokens\":11,\"completion_tokens\":8,\"total_tokens\":19}}", + "status": 200, + "statusText": "OK", + "headers": { + "access-control-expose-headers": "X-Debug-Trace-ID", + "alt-svc": "h3=\":443\"; ma=2592000,h3-29=\":443\"; ma=2592000", + "cache-control": "no-cache, no-store, no-transform, must-revalidate, private, max-age=0", + "content-type": "application/json", + "expires": "Thu, 01 Jan 1970 00:00:00 UTC", + "num_chars": "2635", + "num_tokens": "19", + "pragma": "no-cache", + "server": "envoy", + "vary": "Origin" + } + } } } \ No newline at end of file diff --git a/packages/tasks/src/inference-providers.ts 
b/packages/tasks/src/inference-providers.ts index 82f3d808f..49de2553a 100644 --- a/packages/tasks/src/inference-providers.ts +++ b/packages/tasks/src/inference-providers.ts @@ -1,6 +1,7 @@ /// This list is for illustration purposes only. /// in the `tasks` sub-package, we do not need actual strong typing of the inference providers. const INFERENCE_PROVIDERS = [ + "cohere", "fal-ai", "fireworks-ai", "hf-inference", From 49c0e2d15ae96a2b51539fba49c890446b898d6e Mon Sep 17 00:00:00 2001 From: Pedro Cuenca Date: Thu, 27 Feb 2025 16:02:23 +0100 Subject: [PATCH 8/8] Add llama3.3 license to types (#1237) Related: /~https://github.com/huggingface/hub-docs/pull/1619 Also missing from here apparently @SBrandeis --- packages/hub/src/types/public.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/packages/hub/src/types/public.ts b/packages/hub/src/types/public.ts index 0941a152f..6a5b4a300 100644 --- a/packages/hub/src/types/public.ts +++ b/packages/hub/src/types/public.ts @@ -176,6 +176,7 @@ export type License = | "llama3" | "llama3.1" | "llama3.2" + | "llama3.3" | "gemma" | "apple-ascl" | "apple-amlr"
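As a closing illustration of that last patch, the new value simply becomes a legal member of the `License` union (a sketch; whether `License` is re-exported from the package entry point is an assumption — otherwise reference it from `packages/hub/src/types/public.ts`):

```ts
// Hypothetical consumer-side check — not part of the PR itself.
import type { License } from "@huggingface/hub";

const license: License = "llama3.3"; // type-checks once this patch is merged
```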