Merge pull request #49 from cagostino/chris/plonk_rag
Chris/plonk rag
cagostino authored Jan 22, 2025
2 parents 4ccb492 + b8b6cf7 commit e790b3e
Showing 8 changed files with 153 additions and 334 deletions.
5 changes: 2 additions & 3 deletions npcsh/cli_helpers.py
@@ -2216,12 +2216,11 @@ def enter_spool_mode(
os.getcwd(),
)

#sometimes claude responds with unfinished markdown notation. so we need to check if there are two sets
#of markdown notation and if not, we add it. so if # markdown notations is odd we add one more
# sometimes claude responds with unfinished markdown notation. so we need to check if there are two sets
# of markdown notation and if not, we add it. so if # markdown notations is odd we add one more
if assistant_reply.count("```") % 2 != 0:
assistant_reply = assistant_reply + "```"


render_markdown(assistant_reply)

except (KeyboardInterrupt, EOFError):
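For reference, the fence-balancing check above is easy to exercise on its own. A minimal sketch (the helper name `balance_code_fences` is illustrative and not part of the commit):

```python
FENCE = "`" * 3  # the ``` markdown fence, built indirectly so it renders cleanly here


def balance_code_fences(reply: str) -> str:
    # An odd fence count means the model left a code block unclosed;
    # appending one more fence lets render_markdown display it correctly.
    if reply.count(FENCE) % 2 != 0:
        reply += FENCE
    return reply


print(balance_code_fences(FENCE + "python\nprint('hi')"))  # a closing fence is appended
```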
3 changes: 3 additions & 0 deletions npcsh/helpers.py
@@ -89,6 +89,9 @@ def ensure_npcshrc_exists() -> str:
npcshrc.write("export NPCSH_INITIALIZED=0\n")
npcshrc.write("export NPCSH_PROVIDER='ollama'\n")
npcshrc.write("export NPCSH_MODEL='llama3.2'\n")
npcshrc.write("export NPCSH_EMBEDDING_PROVIDER='ollama'\n")
npcshrc.write("export NPCSH_EMBEDDING_MODEL='nomic-embed-text'\n")

npcshrc.write("export NPCSH_API_URL=''")
npcshrc.write("export NPCSH_DB_PATH='~/npcsh_history.db'\n")
return npcshrc_path
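These two new exports are what llm_funcs.py (below) reads to choose its embedding defaults. A minimal sketch of that lookup, assuming ~/.npcshrc has been sourced into the environment before npcsh starts:

```python
import os

# Same defaults that ensure_npcshrc_exists writes into ~/.npcshrc above.
NPCSH_EMBEDDING_MODEL = os.environ.get("NPCSH_EMBEDDING_MODEL", "nomic-embed-text")
NPCSH_EMBEDDING_PROVIDER = os.environ.get("NPCSH_EMBEDDING_PROVIDER", "ollama")

print(f"embeddings via {NPCSH_EMBEDDING_PROVIDER}/{NPCSH_EMBEDDING_MODEL}")
```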
40 changes: 27 additions & 13 deletions npcsh/llm_funcs.py
@@ -35,7 +35,9 @@

from pydantic import BaseModel, Field

client = chromadb.PersistentClient(path="/home/caug/npcsh_chroma.db")
EMBEDDINGS_DB_PATH = os.path.expanduser("~/npcsh_chroma.db")

chroma_client = chromadb.PersistentClient(path=EMBEDDINGS_DB_PATH)


# Load environment variables from .env file
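Replacing the hard-coded `/home/caug/npcsh_chroma.db` path with an `expanduser` default makes the vector store user-agnostic. A minimal sketch of the same pattern; note the commit uses `get_collection` later, while `get_or_create_collection` shown here is a more defensive alternative (not what the diff does) that also works on a fresh database:

```python
import os
import chromadb

# "~" expands to the current user's home directory, so the same default
# works on any machine instead of only /home/caug.
EMBEDDINGS_DB_PATH = os.path.expanduser("~/npcsh_chroma.db")
chroma_client = chromadb.PersistentClient(path=EMBEDDINGS_DB_PATH)

# Collections are named f"{provider}_{model}_embeddings", e.g. for the defaults:
collection = chroma_client.get_or_create_collection("ollama_nomic-embed-text_embeddings")
```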
@@ -81,7 +83,8 @@ def load_env_from_execution_dir() -> None:
os.environ.get("NPCSH_VECTOR_DB_PATH", "~/npcsh_chroma.db")
)

NPCSH_EMBEDDING_MODEL = "nomic-embed-text"
NPCSH_EMBEDDING_MODEL = os.environ.get("NPCSH_EMBEDDING_MODEL","nomic-embed-text")
NPCSH_EMBEDDING_PROVIDER = os.environ.get("NPCSH_EMBEDDING_PROVIDER", "ollama")


def get_ollama_embeddings(
@@ -119,13 +122,22 @@ def get_anthropic_embeddings(
return embeddings


def store_embeddings_for_model(texts, embeddings, model, provider):
def store_embeddings_for_model(
texts,
embeddings,
metadata=None,
model: str = NPCSH_EMBEDDING_MODEL,
provider: str = NPCSH_EMBEDDING_PROVIDER,
):
collection_name = f"{provider}_{model}_embeddings"
collection = client.get_collection(collection_name)
collection = chroma_client.get_collection(collection_name)

# Create meaningful metadata for each document (adjust as necessary)
metadata = [{"text_length": len(text)} for text in texts] # Example metadata

if metadata is None:
metadata = [{"text_length": len(text)} for text in texts] # Example metadata
print(
"metadata is none, creating metadata for each document as the length of the text"
)
# Add embeddings to the collection with metadata
collection.add(
ids=[str(i) for i in range(len(texts))],
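The rest of the `collection.add` call is collapsed in the diff. A hedged sketch of the full storage path with the new optional-metadata behavior; the `documents` and `metadatas` keyword arguments follow Chroma's `add` API and are assumptions about the collapsed portion, not a verbatim copy of the commit:

```python
def store_embeddings_for_model(texts, embeddings, metadata=None,
                               model=NPCSH_EMBEDDING_MODEL,
                               provider=NPCSH_EMBEDDING_PROVIDER):
    collection = chroma_client.get_collection(f"{provider}_{model}_embeddings")
    if metadata is None:
        # Fall back to a minimal per-document record, as the diff does.
        metadata = [{"text_length": len(text)} for text in texts]
    collection.add(
        ids=[str(i) for i in range(len(texts))],  # shown in the diff
        embeddings=embeddings,
        documents=texts,       # assumed; this part of the call is collapsed
        metadatas=metadata,    # assumed keyword per Chroma's add API
    )
```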
@@ -141,16 +153,16 @@ def delete_embeddings_from_collection(collection, ids):
collection.delete(ids=ids) # Only delete if ids are provided


def search_similar_texts_for_model(
def search_similar_texts(
query_embedding: List[float],
embedding_model: str,
provider: str,
top_k: int = 5,
db_path: str = npcsh_vector_db_path,
embedding_model: str = NPCSH_EMBEDDING_MODEL,
embedding_provider: str = NPCSH_EMBEDDING_PROVIDER,
) -> List[dict]:
"""Search for similar texts in Chroma using KNN."""
collection_name = f"{provider}_{embedding_model}_embeddings"
collection = client.get_collection(collection_name)
collection_name = f"{embedding_provider}_{embedding_model}_embeddings"
collection = chroma_client.get_collection(collection_name)

search_results = collection.query(query_embedding, n_results=top_k)

@@ -168,7 +180,9 @@ def search_similar_texts_for_model(


def get_embeddings(
texts: List[str], provider: str = npcsh_provider, model: str = NPCSH_EMBEDDING_MODEL
texts: List[str],
model: str = NPCSH_EMBEDDING_MODEL,
provider: str = NPCSH_EMBEDDING_PROVIDER,
) -> List[List[float]]:
"""Generate embeddings using the specified provider and store them in Chroma."""
if provider == "ollama":
@@ -181,7 +195,7 @@ def get_embeddings(
raise ValueError(f"Unsupported provider: {provider}")

# Store the embeddings in the relevant Chroma collection
store_embeddings_for_model(texts, embeddings, model, provider)
#store_embeddings_for_model(texts, embeddings, model, provider)
return embeddings


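With the automatic `store_embeddings_for_model` call commented out inside `get_embeddings`, persistence is now the caller's responsibility. A hedged end-to-end sketch, assuming a local ollama server with `nomic-embed-text` pulled and that the target Chroma collection already exists:

```python
texts = ["npcsh keeps its command history in ~/npcsh_history.db"]

# Embed with the environment-configured defaults (ollama / nomic-embed-text).
vectors = get_embeddings(texts)

# Persist explicitly: get_embeddings no longer stores embeddings as a side effect.
store_embeddings_for_model(texts, vectors)

# KNN lookup against the provider/model-specific Chroma collection.
query_vec = get_embeddings(["where is the history database stored?"])[0]
print(search_similar_texts(query_vec, top_k=3))
```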
19 changes: 11 additions & 8 deletions npcsh/npc_compiler.py
@@ -638,13 +638,15 @@ class NPCCompiler:
def __init__(self, npc_directory, db_path):
self.npc_directory = npc_directory
self.dirs = [self.npc_directory]
if self.npc_directory == os.path.abspath("./npc_team"):
if self.npc_directory == os.path.abspath("./npc_team/"):
self.project_npc_directory = None
self.project_tools_directory = None
else:
self.project_npc_directory = os.path.abspath("./npc_team")
self.project_npc_directory = os.path.abspath("./npc_team/")
self.project_tools_directory = os.path.join(
self.project_npc_directory, "tools"
)
self.dirs.append(self.project_npc_directory)
self.project_tools_directory = os.path.join(self.project_npc_directory, "tools")

self.db_path = db_path
self.npc_cache = {}
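The constructor change means a compiler pointed at the global NPC directory skips project-level lookups, while any other directory also registers ./npc_team/ from the current working directory. A hedged illustration; the global path below is a placeholder, as the real default lives elsewhere in the package:

```python
import os

# Hypothetical global location; npcsh defines the actual default elsewhere.
global_npc_dir = os.path.expanduser("~/.npcsh/npc_team")

compiler = NPCCompiler(global_npc_dir, os.path.expanduser("~/npcsh_history.db"))
print(compiler.dirs)
# -> [global_npc_dir, os.path.abspath("./npc_team/")] whenever global_npc_dir differs
#    from the CWD's ./npc_team/; the existence check is deferred to parse_all_npcs (below).
```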
@@ -654,7 +656,6 @@ def __init__(self, npc_directory, db_path):
# Set tools directories
self.global_tools_directory = os.path.join(self.npc_directory, "tools")


# Initialize Jinja environment with multiple loaders
self.jinja_env = Environment(
loader=FileSystemLoader(self.dirs),
@@ -773,11 +774,13 @@ def load_tool_from_file(self, tool_path: str) -> Union[dict, None]:
return None

def parse_all_npcs(self) -> None:
print(self.dirs)
for directory in self.dirs:
for filename in os.listdir(directory):
if filename.endswith(".npc"):
npc_path = os.path.join(directory, filename)
self.parse_npc_file(npc_path)
if os.path.exists(directory):
for filename in os.listdir(directory):
if filename.endswith(".npc"):
npc_path = os.path.join(directory, filename)
self.parse_npc_file(npc_path)

def parse_npc_file(self, npc_file_path: str) -> dict:
npc_file = os.path.basename(npc_file_path)
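The new existence check keeps `os.listdir` from raising when the project-level ./npc_team/ directory has not been created. The same guard pattern in isolation (the helper name `find_npc_files` is hypothetical):

```python
import os


def find_npc_files(directories):
    """Collect *.npc files, skipping directories that do not exist."""
    npc_paths = []
    for directory in directories:
        # Guarding with os.path.exists mirrors the parse_all_npcs change above and
        # avoids FileNotFoundError when a project has no local npc_team folder.
        if os.path.exists(directory):
            for filename in os.listdir(directory):
                if filename.endswith(".npc"):
                    npc_paths.append(os.path.join(directory, filename))
    return npc_paths


print(find_npc_files([os.path.abspath("./npc_team/")]))  # [] when the folder is absent
```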
209 changes: 0 additions & 209 deletions npcsh/npcsh.py

This file was deleted.

