Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions config_default.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@
profiles:
- React-to-Me

retriever:
context_truncation:
max_docs: 15
max_tokens: 12000

features:
postprocessing: # external web search feature
enabled: true
Expand Down
5 changes: 3 additions & 2 deletions src/retrievers/csv_chroma.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from nltk.tokenize import word_tokenize
from pydantic import AfterValidator, Field
from pydantic.json_schema import SkipJsonSchema
from util.context_truncator import truncate_to_token_limit

chroma_settings = chromadb.config.Settings(anonymized_telemetry=False)

Expand Down Expand Up @@ -179,7 +180,7 @@ def retrieve_documents(self, queries: list[str], run_manager) -> list[Document]:
)
doc_lists.append(bm25_docs + vector_docs)
subdirectory_docs.extend(self.weighted_reciprocal_rank(doc_lists))
return subdirectory_docs
return truncate_to_token_limit(subdirectory_docs)

async def aretrieve_documents(
self, queries: list[str], run_manager
Expand Down Expand Up @@ -219,4 +220,4 @@ async def aretrieve_documents(
for bm25_results, vector_results in zip(results_iter, results_iter)
]
subdirectory_docs.extend(self.weighted_reciprocal_rank(doc_lists))
return subdirectory_docs
return truncate_to_token_limit(subdirectory_docs)
32 changes: 32 additions & 0 deletions src/util/context_truncator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from langchain_core.documents import Document
import tiktoken


def truncate_to_token_limit(
    docs: list[Document],
    max_docs: int = 15,
    max_tokens: int = 12000,
    model: str = "gpt-4o",
) -> list[Document]:
    """
    Truncate a ranked document list to fit within count and token budgets.

    Docs must already be ordered best-to-worst (e.g. by weighted
    reciprocal rank); truncation cuts from the bottom so the least
    relevant docs are removed first. The first doc is always kept even
    if it alone exceeds ``max_tokens``, so callers never lose all context.

    Defaults mirror ``retriever.context_truncation`` in config_default.yml.

    Args:
        docs: Ranked documents, best first.
        max_docs: Hard cap on the number of documents returned.
        max_tokens: Token budget across the returned docs' page content.
        model: Model name used to select the tiktoken encoding.

    Returns:
        A prefix of ``docs`` honoring both budgets (empty only when
        ``docs`` is empty).
    """
    try:
        encoder = tiktoken.encoding_for_model(model)
    except KeyError:
        # encoding_for_model raises KeyError for model names tiktoken
        # doesn't recognize (e.g. newly released or custom models).
        # Fall back to gpt-4o's encoding rather than failing retrieval.
        encoder = tiktoken.get_encoding("o200k_base")

    result: list[Document] = []
    total_tokens = 0

    for doc in docs[:max_docs]:
        doc_tokens = len(encoder.encode(doc.page_content))

        if total_tokens + doc_tokens > max_tokens:
            # Always include at least one doc even if it exceeds budget,
            # so downstream prompting never runs with zero context.
            if not result:
                result.append(doc)
            break

        result.append(doc)
        total_tokens += doc_tokens

    return result