diff --git a/README.md b/README.md
index f38a4d3..bbe7037 100644
--- a/README.md
+++ b/README.md
@@ -30,11 +30,19 @@ Follow these steps to run the barebones Chainlit application.
```bash
poetry install
```
-4. Verify your `PYTHONPATH` environment variable includes `./src`:
+4. Verify your `PYTHONPATH` environment variable includes `./src`.
+
+ Linux / macOS:
```bash
echo $PYTHONPATH
# ./src
```
+
+ Windows (PowerShell):
+ ```powershell
+ echo $env:PYTHONPATH
+ # ./src
+ ```
5. List embeddings available for download:
```bash
./bin/embeddings_manager ls-remote
@@ -82,7 +90,7 @@ Follow these steps to run the complete application in Docker.
```bash
docker build -t reactome-chatbot .
```
-6. Start the Chainlit application and PostgrSQL database in Docker containers:
+6. Start the Chainlit application and PostgreSQL database in Docker containers:
```bash
docker-compose up
diff --git a/src/agent/profiles/cross_database.py b/src/agent/profiles/cross_database.py
index 31ab21a..1274566 100644
--- a/src/agent/profiles/cross_database.py
+++ b/src/agent/profiles/cross_database.py
@@ -1,5 +1,6 @@
from typing import Any, Literal
+from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, HumanMessage
@@ -15,19 +16,29 @@
create_uniprot_rewriter_w_reactome
from agent.tasks.cross_database.summarize_reactome_uniprot import \
create_reactome_uniprot_summarizer
+from agent.tasks.hallucination_grader import (HallucinationGrade,
+ create_hallucination_grader,
+ format_documents)
from retrievers.reactome.rag import create_reactome_rag
from retrievers.uniprot.rag import create_uniprot_rag
+from tools.external_search.state import WebSearchResult
+from tools.external_search.tavily_wrapper import TavilyWrapper
class CrossDatabaseState(BaseState):
reactome_query: str # LLM-generated query for Reactome
reactome_answer: str # LLM-generated answer from Reactome
+ reactome_context: list[Document] # Retrieved docs used to generate reactome_answer
reactome_completeness: str # LLM-assessed completeness of the Reactome answer
+ reactome_hallucination: str # "Yes" = grounded, "No" = hallucinated
uniprot_query: str # LLM-generated query for UniProt
uniprot_answer: str # LLM-generated answer from UniProt
+ uniprot_context: list[Document] # Retrieved docs used to generate uniprot_answer
uniprot_completeness: str # LLM-assessed completeness of the UniProt answer
+ uniprot_hallucination: str # "Yes" = grounded, "No" = hallucinated
+ web_search_results: list[WebSearchResult] # Tavily results when both DBs are incomplete
class CrossDatabaseGraphBuilder(BaseGraphBuilder):
def __init__(
@@ -42,6 +53,7 @@ def __init__(
self.uniprot_rag: Runnable = create_uniprot_rag(llm, embedding)
self.completeness_checker = create_completeness_grader(llm)
+ self.hallucination_grader = create_hallucination_grader(llm)
self.write_reactome_query = create_reactome_rewriter_w_uniprot(llm)
self.write_uniprot_query = create_uniprot_rewriter_w_reactome(llm)
self.summarize_final_answer = create_reactome_uniprot_summarizer(
@@ -55,13 +67,16 @@ def __init__(
state_graph.add_node("preprocess_question", self.preprocess)
state_graph.add_node("conduct_research", self.conduct_research)
state_graph.add_node("generate_reactome_answer", self.generate_reactome_answer)
+ state_graph.add_node("check_reactome_hallucination", self.check_reactome_hallucination)
state_graph.add_node("rewrite_reactome_query", self.rewrite_reactome_query)
state_graph.add_node("rewrite_reactome_answer", self.rewrite_reactome_answer)
state_graph.add_node("generate_uniprot_answer", self.generate_uniprot_answer)
+ state_graph.add_node("check_uniprot_hallucination", self.check_uniprot_hallucination)
state_graph.add_node("rewrite_uniprot_query", self.rewrite_uniprot_query)
state_graph.add_node("rewrite_uniprot_answer", self.rewrite_uniprot_answer)
state_graph.add_node("assess_completeness", self.assess_completeness)
state_graph.add_node("decide_next_steps", self.decide_next_steps)
+ state_graph.add_node("perform_web_search", self.perform_web_search)
state_graph.add_node("generate_final_response", self.generate_final_response)
state_graph.add_node("postprocess", self.postprocess)
# Set up edges
@@ -74,14 +89,18 @@ def __init__(
)
state_graph.add_edge("conduct_research", "generate_reactome_answer")
state_graph.add_edge("conduct_research", "generate_uniprot_answer")
- state_graph.add_edge("generate_reactome_answer", "assess_completeness")
- state_graph.add_edge("generate_uniprot_answer", "assess_completeness")
+ # Hallucination checks run immediately after each DB answer is generated
+ state_graph.add_edge("generate_reactome_answer", "check_reactome_hallucination")
+ state_graph.add_edge("generate_uniprot_answer", "check_uniprot_hallucination")
+ # Both hallucination checks feed into completeness assessment
+ state_graph.add_edge("check_reactome_hallucination", "assess_completeness")
+ state_graph.add_edge("check_uniprot_hallucination", "assess_completeness")
state_graph.add_conditional_edges(
"assess_completeness",
self.decide_next_steps,
{
"generate_final_response": "generate_final_response",
- "perform_web_search": "generate_final_response",
+ "perform_web_search": "perform_web_search",
"rewrite_reactome_query": "rewrite_reactome_query",
"rewrite_uniprot_query": "rewrite_uniprot_query",
},
@@ -90,6 +109,7 @@ def __init__(
state_graph.add_edge("rewrite_uniprot_query", "rewrite_uniprot_answer")
state_graph.add_edge("rewrite_reactome_answer", "generate_final_response")
state_graph.add_edge("rewrite_uniprot_answer", "generate_final_response")
+ state_graph.add_edge("perform_web_search", "generate_final_response")
state_graph.add_edge("generate_final_response", "postprocess")
state_graph.set_finish_point("postprocess")
@@ -116,26 +136,58 @@ async def conduct_research(
async def generate_reactome_answer(
self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
- reactome_answer: dict[str, Any] = await self.reactome_rag.ainvoke(
+ reactome_result: dict[str, Any] = await self.reactome_rag.ainvoke(
{
"input": state["rephrased_input"],
"chat_history": state["chat_history"],
},
config,
)
- return CrossDatabaseState(reactome_answer=reactome_answer["answer"])
+ return CrossDatabaseState(
+ reactome_answer=reactome_result["answer"],
+ reactome_context=reactome_result.get("context", []),
+ )
+
+ async def check_reactome_hallucination(
+ self, state: CrossDatabaseState, config: RunnableConfig
+ ) -> CrossDatabaseState:
+ """Grade whether the Reactome answer is grounded in its retrieved documents."""
+ grade: HallucinationGrade = await self.hallucination_grader.ainvoke(
+ {
+ "documents": format_documents(state.get("reactome_context", [])),
+ "generation": state["reactome_answer"],
+ },
+ config,
+ )
+ return CrossDatabaseState(reactome_hallucination=grade.binary_score)
async def generate_uniprot_answer(
self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
- uniprot_answer: dict[str, Any] = await self.uniprot_rag.ainvoke(
+ uniprot_result: dict[str, Any] = await self.uniprot_rag.ainvoke(
{
"input": state["rephrased_input"],
"chat_history": state["chat_history"],
},
config,
)
- return CrossDatabaseState(uniprot_answer=uniprot_answer["answer"])
+ return CrossDatabaseState(
+ uniprot_answer=uniprot_result["answer"],
+ uniprot_context=uniprot_result.get("context", []),
+ )
+
+ async def check_uniprot_hallucination(
+ self, state: CrossDatabaseState, config: RunnableConfig
+ ) -> CrossDatabaseState:
+ """Grade whether the UniProt answer is grounded in its retrieved documents."""
+ grade: HallucinationGrade = await self.hallucination_grader.ainvoke(
+ {
+ "documents": format_documents(state.get("uniprot_context", [])),
+ "generation": state["uniprot_answer"],
+ },
+ config,
+ )
+ return CrossDatabaseState(uniprot_hallucination=grade.binary_score)
async def rewrite_reactome_query(
self, state: CrossDatabaseState, config: RunnableConfig
@@ -223,12 +275,16 @@ async def decide_next_steps(self, state: CrossDatabaseState) -> Literal[
async def generate_final_response(
self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
+ web_results_text = TavilyWrapper.format_results(
+ state.get("web_search_results", [])
+ )
final_response: str = await self.summarize_final_answer.ainvoke(
{
"input": state["rephrased_input"],
"detected_language": state["detected_language"],
"reactome_answer": state["reactome_answer"],
"uniprot_answer": state["uniprot_answer"],
+ "web_results": web_results_text,
},
config,
)
diff --git a/src/agent/tasks/cross_database/summarize_reactome_uniprot.py b/src/agent/tasks/cross_database/summarize_reactome_uniprot.py
index 8ea1746..b5ba698 100644
--- a/src/agent/tasks/cross_database/summarize_reactome_uniprot.py
+++ b/src/agent/tasks/cross_database/summarize_reactome_uniprot.py
@@ -4,31 +4,24 @@
from langchain_core.runnables import Runnable
summarization_message = """
-You are an expert in molecular biology with significant experience as a curator for the UniProt Database adn the Reactome Pathway Knowledgebase.
-Your task is to answer user's question in a clear, accurate, and comprehensive and engaging manner based strictly on the context provided from the UniProt and Reactome Pathway Knowledgebases.
+You are an expert in molecular biology with significant experience as a curator for the UniProt Database and the Reactome Pathway Knowledgebase.
+Your task is to answer the user's question in a clear, accurate, comprehensive, and engaging manner.
-Instructions:
- 1. Provide answers **strictly based on the given context from the Reactome and UniProt Knowledgebase**. Do **not** use or infer information from any external sources.
- 2. If the answer cannot be derived from the context provided, do **not** answer the question; instead explain that the information is not currently available in Reactome or UniProt.
- 3. Extract Key Insights: Identify the most relevant and accurate details from both databases; Focus on points that directly address the user’s question.
- 4. Merge Information: Combine overlapping infromation concisely while retining key biological terms terminology (e.g., gene names, protein names, pathway names, disease involvement, etc.)
- 5. Ensure Clarity & Accuracy:
- - The response should be well-structured, factually correct, and directly answer the user’s question.
- - Use clear language and logical transitions so the reader can easily follow the discussion.
- 4. Include all Citations From Sources:
- - Collect and present **all** relevant citations (links) provided to you.
- - Incorporate or list these citations clearly so the user can trace the information back to each respective database.
- - Example:
- - Reactome Citations:
- - Apoptosis
- - Cell Cycle
- - UniProt Citations:
- - GATA6
- - NR5A2
+IMPORTANT:
+1. **Language**: You MUST provide the answer in the following language: **{detected_language}**.
+2. **Context**: Base your answer strictly on the provided context from UniProt, Reactome, and (if provided) external web search results.
+3. **Accuracy**: Maintain exact biological terminology (gene names, protein IDs, pathway names, etc.) even when translating the explanation.
+4. **Citations**: Include all provided links/citations.
- 5. Answer in the Language requested.
- 6. Write in a conversational and engaging tone suitable for a chatbot.
- 6. Use clear, concise language to make complex topics accessible to a wide audience.
+Instructions:
+ 1. Provide answers strictly based on the provided context. Do **not** use or infer information from external knowledge not provided here.
+  2. If the answer cannot be derived from the context, explain in the requested language that the information is not currently available in Reactome or UniProt.
+ 3. Merge information concisely while retaining key terminology.
+ 4. Format citations clearly:
+ - Reactome Citations: List links provided in the Reactome context.
+ - UniProt Citations: List links provided in the UniProt context.
+ - External Sources: List links from the Web Search results if available.
+ 5. Write in a conversational and engaging tone suitable for a scientific chatbot.
"""
summarizer_prompt = ChatPromptTemplate.from_messages(
@@ -36,7 +29,7 @@
("system", summarization_message),
(
"human",
- "User question: {input} \n\n Language: {detected_language} \n\n Reactome-drived information: \n {reactome_answer} \n\n UniProt-drived infromation: \n {uniprot_answer}.",
+ "User question: {input} \n\n Target Language: {detected_language} \n\n Reactome Information: \n {reactome_answer} \n\n UniProt Information: \n {uniprot_answer} \n\n Web Search Results (optional): \n {web_results}",
),
]
)
diff --git a/src/agent/tasks/hallucination_grader.py b/src/agent/tasks/hallucination_grader.py
new file mode 100644
index 0000000..3ccd9c6
--- /dev/null
+++ b/src/agent/tasks/hallucination_grader.py
@@ -0,0 +1,57 @@
+from langchain_core.documents import Document
+from langchain_core.language_models.chat_models import BaseChatModel
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import Runnable
+from pydantic import BaseModel, Field
+
+hallucination_grader_message = """\
+You are an expert scientific fact-checker with deep knowledge of molecular biology, \
+the Reactome Pathway Knowledgebase, and the UniProt Knowledgebase.
+
+Your task is to assess whether a given LLM-generated answer is **grounded** in the \
+provided source documents. An answer is grounded if every factual claim it makes can \
+be directly traced to the retrieved context below.
+
+Respond with a binary output:
+ - Yes: Every factual claim in the answer is supported by the retrieved documents.
+ - No: The answer contains at least one claim that is NOT supported by the \
+retrieved documents (i.e., hallucinated or fabricated).
+
+Do NOT penalise an answer for being incomplete — only penalise unsupported claims.
+Do NOT use any external knowledge; judge only against the provided documents.
+"""
+
+hallucination_grader_prompt = ChatPromptTemplate.from_messages(
+ [
+ ("system", hallucination_grader_message),
+ (
+ "human",
+ "Retrieved documents:\n\n{documents}\n\nLLM generation:\n\n{generation}",
+ ),
+ ]
+)
+
+
+class HallucinationGrade(BaseModel):
+ binary_score: str = Field(
+ description=(
+ "Indicates whether the answer is grounded in the retrieved documents. "
+ "'Yes' means fully grounded, 'No' means at least one hallucinated claim."
+ )
+ )
+ reason: str = Field(
+ default="",
+ description=(
+ "If binary_score is 'No', briefly state which claim is not supported. "
+ "Leave empty when fully grounded."
+ ),
+ )
+
+
+def format_documents(documents: list[Document]) -> str:
+ """Concatenate document page content for prompt injection."""
+ return "\n\n".join(doc.page_content for doc in documents)
+
+
+def create_hallucination_grader(llm: BaseChatModel) -> Runnable:
+ return hallucination_grader_prompt | llm.with_structured_output(HallucinationGrade)
diff --git a/src/agent/tasks/rephrase.py b/src/agent/tasks/rephrase.py
index 1851747..cc2c5d0 100644
--- a/src/agent/tasks/rephrase.py
+++ b/src/agent/tasks/rephrase.py
@@ -4,16 +4,14 @@
from langchain_core.runnables import Runnable
contextualize_q_system_prompt = """
-You are an expert in question formulation with deep expertise in molecular biology and experience as a Reactome curator. Your task is to analyze the conversation history and the user’s latest query to fully understand their intent and what they seek to learn.
-If the user's question is not in English, reformulate the question and translate it to English, ensuring the meaning and intent are preserved.
-Reformulate the user’s question into a standalone version that retains its full meaning without requiring prior context. The reformulated question should be:
- - Clear, concise, and precise
- - Optimized for both vector search (semantic meaning) and case-sensitive keyword search
- - Faithful to the user’s intent and scientific accuracy
+You are an expert in question formulation for molecular biology.
+Your task is to analyze the conversation history and the user's latest query to create a standalone version of the question.
-the returned question should always be in English.
-If the user’s question is already in English, self-contained and well-formed, return it as is.
-Do NOT answer the question or provide explanations.
+IMPORTANT:
+- If the user's question is NOT in English, translate it to English for this step.
+- Internal Search Optimization: This English translation is strictly for optimizing vector search and keyword matching in the Reactome and UniProt databases.
+- The standalone question should be clear, concise, and scientifically accurate.
+- Do NOT answer the question. Only return the reformulated English question.
"""
contextualize_q_prompt = ChatPromptTemplate.from_messages(