Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,19 @@ Follow these steps to run the barebones Chainlit application.
```bash
poetry install
```
4. Verify your `PYTHONPATH` environment variable includes `./src`:
4. Verify your `PYTHONPATH` environment variable includes `./src`.

Linux / macOS:
```bash
echo $PYTHONPATH
# ./src
```

Windows (PowerShell):
```powershell
echo $env:PYTHONPATH
# ./src
```
5. List embeddings available for download:
```bash
./bin/embeddings_manager ls-remote
Expand Down Expand Up @@ -82,7 +90,7 @@ Follow these steps to run the complete application in Docker.
```bash
docker build -t reactome-chatbot .
```
6. Start the Chainlit application and PostgrSQL database in Docker containers:
6. Start the Chainlit application and PostgreSQL database in Docker containers:
```bash
docker-compose up

Expand Down
70 changes: 63 additions & 7 deletions src/agent/profiles/cross_database.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from typing import Any, Literal

from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import AIMessage, HumanMessage
Expand All @@ -15,19 +16,29 @@
create_uniprot_rewriter_w_reactome
from agent.tasks.cross_database.summarize_reactome_uniprot import \
create_reactome_uniprot_summarizer
from agent.tasks.hallucination_grader import (HallucinationGrade,
create_hallucination_grader,
format_documents)
from retrievers.reactome.rag import create_reactome_rag
from retrievers.uniprot.rag import create_uniprot_rag
from tools.external_search.state import WebSearchResult
from tools.external_search.tavily_wrapper import TavilyWrapper


class CrossDatabaseState(BaseState):
    """Shared graph state for the cross-database (Reactome + UniProt) agent.

    Each node in ``CrossDatabaseGraphBuilder`` reads/writes a subset of these
    fields; the per-database ``*_context`` lists feed the hallucination checks.
    """

    reactome_query: str  # LLM-generated query for Reactome
    reactome_answer: str  # LLM-generated answer from Reactome
    reactome_context: list[Document]  # Retrieved docs used to generate reactome_answer
    reactome_completeness: str  # LLM-assessed completeness of the Reactome answer
    reactome_hallucination: str  # "Yes" = grounded, "No" = hallucinated

    uniprot_query: str  # LLM-generated query for UniProt
    uniprot_answer: str  # LLM-generated answer from UniProt
    uniprot_context: list[Document]  # Retrieved docs used to generate uniprot_answer
    uniprot_completeness: str  # LLM-assessed completeness of the UniProt answer
    uniprot_hallucination: str  # "Yes" = grounded, "No" = hallucinated

    web_search_results: list[WebSearchResult]  # Tavily results when both DBs are incomplete

class CrossDatabaseGraphBuilder(BaseGraphBuilder):
def __init__(
Expand All @@ -42,6 +53,7 @@ def __init__(
self.uniprot_rag: Runnable = create_uniprot_rag(llm, embedding)

self.completeness_checker = create_completeness_grader(llm)
self.hallucination_grader = create_hallucination_grader(llm)
self.write_reactome_query = create_reactome_rewriter_w_uniprot(llm)
self.write_uniprot_query = create_uniprot_rewriter_w_reactome(llm)
self.summarize_final_answer = create_reactome_uniprot_summarizer(
Expand All @@ -55,13 +67,16 @@ def __init__(
state_graph.add_node("preprocess_question", self.preprocess)
state_graph.add_node("conduct_research", self.conduct_research)
state_graph.add_node("generate_reactome_answer", self.generate_reactome_answer)
state_graph.add_node("check_reactome_hallucination", self.check_reactome_hallucination)
state_graph.add_node("rewrite_reactome_query", self.rewrite_reactome_query)
state_graph.add_node("rewrite_reactome_answer", self.rewrite_reactome_answer)
state_graph.add_node("generate_uniprot_answer", self.generate_uniprot_answer)
state_graph.add_node("check_uniprot_hallucination", self.check_uniprot_hallucination)
state_graph.add_node("rewrite_uniprot_query", self.rewrite_uniprot_query)
state_graph.add_node("rewrite_uniprot_answer", self.rewrite_uniprot_answer)
state_graph.add_node("assess_completeness", self.assess_completeness)
state_graph.add_node("decide_next_steps", self.decide_next_steps)
state_graph.add_node("perform_web_search", self.perform_web_search)
state_graph.add_node("generate_final_response", self.generate_final_response)
state_graph.add_node("postprocess", self.postprocess)
# Set up edges
Expand All @@ -74,14 +89,18 @@ def __init__(
)
state_graph.add_edge("conduct_research", "generate_reactome_answer")
state_graph.add_edge("conduct_research", "generate_uniprot_answer")
state_graph.add_edge("generate_reactome_answer", "assess_completeness")
state_graph.add_edge("generate_uniprot_answer", "assess_completeness")
# Hallucination checks run immediately after each DB answer is generated
state_graph.add_edge("generate_reactome_answer", "check_reactome_hallucination")
state_graph.add_edge("generate_uniprot_answer", "check_uniprot_hallucination")
# Both hallucination checks feed into completeness assessment
state_graph.add_edge("check_reactome_hallucination", "assess_completeness")
state_graph.add_edge("check_uniprot_hallucination", "assess_completeness")
state_graph.add_conditional_edges(
"assess_completeness",
self.decide_next_steps,
{
"generate_final_response": "generate_final_response",
"perform_web_search": "generate_final_response",
"perform_web_search": "perform_web_search",
"rewrite_reactome_query": "rewrite_reactome_query",
"rewrite_uniprot_query": "rewrite_uniprot_query",
},
Expand All @@ -90,6 +109,7 @@ def __init__(
state_graph.add_edge("rewrite_uniprot_query", "rewrite_uniprot_answer")
state_graph.add_edge("rewrite_reactome_answer", "generate_final_response")
state_graph.add_edge("rewrite_uniprot_answer", "generate_final_response")
state_graph.add_edge("perform_web_search", "generate_final_response")
state_graph.add_edge("generate_final_response", "postprocess")
state_graph.set_finish_point("postprocess")

Expand All @@ -116,26 +136,58 @@ async def conduct_research(
async def generate_reactome_answer(
    self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
    """Query the Reactome RAG chain and record its answer and source docs.

    The retrieved documents are stored in ``reactome_context`` so the
    hallucination check can grade the answer against its own sources.
    """
    reactome_result: dict[str, Any] = await self.reactome_rag.ainvoke(
        {
            "input": state["rephrased_input"],
            "chat_history": state["chat_history"],
        },
        config,
    )
    return CrossDatabaseState(
        reactome_answer=reactome_result["answer"],
        # Default to [] in case the RAG chain returns no "context" key.
        reactome_context=reactome_result.get("context", []),
    )

async def check_reactome_hallucination(
    self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
    """Run the hallucination grader over the Reactome answer and record its verdict."""
    grader_input = {
        "documents": format_documents(state.get("reactome_context", [])),
        "generation": state["reactome_answer"],
    }
    verdict: HallucinationGrade = await self.hallucination_grader.ainvoke(
        grader_input, config
    )
    return CrossDatabaseState(reactome_hallucination=verdict.binary_score)

async def generate_uniprot_answer(
    self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
    """Query the UniProt RAG chain and record its answer and source docs.

    The retrieved documents are stored in ``uniprot_context`` so the
    hallucination check can grade the answer against its own sources.
    """
    uniprot_result: dict[str, Any] = await self.uniprot_rag.ainvoke(
        {
            "input": state["rephrased_input"],
            "chat_history": state["chat_history"],
        },
        config,
    )
    return CrossDatabaseState(
        uniprot_answer=uniprot_result["answer"],
        # Default to [] in case the RAG chain returns no "context" key.
        uniprot_context=uniprot_result.get("context", []),
    )

async def check_uniprot_hallucination(
    self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
    """Run the hallucination grader over the UniProt answer and record its verdict."""
    grader_input = {
        "documents": format_documents(state.get("uniprot_context", [])),
        "generation": state["uniprot_answer"],
    }
    verdict: HallucinationGrade = await self.hallucination_grader.ainvoke(
        grader_input, config
    )
    return CrossDatabaseState(uniprot_hallucination=verdict.binary_score)

async def rewrite_reactome_query(
self, state: CrossDatabaseState, config: RunnableConfig
Expand Down Expand Up @@ -223,12 +275,16 @@ async def decide_next_steps(self, state: CrossDatabaseState) -> Literal[
async def generate_final_response(
self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
web_results_text = TavilyWrapper.format_results(
state.get("web_search_results", [])
)
final_response: str = await self.summarize_final_answer.ainvoke(
{
"input": state["rephrased_input"],
"detected_language": state["detected_language"],
"reactome_answer": state["reactome_answer"],
"uniprot_answer": state["uniprot_answer"],
"web_results": web_results_text,
},
config,
)
Expand Down
41 changes: 17 additions & 24 deletions src/agent/tasks/cross_database/summarize_reactome_uniprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,39 +4,32 @@
from langchain_core.runnables import Runnable

# System prompt for the final cross-database summarizer. The {detected_language}
# placeholder is filled by the summarizer chain at invocation time.
summarization_message = """
You are an expert in molecular biology with significant experience as a curator for the UniProt Database and the Reactome Pathway Knowledgebase.
Your task is to answer the user's question in a clear, accurate, comprehensive, and engaging manner.

IMPORTANT:
1. **Language**: You MUST provide the answer in the following language: **{detected_language}**.
2. **Context**: Base your answer strictly on the provided context from UniProt, Reactome, and (if provided) external web search results.
3. **Accuracy**: Maintain exact biological terminology (gene names, protein IDs, pathway names, etc.) even when translating the explanation.
4. **Citations**: Include all provided links/citations.

Instructions:
1. Provide answers strictly based on the provided context. Do **not** use or infer information from external knowledge not provided here.
2. If the answer cannot be derived from the context, explain that the information is not currently available in Reactome or UniProt in the requested language.
3. Merge information concisely while retaining key terminology.
4. Format citations clearly:
   - Reactome Citations: List links provided in the Reactome context.
   - UniProt Citations: List links provided in the UniProt context.
   - External Sources: List links from the Web Search results if available.
5. Write in a conversational and engaging tone suitable for a scientific chatbot.
"""

# Chat prompt pairing the summarization system message with a human turn that
# carries the question, target language, both DB answers, and optional web results.
summarizer_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", summarization_message),
        (
            "human",
            "User question: {input} \n\n Target Language: {detected_language} \n\n Reactome Information: \n {reactome_answer} \n\n UniProt Information: \n {uniprot_answer} \n\n Web Search Results (optional): \n {web_results}",
        ),
    ]
)
Expand Down
57 changes: 57 additions & 0 deletions src/agent/tasks/hallucination_grader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from typing import Literal

from langchain_core.documents import Document
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable
from pydantic import BaseModel, Field

# System prompt for the LLM-as-judge hallucination check: the model must reply
# with a binary "Yes"/"No" on whether the generated answer is grounded in the
# retrieved documents. Incompleteness is explicitly NOT penalised — only
# unsupported claims are.
hallucination_grader_message = """\
You are an expert scientific fact-checker with deep knowledge of molecular biology, \
the Reactome Pathway Knowledgebase, and the UniProt Knowledgebase.

Your task is to assess whether a given LLM-generated answer is **grounded** in the \
provided source documents. An answer is grounded if every factual claim it makes can \
be directly traced to the retrieved context below.

Respond with a binary output:
- Yes: Every factual claim in the answer is supported by the retrieved documents.
- No: The answer contains at least one claim that is NOT supported by the \
retrieved documents (i.e., hallucinated or fabricated).

Do NOT penalise an answer for being incomplete — only penalise unsupported claims.
Do NOT use any external knowledge; judge only against the provided documents.
"""

# Two-turn prompt: system message above, plus a human turn carrying the
# {documents} context and the {generation} to be graded.
hallucination_grader_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", hallucination_grader_message),
        (
            "human",
            "Retrieved documents:\n\n{documents}\n\nLLM generation:\n\n{generation}",
        ),
    ]
)


class HallucinationGrade(BaseModel):
    """Structured verdict produced by the hallucination grader LLM.

    ``binary_score`` is constrained to the exact two strings the grader prompt
    requests ("Yes"/"No"), so the structured-output schema rejects free-form
    variants such as "yes", "Maybe", or longer sentences.
    """

    # "Yes" = fully grounded in the retrieved documents; "No" = at least one
    # hallucinated claim.
    binary_score: Literal["Yes", "No"] = Field(
        description=(
            "Indicates whether the answer is grounded in the retrieved documents. "
            "'Yes' means fully grounded, 'No' means at least one hallucinated claim."
        )
    )
    # Optional short justification; only expected to be filled for "No".
    reason: str = Field(
        default="",
        description=(
            "If binary_score is 'No', briefly state which claim is not supported. "
            "Leave empty when fully grounded."
        ),
    )


def format_documents(documents: list[Document]) -> str:
    """Join the page content of each document into one prompt-ready string.

    Documents are separated by a blank line; an empty list yields "".
    """
    contents = [doc.page_content for doc in documents]
    return "\n\n".join(contents)


def create_hallucination_grader(llm: BaseChatModel) -> Runnable:
    """Build the grading chain: grader prompt piped into an LLM that emits
    a structured ``HallucinationGrade``."""
    structured_llm = llm.with_structured_output(HallucinationGrade)
    return hallucination_grader_prompt | structured_llm
16 changes: 7 additions & 9 deletions src/agent/tasks/rephrase.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,14 @@
from langchain_core.runnables import Runnable

# System prompt for the question-rephrasing step: produce a standalone English
# question optimized for internal (Reactome/UniProt) retrieval.
contextualize_q_system_prompt = """
You are an expert in question formulation for molecular biology.
Your task is to analyze the conversation history and the user's latest query to create a standalone version of the question.

IMPORTANT:
- If the user's question is NOT in English, translate it to English for this step.
- Internal Search Optimization: This English translation is strictly for optimizing vector search and keyword matching in the Reactome and UniProt databases.
- The standalone question should be clear, concise, and scientifically accurate.
- Do NOT answer the question. Only return the reformulated English question.
"""

contextualize_q_prompt = ChatPromptTemplate.from_messages(
Expand Down