From 43590b8c434fc99bbed7b1a2a31ea5bf820d327c Mon Sep 17 00:00:00 2001 From: bhavyakeerthi3 Date: Fri, 6 Mar 2026 01:27:24 +0530 Subject: [PATCH 1/3] docs: add Windows PYTHONPATH instructions to README --- README.md | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index f38a4d3..bbe7037 100644 --- a/README.md +++ b/README.md @@ -30,11 +30,19 @@ Follow these steps to run the barebones Chainlit application. ```bash poetry install ``` -4. Verify your `PYTHONPATH` environment variable includes `./src`: +4. Verify your `PYTHONPATH` environment variable includes `./src`. + + Linux / macOS: ```bash echo $PYTHONPATH # ./src ``` + + Windows (PowerShell): + ```powershell + echo $env:PYTHONPATH + # ./src + ``` 5. List embeddings available for download: ```bash ./bin/embeddings_manager ls-remote @@ -82,7 +90,7 @@ Follow these steps to run the complete application in Docker. ```bash docker build -t reactome-chatbot . ``` -6. Start the Chainlit application and PostgrSQL database in Docker containers: +6. Start the Chainlit application and PostgreSQL database in Docker containers: ```bash docker-compose up From dd50760a7e843658c09efc9e1779c01ee524072b Mon Sep 17 00:00:00 2001 From: bhavyakeerthi3 Date: Fri, 6 Mar 2026 16:00:46 +0530 Subject: [PATCH 2/3] feat: add hallucination grader and fix web search dead branch --- src/agent/profiles/cross_database.py | 66 ++++++++++++++++++++++--- src/agent/tasks/hallucination_grader.py | 57 +++++++++++++++++++++ 2 files changed, 116 insertions(+), 7 deletions(-) create mode 100644 src/agent/tasks/hallucination_grader.py diff --git a/src/agent/profiles/cross_database.py b/src/agent/profiles/cross_database.py index 31ab21a..4d94772 100644 --- a/src/agent/profiles/cross_database.py +++ b/src/agent/profiles/cross_database.py @@ -1,5 +1,6 @@ from typing import Any, Literal +from langchain_core.documents import Document from langchain_core.embeddings import Embeddings from langchain_core.language_models.chat_models import BaseChatModel from langchain_core.messages import AIMessage, HumanMessage @@ -15,19 +16,29 @@ create_uniprot_rewriter_w_reactome from agent.tasks.cross_database.summarize_reactome_uniprot import \ create_reactome_uniprot_summarizer +from agent.tasks.hallucination_grader import (HallucinationGrade, + create_hallucination_grader, + format_documents) from retrievers.reactome.rag import create_reactome_rag from retrievers.uniprot.rag import create_uniprot_rag +from tools.external_search.state import WebSearchResult +from tools.external_search.tavily_wrapper import TavilyWrapper class CrossDatabaseState(BaseState): reactome_query: str # LLM-generated query for Reactome reactome_answer: str # LLM-generated answer from Reactome + reactome_context: list[Document] # Retrieved docs used to generate reactome_answer reactome_completeness: str # LLM-assessed completeness of the Reactome answer + reactome_hallucination: str # "Yes" = grounded, "No" = hallucinated uniprot_query: str # LLM-generated query for UniProt uniprot_answer: str # LLM-generated answer from UniProt + uniprot_context: list[Document] # Retrieved docs used to generate uniprot_answer uniprot_completeness: str # LLM-assessed completeness of the UniProt answer + uniprot_hallucination: str # "Yes" = grounded, "No" = hallucinated + web_search_results: list[WebSearchResult] # Tavily results when both DBs are incomplete class CrossDatabaseGraphBuilder(BaseGraphBuilder): def __init__( @@ -42,6 +53,7 @@ def __init__( self.uniprot_rag: Runnable = create_uniprot_rag(llm, embedding) self.completeness_checker = create_completeness_grader(llm) + self.hallucination_grader = create_hallucination_grader(llm) self.write_reactome_query = create_reactome_rewriter_w_uniprot(llm) self.write_uniprot_query = create_uniprot_rewriter_w_reactome(llm) self.summarize_final_answer = create_reactome_uniprot_summarizer( @@ -55,13 +67,16 @@ def __init__( state_graph.add_node("preprocess_question", self.preprocess) state_graph.add_node("conduct_research", self.conduct_research) state_graph.add_node("generate_reactome_answer", self.generate_reactome_answer) + state_graph.add_node("check_reactome_hallucination", self.check_reactome_hallucination) state_graph.add_node("rewrite_reactome_query", self.rewrite_reactome_query) state_graph.add_node("rewrite_reactome_answer", self.rewrite_reactome_answer) state_graph.add_node("generate_uniprot_answer", self.generate_uniprot_answer) + state_graph.add_node("check_uniprot_hallucination", self.check_uniprot_hallucination) state_graph.add_node("rewrite_uniprot_query", self.rewrite_uniprot_query) state_graph.add_node("rewrite_uniprot_answer", self.rewrite_uniprot_answer) state_graph.add_node("assess_completeness", self.assess_completeness) state_graph.add_node("decide_next_steps", self.decide_next_steps) + state_graph.add_node("perform_web_search", self.perform_web_search) state_graph.add_node("generate_final_response", self.generate_final_response) state_graph.add_node("postprocess", self.postprocess) # Set up edges @@ -74,14 +89,18 @@ def __init__( ) state_graph.add_edge("conduct_research", "generate_reactome_answer") state_graph.add_edge("conduct_research", "generate_uniprot_answer") - state_graph.add_edge("generate_reactome_answer", "assess_completeness") - state_graph.add_edge("generate_uniprot_answer", "assess_completeness") + # Hallucination checks run immediately after each DB answer is generated + state_graph.add_edge("generate_reactome_answer", "check_reactome_hallucination") + state_graph.add_edge("generate_uniprot_answer", "check_uniprot_hallucination") + # Both hallucination checks feed into completeness assessment + state_graph.add_edge("check_reactome_hallucination", "assess_completeness") + state_graph.add_edge("check_uniprot_hallucination", "assess_completeness") state_graph.add_conditional_edges( "assess_completeness", self.decide_next_steps, { "generate_final_response": "generate_final_response", - "perform_web_search": "generate_final_response", + "perform_web_search": "perform_web_search", "rewrite_reactome_query": "rewrite_reactome_query", "rewrite_uniprot_query": "rewrite_uniprot_query", }, @@ -90,6 +109,7 @@ def __init__( state_graph.add_edge("rewrite_uniprot_query", "rewrite_uniprot_answer") state_graph.add_edge("rewrite_reactome_answer", "generate_final_response") state_graph.add_edge("rewrite_uniprot_answer", "generate_final_response") + state_graph.add_edge("perform_web_search", "generate_final_response") state_graph.add_edge("generate_final_response", "postprocess") state_graph.set_finish_point("postprocess") @@ -116,26 +136,58 @@ async def conduct_research( async def generate_reactome_answer( self, state: CrossDatabaseState, config: RunnableConfig ) -> CrossDatabaseState: - reactome_answer: dict[str, Any] = await self.reactome_rag.ainvoke( + reactome_result: dict[str, Any] = await self.reactome_rag.ainvoke( { "input": state["rephrased_input"], "chat_history": state["chat_history"], }, config, ) - return CrossDatabaseState(reactome_answer=reactome_answer["answer"]) + return CrossDatabaseState( + reactome_answer=reactome_result["answer"], + reactome_context=reactome_result.get("context", []), + ) + + async def check_reactome_hallucination( + self, state: CrossDatabaseState, config: RunnableConfig + ) -> CrossDatabaseState: + """Grade whether the Reactome answer is grounded in its retrieved documents.""" + grade: HallucinationGrade = await self.hallucination_grader.ainvoke( + { + "documents": format_documents(state.get("reactome_context", [])), + "generation": state["reactome_answer"], + }, + config, + ) + return CrossDatabaseState(reactome_hallucination=grade.binary_score) async def generate_uniprot_answer( self, state: CrossDatabaseState, config: RunnableConfig ) -> CrossDatabaseState: - uniprot_answer: dict[str, Any] = await self.uniprot_rag.ainvoke( + uniprot_result: dict[str, Any] = await self.uniprot_rag.ainvoke( { "input": state["rephrased_input"], "chat_history": state["chat_history"], }, config, ) - return CrossDatabaseState(uniprot_answer=uniprot_answer["answer"]) + return CrossDatabaseState( + uniprot_answer=uniprot_result["answer"], + uniprot_context=uniprot_result.get("context", []), + ) + + async def check_uniprot_hallucination( + self, state: CrossDatabaseState, config: RunnableConfig + ) -> CrossDatabaseState: + """Grade whether the UniProt answer is grounded in its retrieved documents.""" + grade: HallucinationGrade = await self.hallucination_grader.ainvoke( + { + "documents": format_documents(state.get("uniprot_context", [])), + "generation": state["uniprot_answer"], + }, + config, + ) + return CrossDatabaseState(uniprot_hallucination=grade.binary_score) async def rewrite_reactome_query( self, state: CrossDatabaseState, config: RunnableConfig diff --git a/src/agent/tasks/hallucination_grader.py b/src/agent/tasks/hallucination_grader.py new file mode 100644 index 0000000..3ccd9c6 --- /dev/null +++ b/src/agent/tasks/hallucination_grader.py @@ -0,0 +1,57 @@ +from langchain_core.documents import Document +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.prompts import ChatPromptTemplate +from langchain_core.runnables import Runnable +from pydantic import BaseModel, Field + +hallucination_grader_message = """\ +You are an expert scientific fact-checker with deep knowledge of molecular biology, \ +the Reactome Pathway Knowledgebase, and the UniProt Knowledgebase. + +Your task is to assess whether a given LLM-generated answer is **grounded** in the \ +provided source documents. An answer is grounded if every factual claim it makes can \ +be directly traced to the retrieved context below. + +Respond with a binary output: + - Yes: Every factual claim in the answer is supported by the retrieved documents. + - No: The answer contains at least one claim that is NOT supported by the \ +retrieved documents (i.e., hallucinated or fabricated). + +Do NOT penalise an answer for being incomplete — only penalise unsupported claims. +Do NOT use any external knowledge; judge only against the provided documents. +""" + +hallucination_grader_prompt = ChatPromptTemplate.from_messages( + [ + ("system", hallucination_grader_message), + ( + "human", + "Retrieved documents:\n\n{documents}\n\nLLM generation:\n\n{generation}", + ), + ] +) + + +class HallucinationGrade(BaseModel): + binary_score: str = Field( + description=( + "Indicates whether the answer is grounded in the retrieved documents. " + "'Yes' means fully grounded, 'No' means at least one hallucinated claim." + ) + ) + reason: str = Field( + default="", + description=( + "If binary_score is 'No', briefly state which claim is not supported. " + "Leave empty when fully grounded." + ), + ) + + +def format_documents(documents: list[Document]) -> str: + """Concatenate document page content for prompt injection.""" + return "\n\n".join(doc.page_content for doc in documents) + + +def create_hallucination_grader(llm: BaseChatModel) -> Runnable: + return hallucination_grader_prompt | llm.with_structured_output(HallucinationGrade) From 5fb6e53cb5beaf08dccf891b0afdcc872e10a612 Mon Sep 17 00:00:00 2001 From: bhavyakeerthi3 Date: Sat, 7 Mar 2026 17:43:14 +0530 Subject: [PATCH 3/3] feat: add multi-language support and complete web search integration --- src/agent/profiles/cross_database.py | 4 ++ .../summarize_reactome_uniprot.py | 41 ++++++++----------- src/agent/tasks/rephrase.py | 16 ++++---- 3 files changed, 28 insertions(+), 33 deletions(-) diff --git a/src/agent/profiles/cross_database.py b/src/agent/profiles/cross_database.py index 4d94772..1274566 100644 --- a/src/agent/profiles/cross_database.py +++ b/src/agent/profiles/cross_database.py @@ -275,12 +275,16 @@ async def decide_next_steps(self, state: CrossDatabaseState) -> Literal[ async def generate_final_response( self, state: CrossDatabaseState, config: RunnableConfig ) -> CrossDatabaseState: + web_results_text = TavilyWrapper.format_results( + state.get("web_search_results", []) + ) final_response: str = await self.summarize_final_answer.ainvoke( { "input": state["rephrased_input"], "detected_language": state["detected_language"], "reactome_answer": state["reactome_answer"], "uniprot_answer": state["uniprot_answer"], + "web_results": web_results_text, }, config, ) diff --git a/src/agent/tasks/cross_database/summarize_reactome_uniprot.py b/src/agent/tasks/cross_database/summarize_reactome_uniprot.py index 8ea1746..b5ba698 100644 --- a/src/agent/tasks/cross_database/summarize_reactome_uniprot.py +++ b/src/agent/tasks/cross_database/summarize_reactome_uniprot.py @@ -4,31 +4,24 @@ from langchain_core.runnables import Runnable summarization_message = """ -You are an expert in molecular biology with significant experience as a curator for the UniProt Database adn the Reactome Pathway Knowledgebase. -Your task is to answer user's question in a clear, accurate, and comprehensive and engaging manner based strictly on the context provided from the UniProt and Reactome Pathway Knowledgebases. +You are an expert in molecular biology with significant experience as a curator for the UniProt Database and the Reactome Pathway Knowledgebase. +Your task is to answer the user's question in a clear, accurate, comprehensive, and engaging manner. -Instructions: - 1. Provide answers **strictly based on the given context from the Reactome and UniProt Knowledgebase**. Do **not** use or infer information from any external sources. - 2. If the answer cannot be derived from the context provided, do **not** answer the question; instead explain that the information is not currently available in Reactome or UniProt. - 3. Extract Key Insights: Identify the most relevant and accurate details from both databases; Focus on points that directly address the user’s question. - 4. Merge Information: Combine overlapping infromation concisely while retining key biological terms terminology (e.g., gene names, protein names, pathway names, disease involvement, etc.) - 5. Ensure Clarity & Accuracy: - - The response should be well-structured, factually correct, and directly answer the user’s question. - - Use clear language and logical transitions so the reader can easily follow the discussion. - 4. Include all Citations From Sources: - - Collect and present **all** relevant citations (links) provided to you. - - Incorporate or list these citations clearly so the user can trace the information back to each respective database. - - Example: - - Reactome Citations: - - Apoptosis - - Cell Cycle - - UniProt Citations: - - GATA6 - - NR5A2 +IMPORTANT: +1. **Language**: You MUST provide the answer in the following language: **{detected_language}**. +2. **Context**: Base your answer strictly on the provided context from UniProt, Reactome, and (if provided) external web search results. +3. **Accuracy**: Maintain exact biological terminology (gene names, protein IDs, pathway names, etc.) even when translating the explanation. +4. **Citations**: Include all provided links/citations. - 5. Answer in the Language requested. - 6. Write in a conversational and engaging tone suitable for a chatbot. - 6. Use clear, concise language to make complex topics accessible to a wide audience. +Instructions: + 1. Provide answers strictly based on the provided context. Do **not** use or infer information from external knowledge not provided here. + 2. If the answer cannot be derived from the context, explain that the information is not currently available in Reactome or UniProt in the requested language. + 3. Merge information concisely while retaining key terminology. + 4. Format citations clearly: + - Reactome Citations: List links provided in the Reactome context. + - UniProt Citations: List links provided in the UniProt context. + - External Sources: List links from the Web Search results if available. + 5. Write in a conversational and engaging tone suitable for a scientific chatbot. """ summarizer_prompt = ChatPromptTemplate.from_messages( @@ -36,7 +29,7 @@ ("system", summarization_message), ( "human", - "User question: {input} \n\n Language: {detected_language} \n\n Reactome-drived information: \n {reactome_answer} \n\n UniProt-drived infromation: \n {uniprot_answer}.", + "User question: {input} \n\n Target Language: {detected_language} \n\n Reactome Information: \n {reactome_answer} \n\n UniProt Information: \n {uniprot_answer} \n\n Web Search Results (optional): \n {web_results}", ), ] ) diff --git a/src/agent/tasks/rephrase.py b/src/agent/tasks/rephrase.py index 1851747..cc2c5d0 100644 --- a/src/agent/tasks/rephrase.py +++ b/src/agent/tasks/rephrase.py @@ -4,16 +4,14 @@ from langchain_core.runnables import Runnable contextualize_q_system_prompt = """ -You are an expert in question formulation with deep expertise in molecular biology and experience as a Reactome curator. Your task is to analyze the conversation history and the user’s latest query to fully understand their intent and what they seek to learn. -If the user's question is not in English, reformulate the question and translate it to English, ensuring the meaning and intent are preserved. -Reformulate the user’s question into a standalone version that retains its full meaning without requiring prior context. The reformulated question should be: - - Clear, concise, and precise - - Optimized for both vector search (semantic meaning) and case-sensitive keyword search - - Faithful to the user’s intent and scientific accuracy +You are an expert in question formulation for molecular biology. +Your task is to analyze the conversation history and the user's latest query to create a standalone version of the question. -the returned question should always be in English. -If the user’s question is already in English, self-contained and well-formed, return it as is. -Do NOT answer the question or provide explanations. +IMPORTANT: +- If the user's question is NOT in English, translate it to English for this step. +- Internal Search Optimization: This English translation is strictly for optimizing vector search and keyword matching in the Reactome and UniProt databases. +- The standalone question should be clear, concise, and scientifically accurate. +- Do NOT answer the question. Only return the reformulated English question. """ contextualize_q_prompt = ChatPromptTemplate.from_messages(