In [None]:
!pip install langchain langchain-community wikipedia



In [None]:
!pip install langchain_experimental



In [None]:
import os
import time



# Neo4j Aura connection details.
# Values already present in the environment are kept as-is; anything missing is
# requested interactively, so no credential has to be written into the notebook.
import getpass

for _var in ("NEO4J_URI", "NEO4J_USERNAME", "NEO4J_PASSWORD", "NEO4J_DATABASE"):
    if not os.environ.get(_var):
        # Hide the password while it is typed; the other settings are not secret.
        _ask = getpass.getpass if _var == "NEO4J_PASSWORD" else input
        os.environ[_var] = _ask(f"{_var}: ")

# Echo the non-secret settings as a sanity check (the password is never printed).
print("Neo4j URI:", os.environ.get("NEO4J_URI"))
print("Neo4j Username:", os.environ.get("NEO4J_USERNAME"))
print("Neo4j Database:", os.environ.get("NEO4J_DATABASE"))


Neo4j URI: neo4j+s://9ab2001a.databases.neo4j.io
Neo4j Username: neo4j
Neo4j Database: neo4j


In [None]:
# WikipediaLoader is maintained in langchain_community; the old
# `langchain.document_loaders` path is only a deprecated re-export.
from langchain_community.document_loaders import WikipediaLoader
# NOTE(review): `langchain.text_splitter` is also a legacy path; newer releases
# expose TokenTextSplitter from `langchain_text_splitters` - confirm against the
# installed version before switching.
from langchain.text_splitter import TokenTextSplitter

# Load raw documents for the Wikipedia query "Elizabeth I"
raw_docs = WikipediaLoader(query="Elizabeth I").load()

# Split into token-based chunks (512 tokens, 24 overlapping).
# Only the first three loaded documents are chunked, which bounds the amount of
# text sent through the LLM graph extraction later on.
splitter = TokenTextSplitter(chunk_size=512, chunk_overlap=24)
documents = splitter.split_documents(raw_docs[:3])




  lis = BeautifulSoup(html).find_all('li')


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
import os
import getpass

# Keep a key that is already exported; otherwise ask for it without echoing,
# so the secret is never stored in the notebook source.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("GOOGLE_API_KEY: ")

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_community.graphs import Neo4jGraph

# Gemini chat model, reused by the graph transformer and the later chains
llm = ChatGoogleGenerativeAI(temperature=0, model="gemini-2.5-flash")

# Let the LLM turn the text chunks into graph documents
llm_transformer = LLMGraphTransformer(llm=llm)
graph_docs = llm_transformer.convert_to_graph_documents(documents)

# Open the Neo4j connection (no arguments are passed; presumably the NEO4J_*
# environment variables set earlier are used) and store the extracted graph
graph = Neo4jGraph()
graph.add_graph_documents(
    graph_docs,
    include_source=True,
    baseEntityLabel=True,
)

# Full-text index named `entity` over the `id` property of
# Person / Organization / Event / Location nodes; created only if missing
_ENTITY_INDEX_STATEMENT = """
CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (n:Person|Organization|Event|Location) ON EACH [n.id] OPTIONS {
  indexConfig: {
    `fulltext.analyzer`: 'standard',
    `fulltext.eventually_consistent`: true
  }
}
"""
graph.query(_ENTITY_INDEX_STATEMENT)

[]

In [None]:
from langchain_community.vectorstores import Neo4jVector
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding model handed to the vector store
_embedding_model = GoogleGenerativeAIEmbeddings(model="text-embedding-004")

# Hybrid index built on the existing `Document` nodes: their `text` property
# is the indexed text and the vectors are kept in the `embedding` property.
vector_index = Neo4jVector.from_existing_graph(
    _embedding_model,
    embedding_node_property="embedding",
    text_node_properties=["text"],
    node_label="Document",
    search_type="hybrid",
)

In [None]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.prompts import ChatPromptTemplate

class Entities(BaseModel):
    # Structured-output schema for entity extraction: a single required list
    # of entity names.
    names: list[str] = Field(
        ...,
        description="Person/organization entities in text",
    )


# Two-message extraction prompt; the user's question fills `{question}`.
_ENTITY_SYSTEM_MESSAGE = ("system", "You are extracting organization and person entities from the text.")
_ENTITY_HUMAN_MESSAGE = ("human", "Use the given format to extract info: {question}")
prompt = ChatPromptTemplate.from_messages([_ENTITY_SYSTEM_MESSAGE, _ENTITY_HUMAN_MESSAGE])

# Prompt piped into the LLM bound to the Entities schema; callers read `.names`
# from the result.
_entity_llm = llm.with_structured_output(Entities)
entity_chain = prompt | _entity_llm


In [None]:
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars

def generate_full_text_query(input: str) -> str:
    """Turn free text into a fuzzy, all-terms-required full-text query.

    The text is first passed through `remove_lucene_chars`, then split on
    whitespace. Every resulting word gets the `~2` suffix (Lucene fuzzy
    matching) and the words are combined with ` AND `.

    Args:
        input: Raw text, e.g. an entity name.

    Returns:
        The query string, or an empty string when no words remain.
    """
    fuzzy_terms = []
    for token in remove_lucene_chars(input).split():
        if token:
            fuzzy_terms.append(token + "~2")
    return " AND ".join(fuzzy_terms)

def structured_retriever(question: str) -> str:
    """Collect graph relationships for the entities mentioned in a question.

    The question is sent to `entity_chain`; for every extracted name the
    `entity` full-text index is searched (at most 2 matching nodes) and the
    matches' outgoing and incoming relationships, excluding MENTIONS, are
    rendered as ``source - TYPE -> target`` lines (at most 50 per entity).

    Args:
        question: The user question to extract entities from.

    Returns:
        All relationship lines joined with newlines; an empty string when no
        entity was extracted or nothing matched.
    """
    lines = []
    entities = entity_chain.invoke({"question": question}).names
    for entity in entities:
        full_text_query = generate_full_text_query(entity)
        if not full_text_query:
            # Nothing searchable is left once the name is sanitised; an empty
            # string is not a usable full-text query, so skip this entity.
            continue
        resp = graph.query("""
            CALL db.index.fulltext.queryNodes('entity', $query, {limit:2})
            YIELD node, score
            CALL {
              WITH node
              MATCH (node)-[r:!MENTIONS]->(neighbor)
              RETURN node.id + ' - ' + type(r) + ' -> ' + neighbor.id AS output
              UNION ALL
              WITH node
              MATCH (node)<-[r:!MENTIONS]-(neighbor)
              RETURN neighbor.id + ' - ' + type(r) + ' -> ' + node.id AS output
            }
            RETURN output LIMIT 50
        """, {"query": full_text_query})
        lines.extend(el['output'] for el in resp)
    # Join once at the end so that the rows of consecutive entities are
    # separated by a newline instead of running into each other.
    return "\n".join(lines)


In [None]:
def retriever(question: str):
    """Build the combined context string for a question.

    Runs `structured_retriever` first, then a similarity search on
    `vector_index`, and returns one string containing both results.

    Args:
        question: The (standalone) question to retrieve context for.

    Returns:
        Text with a "Structured data:" section holding the graph lines and an
        "Unstructured data:" section holding the retrieved page contents joined
        by "#Document ".
    """
    graph_context = structured_retriever(question)
    hits = vector_index.similarity_search(question)
    passages = "#Document ".join(hit.page_content for hit in hits)
    return f"Structured data:\n{graph_context}\nUnstructured data:\n{passages}"


In [None]:
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.runnables import RunnableLambda, RunnableBranch, RunnablePassthrough, RunnableParallel
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Prompt that asks the model to rewrite a follow-up question as a standalone
# one; it takes `chat_history` and `question`.
_CONDENSE_QUESTION_TEMPLATE = """Given chat history and follow-up question, rewrite it as a standalone question.
    Chat History: {chat_history}
    Follow Up Input: {question}
    Standalone question:"""
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_CONDENSE_QUESTION_TEMPLATE)

def _format_chat_history(chat_history):
    buffer = []
    for human, ai in chat_history:
        buffer.append(HumanMessage(content=human))
        buffer.append(AIMessage(content=ai))
    return buffer

# Produces the question used for retrieval:
# - with chat history: the history is converted to messages and the model
#   rewrites the follow-up into a standalone question;
# - without chat history: the question is used unchanged.
_search_query = RunnableBranch(
    (
        RunnableLambda(lambda x: bool(x.get("chat_history"))),
        RunnablePassthrough.assign(chat_history=lambda x: _format_chat_history(x["chat_history"]))
        | CONDENSE_QUESTION_PROMPT
        | llm
        | StrOutputParser(),
    ),
    RunnableLambda(lambda x: x["question"]),
)

# Final answering prompt: retrieved context plus the question.
template = ChatPromptTemplate.from_template(
    """Answer the question based only on the following context:
    {context}
    Question: {question}
    Use natural language and be concise.
    Answer:"""
)

# Resolve the standalone question once and use it for both retrieval and the
# answer prompt. This keeps `{question}` a plain string (not the whole input
# dict) and keeps pronouns from a follow-up resolved in the final prompt.
chain = (
    RunnablePassthrough.assign(question=_search_query)
    | RunnableParallel(
        {
            "context": RunnableLambda(lambda x: x["question"]) | retriever,
            "question": RunnableLambda(lambda x: x["question"]),
        }
    )
    | template
    | llm
    | StrOutputParser()
)


# Demo: a follow-up question that only makes sense together with the history.
print(chain.invoke({"question": "When was she born?", "chat_history": [("Which house did Elizabeth I belong to?", "House Of Tudor",)]}))




She was born on 7 September 1533.
