In [12]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel, not whichever `pip` is first on PATH.
# NOTE(review): the captured log shows pip backtracking through several
# google-generativeai releases, and the notebook only imports
# langchain_google_genai — consider pinning versions or dropping the extra
# package once confirmed it is unused.
%pip install langchain-google-genai google-generativeai

Collecting langchain-google-genai
  Downloading langchain_google_genai-3.0.0-py3-none-any.whl.metadata (7.1 kB)
Collecting langchain-core<2.0.0,>=1.0.0 (from langchain-google-genai)
  Downloading langchain_core-1.0.3-py3-none-any.whl.metadata (3.5 kB)
Collecting google-ai-generativelanguage<1.0.0,>=0.7.0 (from langchain-google-genai)
  Downloading google_ai_generativelanguage-0.9.0-py3-none-any.whl.metadata (10 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
INFO: pip is looking at multiple versions of google-generativeai to determine which version is compatible with other requirements. This could take a while.
Collecting google-generativeai
  Downloading google_generativeai-0.8.4-py3-none-any.whl.metadata (4.2 kB)
  Downloading google_generativeai-0.8.3-py3-none-any.whl.metadata (3.9 kB)
  Downloading google_generativeai-0.8.2-py3-none-any.whl.metadata (3.9 kB)
  Downloading google_generativeai-0.8

In [13]:
from sentence_transformers import SentenceTransformer, util
from langchain_google_genai import ChatGoogleGenerativeAI
import textwrap

In [14]:
import os
from getpass import getpass

# Do not keep a key (or a placeholder that clobbers a real one) in the
# notebook: reuse GOOGLE_API_KEY when it is already exported, otherwise ask
# for it interactively without echoing the input.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Google API key: ")

In [15]:
# Sentence-embedding model used to vectorise both the chunks and the questions.
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Gemini chat model used for chunk enrichment and for answering. No key is
# passed here, so it presumably reads GOOGLE_API_KEY from the environment.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0.7,
)

In [16]:
# Toy source document for the demo: it is split into chunks, each chunk is
# enriched with context by the LLM, and the enriched chunks are then queried.
document = """
Contextual Augmented Generation (CAG) is an advancement over Retrieval Augmented Generation (RAG).
Unlike RAG, which simply retrieves and attaches chunks of text, CAG enriches each chunk by adding contextual cues.
This allows the model to understand the relationship between different sections of a document.
As a result, it can generate more coherent and contextually aware answers.
CAG is particularly useful for chatbots, customer support, and multi-turn dialogue systems.
"""


In [17]:
def create_chunks(text: str, chunk_size: int = 10) -> list:
    """Split ``text`` into consecutive chunks of at most ``chunk_size`` words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    (including newlines) acts as a separator and is collapsed to a single
    space inside each chunk. The last chunk may hold fewer words.

    Args:
        text: The text to split.
        chunk_size: Maximum number of words per chunk; must be >= 1.

    Returns:
        The chunk strings in document order; an empty list when ``text``
        contains no words.

    Raises:
        ValueError: If ``chunk_size`` is less than 1.
    """
    # Validate explicitly: a negative step makes range() empty, which would
    # silently drop the whole text, and a step of 0 fails with an unrelated
    # "range() arg 3 must not be zero" message.
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")
    words = text.split()
    return [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

In [18]:
# Split the document with the default chunk size and preview every chunk.
chunks = create_chunks(document)
print("Original Chunks:\n")
for chunk_number, chunk_text in enumerate(chunks, start=1):
    wrapped_text = textwrap.fill(chunk_text, width=80)
    print(f"Chunk {chunk_number}:", wrapped_text, "\n")

Original Chunks:

Chunk 1: Contextual Augmented Generation (CAG) is an advancement over Retrieval Augmented 

Chunk 2: Generation (RAG). Unlike RAG, which simply retrieves and attaches chunks 

Chunk 3: of text, CAG enriches each chunk by adding contextual cues. 

Chunk 4: This allows the model to understand the relationship between different 

Chunk 5: sections of a document. As a result, it can generate 

Chunk 6: more coherent and contextually aware answers. CAG is particularly useful 

Chunk 7: for chatbots, customer support, and multi-turn dialogue systems. 



In [25]:
def enrich_chunk(chunk, full_doc):
    """Ask the LLM to rewrite one chunk so that it is self-contained.

    The prompt embeds the whole document together with the target chunk and
    instructs the model to add only minimal context taken from that document.
    Uses the notebook-level ``llm``.

    Args:
        chunk: Text of the chunk to enrich.
        full_doc: Full document the chunk was taken from.

    Returns:
        The model's reply with leading and trailing whitespace removed.
    """
    enrichment_prompt = f"""
You are enhancing chunks of a document for contextual retrieval.

<document>
{full_doc}
</document>

Here is the specific chunk:
<chunk>
{chunk}
</chunk>

Task:
1. Add only minimal contextual details strictly derived from the document above to make this chunk self-contained.
2. Do NOT add or invent any external or new information.
3. Return the enriched version of the chunk directly without extra commentary.

Output the enriched chunk only.
"""
    llm_reply = llm.invoke(enrichment_prompt)
    enriched_text = llm_reply.content
    return enriched_text.strip()


In [26]:
# Enrich the chunks one at a time (one LLM call each), echoing every result
# as soon as it is available.
enriched_chunks = []
print("\nEnriched Chunks:\n")
for raw_chunk in chunks:
    enriched_chunks.append(enrich_chunk(raw_chunk, document))
    print("-", enriched_chunks[-1], "\n")


Enriched Chunks:

- Contextual Augmented Generation (CAG) is an advancement over Retrieval Augmented Generation (RAG). 

- Retrieval Augmented Generation (RAG). Unlike RAG, which simply retrieves and attaches chunks of text 

- Unlike Retrieval Augmented Generation (RAG), which simply retrieves and attaches chunks of text, Contextual Augmented Generation (CAG) enriches each chunk by adding contextual cues. 

- CAG enriching each chunk by adding contextual cues allows the model to understand the relationship between different sections of a document. 

- This allows the model to understand the relationship between different sections of a document. As a result, it can generate more coherent and contextually aware answers. 

- Contextual Augmented Generation (CAG) enables the generation of more coherent and contextually aware answers. CAG is particularly useful 

- CAG is particularly useful for chatbots, customer support, and multi-turn dialogue systems. 



In [27]:

# Embed all enriched chunks in one batch; the result is kept as a tensor so
# it can be scored directly against query embeddings.
enriched_embeddings = embedder.encode(
    enriched_chunks,
    convert_to_tensor=True,
)

In [28]:
# Display the embedding tensor as a quick sanity check before retrieval.
enriched_embeddings

tensor([[-0.0854, -0.0085,  0.0102,  ...,  0.0299, -0.0105,  0.0170],
        [-0.0985,  0.0346, -0.0097,  ...,  0.0299,  0.0094,  0.0259],
        [-0.0808,  0.0076,  0.0279,  ...,  0.0589,  0.0534, -0.0006],
        ...,
        [-0.0564, -0.0044, -0.0322,  ...,  0.1161,  0.0706,  0.0060],
        [-0.0303, -0.0713, -0.0349,  ...,  0.1081,  0.0149,  0.0103],
        [-0.0820, -0.0418,  0.0547,  ...,  0.0692,  0.0579, -0.0302]],
       device='cuda:0')

In [29]:
def retrieve_context(query, top_k=2):
    """Return the enriched chunks most similar to ``query``.

    Relies on the notebook-level ``embedder``, ``enriched_embeddings`` and
    ``enriched_chunks``.

    Args:
        query: Text to search for.
        top_k: Maximum number of chunks to return.

    Returns:
        Chunk texts ordered from highest to lowest cosine similarity.
    """
    query_vector = embedder.encode(query, convert_to_tensor=True)
    similarities = util.cos_sim(query_vector, enriched_embeddings)[0]
    # Rank every chunk first, then keep only the leading top_k positions.
    ranking = similarities.argsort(descending=True)
    best_positions = ranking[:top_k].tolist()
    return [enriched_chunks[position] for position in best_positions]

In [30]:
def answer_question(question, enriched_chunks, embeddings, top_k=2):
    """Answer ``question`` from the enriched chunks most similar to it.

    The question is embedded with the notebook-level ``embedder``, scored by
    cosine similarity against ``embeddings``, and the best-scoring chunks are
    handed to the notebook-level ``llm`` as the only context.

    Args:
        question: Natural-language question.
        enriched_chunks: Chunk texts; must line up index-for-index with the
            rows of ``embeddings`` because hits are looked up by position.
        embeddings: Embeddings of ``enriched_chunks``.
        top_k: Maximum number of chunks to place in the context. Values
            larger than the number of available scores are clamped.

    Returns:
        The model's answer with surrounding whitespace stripped.

    Raises:
        ValueError: If ``enriched_chunks`` is empty or ``top_k`` is below 1.
    """
    if not enriched_chunks:
        raise ValueError("enriched_chunks is empty; there is no context to retrieve from")
    if top_k < 1:
        raise ValueError(f"top_k must be a positive integer, got {top_k!r}")

    question_embedding = embedder.encode(question, convert_to_tensor=True)
    # Same similarity helper that retrieve_context uses.
    cos_scores = util.cos_sim(question_embedding, embeddings)[0]
    # topk() raises when k exceeds the number of scores, so clamp it.
    k = min(top_k, cos_scores.shape[0])
    top_indices = cos_scores.topk(k=k).indices.tolist()
    context = "\n".join(enriched_chunks[idx] for idx in top_indices)
    prompt = f"""
You are an intelligent assistant. Use only the provided context to answer the question clearly.

Context:
{context}

Question:
{question}

Answer briefly and factually based only on the context.
"""
    response = llm.invoke(prompt)
    return response.content.strip()


In [32]:
question = "How does CAG differ from RAG?"
answer = answer_question(question, enriched_chunks, enriched_embeddings)

# Each label is printed on its own line, preceded by a blank line.
print(f"\nQuestion: {question}")
print(f"\nAnswer: {answer}")


Question: How does CAG differ from RAG?

Answer: CAG enriches each retrieved chunk by adding contextual cues, whereas RAG simply retrieves and attaches chunks of text.
