Using an LLM to enhance the discoverability of your content is quickly becoming a baseline requirement for documentation. Thankfully, it’s not too hard to do thanks to Hugo’s ability to output your content as JSON.
At a high level, you’ll need some server-side code in Python or JS that routes user questions to ChatGPT along with relevant context retrieved from embeddings created from your docs JSON.
How it Works #
This partial sends an API request to a GCP Cloud Function (which you’ll need to set up) that uses Flask, built into the Functions Framework, to:
- Search a Pinecone vector database filled with embeddings created from your documentation.
- Perform a similarity search and return the 4 most relevant chunks.
- Forward those chunks to the OpenAI API via LangChain for retrieval-augmented generation (RAG).
- Return an answer based on the question and content provided.
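In other words, the contract between the partial and the function is a plain HTTP GET with the question passed as a query parameter, and a JSON response containing the generated answer plus metadata for the pages used as context. A minimal sketch of a client call (the endpoint URL is a placeholder for your own deployment):
import requests

# Placeholder: substitute the HTTPS trigger URL of your own cloud function
ENDPOINT = "https://YOUR-CLOUD-FUNCTION-URL"

response = requests.get(ENDPOINT, params={"query": "How do I enable the chat partial?"})
data = response.json()

print(data["answer"])            # the generated answer
for doc in data["docs"]:         # metadata for the chunks used as context
    print(doc["title"], doc["relURI"])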
have it your way
There are several ways to implement a RAG LLM UX; this is just the one that currently works for me. In the future, people may well shift from LangChain to OpenAI’s official Assistants API. Hopefully sharing this implementation helps you achieve yours!
Set Up #
To use this feature, you’re going to need to:
- Set up a vector DB (it doesn’t have to be Pinecone; LangChain supports multiple options).
- Convert your site’s index.json into embeddings and save them to the DB.
- Deploy a cloud function that can accept and route questions.
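Before creating embeddings, it’s worth checking that your exported index has the shape the loader below expects: a JSON array of pages, each with title, relURI, and body fields. A quick, hypothetical sanity check (adjust the path and field names to match your output template):
import json

# Expects something like [{"title": ..., "relURI": ..., "body": ...}, ...]
with open("./docs.json") as f:
    records = json.load(f)

print(f"{len(records)} pages exported")
for record in records[:3]:
    print(record["title"], record["relURI"], f"{len(record['body'])} chars")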
python 3.12
The tiktoken requirement runs into issues on Python 3.12; for now, I recommend using 3.10 if deploying with a GCP function.
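For reference, here’s a rough sketch of the requirements both scripts in this guide pull from. Treat it as a starting point rather than a tested manifest: pin the versions you actually build against, and note that jq is needed by LangChain’s JSONLoader while functions-framework is only needed by the cloud function:
langchain
openai
pinecone-client
tiktoken
python-dotenv
jq
functions-framework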
Create & Store Embeddings #
import os
from dotenv import load_dotenv
import time
from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone
load_dotenv()
openai_key = os.environ.get('OPENAI_API_KEY')
pinecone_key = os.environ.get('PINECONE_API_KEY')
pinecone_environment = os.environ.get('PINECONE_ENVIRONMENT')
pinecone_index = os.environ.get('PINECONE_INDEX')
docs_index_path = "./docs.json"
docs_index_schema = ".[]" # records look like [{"body": ...}]; content_key="body" pulls the page text. See the JSONLoader docs for more info.
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0,)
def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def metadata_func(record: dict, metadata: dict) -> dict:
    metadata["title"] = record.get("title")
    metadata["relURI"] = record.get("relURI")
    return metadata
loader = JSONLoader(docs_index_path, jq_schema=docs_index_schema, metadata_func=metadata_func, content_key="body")
data = loader.load()
texts = text_splitter.split_documents(data)
pinecone.init(
    api_key=pinecone_key,
    environment=pinecone_environment,
)

if pinecone_index in pinecone.list_indexes():
    print(f'The {pinecone_index} index already exists! We need to replace it with a new one.')
    print("Erasing existing index...")
    pinecone.delete_index(pinecone_index)
    time.sleep(60)  # wait a minute for the index to be deleted
    print("Recreating index...")

pinecone.create_index(pinecone_index, metric="cosine", dimension=1536, pods=1, pod_type="p1")
if pinecone_index in pinecone.list_indexes():
    print(f"Loading {len(texts)} texts to index {pinecone_index}... \n This may take a while. Here's a preview of the first text: \n {texts[0].metadata} \n {texts[0].page_content}")
    for chunk in chunks(texts, 25):
        for doc in chunk:
            if doc.page_content.strip():
                print(f"Indexing: {doc.metadata['title']}")
                print(f"Content: {doc.page_content}")
                Pinecone.from_texts([doc.page_content], embedding=embeddings, index_name=pinecone_index, metadatas=[doc.metadata])
            else:
                print("Ignoring blank document")

print("Done!")
Deploy Cloud Function #
import os
import functions_framework
from langchain.llms import OpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone
from langchain.memory import ConversationBufferMemory
openai_key = os.environ.get('OPENAI_API_KEY')
pinecone_key = os.environ.get('PINECONE_API_KEY')
pinecone_environment = os.environ.get('PINECONE_ENVIRONMENT')
pinecone_index = os.environ.get('PINECONE_INDEX')
def convert_to_document(message):
    class Document:
        def __init__(self, page_content, metadata):
            self.page_content = page_content
            self.metadata = metadata
    return Document(page_content=message, metadata={})
def answer_question(question: str, vs, chain, memory):
    relevant_docs = vs.similarity_search(question)
    conversation_history = memory.load_memory_variables(inputs={})["history"]
    # keep only the last few turns and join them back into a single string
    context_window = "\n".join(conversation_history.split("\n")[-3:])
    conversation_document = convert_to_document(context_window)
    input_documents = relevant_docs + [conversation_document]
    answer = chain.run(input_documents=input_documents, question=question)
    memory.save_context(inputs={"question": question}, outputs={"answer": answer})
    docs_metadata = []
    for doc in relevant_docs:
        metadata = doc.metadata
        if metadata is not None:
            doc_metadata = {
                "title": metadata.get('title', None),
                "relURI": metadata.get('relURI', None)
            }
            docs_metadata.append(doc_metadata)
    return {"answer": answer, "docs": docs_metadata}
llm = OpenAI(temperature=1, openai_api_key=openai_key, max_tokens=-1, streaming=True)
chain = load_qa_chain(llm, chain_type="stuff")
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
docsearch = Pinecone.from_existing_index(pinecone_index, embeddings)
memory = ConversationBufferMemory()
@functions_framework.http
def start(request):
    # For more information about CORS and CORS preflight requests, see:
    # https://developer.mozilla.org/en-US/docs/Glossary/Preflight_request

    # Set CORS headers for the preflight request
    if request.method == 'OPTIONS':
        # Allows GET requests from any origin with the Content-Type
        # header and caches the preflight response for 3600s
        headers = {
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Methods': 'GET',
            'Access-Control-Allow-Headers': 'Content-Type',
            'Access-Control-Max-Age': '3600'
        }
        return ('', 204, headers)

    # Set CORS headers for the main request
    headers = {
        'Access-Control-Allow-Origin': '*'
    }

    request_json = request.get_json(silent=True)
    request_args = request.args

    if request_json and 'query' in request_json:
        question = request_json['query']
    elif request_args and 'query' in request_args:
        question = request_args['query']
    else:
        question = 'What is MiloDocs?'

    return (answer_question(question=question, vs=docsearch, chain=chain, memory=memory), 200, headers)
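With the code saved as main.py and a requirements.txt alongside it, one way to deploy is with the gcloud CLI. This is a hedged sketch rather than the exact command used for this site: the function name, region, and environment variable values are placeholders you’ll need to fill in, and the runtime pin follows the Python 3.10 recommendation above:
gcloud functions deploy docs-chat \
  --runtime=python310 \
  --trigger-http \
  --allow-unauthenticated \
  --entry-point=start \
  --region=us-central1 \
  --set-env-vars=OPENAI_API_KEY=...,PINECONE_API_KEY=...,PINECONE_ENVIRONMENT=...,PINECONE_INDEX=...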
Source Code #
Help Wanted
If you know how to successfully separate this JS into its own file in assets/js, please submit a PR. It doesn’t work for me!
<div id="chatContainer" class="hidden sticky top-16 h-[calc(100vh-5rem)] flex flex-col flex justify-end">
<div id="chat-messages" class="flex flex-col overflow-y-auto text-base">
</div>
<div id="chat-controls" class="flex flex-row text-xs mt-2">
<form onsubmit="submitQuestion(event)" class="flex flex-row">
<input id="question" type="text" aria-label="Question Input" placeholder="Ask the docs" class="h-10 border rounded-lg p-1 mr-1 focus:outline-none focus:ring-2 focus:ring-brand" />
<button id="sendButton" aria-label="Send" class="flex items-center bg-brand my-1 hover:bg-black text-white p-1 mr-1 rounded-lg shadow-lg transition duration-300"><img src="/icons/send.svg" alt="Send" class="w-5 h-5"></button>
</form>
<button id="clearAll" aria-label="Delete All" onclick="clearConversation()" class="flex items-center bg-black my-1 hover:bg-red-600 text-white p-1 rounded-lg shadow-lg transition duration-300"><img src="/icons/delete.svg" alt="Delete" class="w-5 h-5"></button>
</div>
</div>
<script>
// Define a function to handle form submission
function submitQuestion(event) {
event.preventDefault();
const questionInput = document.getElementById('question');
const questionText = questionInput.value.trim();
if (!questionText) return; // Exit if the question is empty
questionInput.value = ''; // Clear the input field
addChatBubble(questionText, 'user');
fetchAnswer(questionText);
}
// Define a function to fetch answer from the API
async function fetchAnswer(question) {
const response = await fetch(`https://milodocs-lc4762co7a-uc.a.run.app/?query=${encodeURIComponent(question)}`);
const data = await response.json();
const answer = data.answer || 'Sorry, I could not fetch the answer.';
addChatBubble(answer, 'bot');
}
  // Define a function to add chat bubble
  function addChatBubble(text, sender) {
    const chatMessages = document.getElementById('chat-messages');
    let pair = chatMessages.lastElementChild;
    if (!pair || !pair.classList.contains('chat-pair') || sender === 'user') {
      pair = document.createElement('div');
      pair.className = 'chat-pair bg-zinc-100 flex flex-col my-2 p-2 rounded-lg';
      chatMessages.appendChild(pair);
    }
    const bubble = document.createElement('div');
    bubble.className = `chat-bubble ${sender} p-2 rounded-lg text-black ${sender === 'user' ? 'font-brand font-semibold' : 'font-brand font-regular'}`;
    bubble.innerText = text;
    pair.appendChild(bubble);
    if (sender === 'user') {
      bubble.classList.add('animate-pulse'); // Add pulsing animation to user bubble
    } else {
      const userBubble = pair.querySelector('.user');
      if (userBubble) userBubble.classList.remove('animate-pulse'); // Remove pulsing animation when bot responds
      const deleteButtonWrapper = document.createElement('div');
      deleteButtonWrapper.className = 'w-full flex justify-end';
      const deleteButton = document.createElement('button');
      deleteButton.className = 'w-fit p-2 rounded bg-zinc-200 text-xs lowercase hover:bg-red-600 hover:text-white transition duration-300 text-black';
      deleteButton.innerText = 'Delete';
      deleteButton.addEventListener('click', () => {
        chatMessages.removeChild(pair);
        saveChatHistory();
      });
      deleteButtonWrapper.appendChild(deleteButton);
      pair.appendChild(deleteButtonWrapper);
    }
    // Scroll to the bottom of the chat container
    chatMessages.scrollTop = chatMessages.scrollHeight;
    saveChatHistory();
  }
  // Define a function to clear conversation
  function clearConversation() {
    const chatMessages = document.getElementById('chat-messages');
    chatMessages.innerHTML = '';
    saveChatHistory();
  }

  // Define a function to save chat history
  function saveChatHistory() {
    const chatMessages = Array.from(document.getElementById('chat-messages').children);
    const chatHistory = chatMessages.map(pair => {
      const bubbles = Array.from(pair.children);
      const texts = bubbles.map(bubble => bubble.innerText);
      return {
        user: texts[0],
        bot: texts[1]
      };
    });
    localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
  }

  // Define a function to load chat history
  function loadChatHistory() {
    const chatHistory = JSON.parse(localStorage.getItem('chatHistory'));
    if (chatHistory) {
      const chatMessages = document.getElementById('chat-messages');
      chatMessages.innerHTML = ''; // Clear any existing messages
      for (const pair of chatHistory) {
        addChatBubble(pair.user, 'user');
        addChatBubble(pair.bot, 'bot');
      }
    }
  }

  // Load chat history on page load
  document.addEventListener('DOMContentLoaded', loadChatHistory);
</script>