chat.html

Using an LLM to enhance the discoverability of your content is quickly becoming a baseline requirement for documentation. Fortunately, Hugo’s ability to output your site as JSON makes it relatively easy to implement.

At a high level, you’ll need some server-side code (Python or JS) that takes a user’s question, retrieves the most relevant context from embeddings created from your docs JSON, and forwards both to ChatGPT.

How it Works

This partial sends an API request to a GCP Cloud Function (which you’ll need to set up) that uses Flask (built into the Functions Framework) to:

  1. Search a Pinecone vector database filled with embeddings created from your documentation.
  2. Perform a similarity search and return the 4 most relevant chunks.
  3. Forward those chunks to the OpenAI API via LangChain to perform RAG services.
  4. Return an answer based on the question and the content provided (see the example exchange below).
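For reference, the whole exchange reduces to a single GET with a query parameter and a small JSON response. The field names match the cloud function code later on this page; the URL, question, and answer are placeholders for illustration:

GET https://YOUR-FUNCTION-URL/?query=How%20do%20I%20enable%20search%3F

{
  "answer": "To enable search, ...",
  "docs": [
    { "title": "Search", "relURI": "/docs/search/" }
  ]
}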
have it your way
There are several ways to implement a RAG LLM UX; this is just the one that currently works for me. In the future, people may shift from LangChain to OpenAI’s official Assistants API. Hopefully sharing this implementation helps you achieve yours!

Set Up

To use this feature, you’re going to need to:

  1. Set up a Vector DB (doesn’t have to be Pinecone, LangChain supports multiple options).
  2. Convert your site’s index.json into embeddings and save them to the DB (a Hugo sketch for generating that file follows below).
  3. Deploy a cloud function that can accept and route questions.
python 3.12
The tiktoken dependency runs into build issues on Python 3.12; for now, I recommend Python 3.10 when deploying as a GCP function.
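If your site doesn’t already emit an index.json, Hugo can generate one. A minimal sketch, assuming you add a JSON output for the home page and a layouts/index.json template; the title, relURI, and body field names are exactly what the embedding script below expects.

In your Hugo config:

[outputs]
  home = ["HTML", "RSS", "JSON"]

In layouts/index.json:

[{{ range $i, $p := site.RegularPages }}{{ if $i }},{{ end }}
{"title": {{ $p.Title | jsonify }}, "relURI": {{ $p.RelPermalink | jsonify }}, "body": {{ $p.Plain | jsonify }}}{{ end }}]

Hugo writes the result to /index.json at build time; point the embedding script at that file (it’s called docs.json below).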

Create & Store Embeddings

import os
from dotenv import load_dotenv
import time

from langchain.document_loaders import JSONLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone 
from langchain.embeddings.openai import OpenAIEmbeddings
import pinecone 


load_dotenv()
openai_key = os.environ.get('OPENAI_API_KEY')
pinecone_key = os.environ.get('PINECONE_API_KEY')
pinecone_environment = os.environ.get('PINECONE_ENVIRONMENT')
pinecone_index = os.environ.get('PINECONE_INDEX')

docs_index_path = "./docs.json" 
docs_index_schema = ".[]"  # the index is a top-level array of records: [{"body": ..., "title": ..., "relURI": ...}]; see the JSONLoader docs for more info
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=0)

def chunks(lst, n):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]

def metadata_func(record: dict, metadata: dict) -> dict:
    metadata["title"] = record.get("title")
    metadata["relURI"] = record.get("relURI")
    return metadata

loader = JSONLoader(docs_index_path, jq_schema=docs_index_schema, metadata_func=metadata_func, content_key="body") 

data = loader.load()
texts = text_splitter.split_documents(data) 

pinecone.init(
    api_key=pinecone_key,
    environment=pinecone_environment,
)

if pinecone_index in pinecone.list_indexes():
    print(f"The {pinecone_index} index already exists! We need to replace it with a new one.")
    print("Erasing existing index...")
    pinecone.delete_index(pinecone_index)
    # wait a minute for the deletion to finish before recreating the index
    time.sleep(60)

print("Creating index...")
# 1536 dimensions matches OpenAI's default text-embedding-ada-002 embeddings
pinecone.create_index(pinecone_index, metric="cosine", dimension=1536, pods=1, pod_type="p1")


if pinecone_index in pinecone.list_indexes():

    print(f"Loading {len(texts)} texts to index {pinecone_index}... \n This may take a while. Here's a preview of the first text: \n {texts[0].metadata} \n {texts[0].page_content}")

    for batch in chunks(texts, 25):
        docs = [doc for doc in batch if doc.page_content.strip()]  # skip blank documents
        for doc in docs:
            print(f"Indexing: {doc.metadata['title']}")
        if docs:
            # upsert the whole batch in one call rather than one request per document
            Pinecone.from_texts(
                [doc.page_content for doc in docs],
                embedding=embeddings,
                index_name=pinecone_index,
                metadatas=[doc.metadata for doc in docs],
            )
    print("Done!")

Deploy Cloud Function

import os
import functions_framework

from langchain.llms import OpenAI 
from langchain.chains.question_answering import load_qa_chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Pinecone 
from langchain.memory import ConversationBufferMemory

openai_key = os.environ.get('OPENAI_API_KEY')
pinecone_key = os.environ.get('PINECONE_API_KEY')
pinecone_environment = os.environ.get('PINECONE_ENVIRONMENT')
pinecone_index = os.environ.get('PINECONE_INDEX')


def convert_to_document(message):
    # Minimal stand-in for a LangChain Document so the recent conversation
    # history can be passed to the chain alongside the retrieved chunks
    class Document:
        def __init__(self, page_content, metadata):
            self.page_content = page_content
            self.metadata = metadata
    return Document(page_content=message, metadata={})


def answer_question(question: str, vs, chain, memory):
    relevant_docs = vs.similarity_search(question)
    conversation_history = memory.load_memory_variables(inputs={})["history"]
    # keep only the last few turns so the prompt stays small
    context_window = "\n".join(conversation_history.split("\n")[-3:])
    conversation_document = convert_to_document(context_window)
    input_documents = relevant_docs + [conversation_document]

    answer = chain.run(input_documents=input_documents, question=question)
    memory.save_context(inputs={"question": question}, outputs={"answer": answer})
    docs_metadata = []
    for doc in relevant_docs:
        metadata = doc.metadata
        if metadata is not None:
            doc_metadata = {
                "title": metadata.get('title', None),
                "relURI": metadata.get('relURI', None)
            }
            docs_metadata.append(doc_metadata)

    return {"answer": answer, "docs": docs_metadata}

llm = OpenAI(temperature=1, openai_api_key=openai_key, max_tokens=-1, streaming=True)  # max_tokens=-1 lets the completion use the remaining context window
chain = load_qa_chain(llm, chain_type="stuff")
embeddings = OpenAIEmbeddings(openai_api_key=openai_key)
docsearch = Pinecone.from_existing_index(pinecone_index, embeddings)
memory = ConversationBufferMemory()

@functions_framework.http
def start(request):
    # For more information about CORS and CORS preflight requests, see:
    # https://developer.mozilla.org/en-US/docs/Glossary/Preflight_request

    # Set CORS headers for the preflight request
    if request.method == 'OPTIONS':
        # Allows GET requests from any origin with the Content-Type
        # header and caches the preflight response for 3600 seconds
        headers = {
            'Access-Control-Allow-Origin': '*',
            'Access-Control-Allow-Methods': 'GET',
            'Access-Control-Allow-Headers': 'Content-Type',
            'Access-Control-Max-Age': '3600'
        }

        return ('', 204, headers)

    # Set CORS headers for the main request
    headers = {
        'Access-Control-Allow-Origin': '*'
    }

    request_json = request.get_json(silent=True)
    request_args = request.args

    if request_json and 'query' in request_json:
        question = request_json['query']

    elif request_args and 'query' in request_args:
        question = request_args['query']
    else:
        question = 'What is MiloDocs?'


    return (answer_question(question=question, vs=docsearch, chain=chain, memory=memory), 200, headers)
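To get this running on GCP, declare the dependencies in requirements.txt and pass the same four environment variables at deploy time. A sketch, assuming a Gen 2 function with start as the entry point; the function name and region are placeholders, and you should pin the package versions you actually developed against.

requirements.txt:

functions-framework
langchain
openai
pinecone-client
tiktoken

Deploy:

gcloud functions deploy milodocs-chat \
  --gen2 \
  --runtime=python310 \
  --region=us-central1 \
  --source=. \
  --entry-point=start \
  --trigger-http \
  --allow-unauthenticated \
  --set-env-vars=OPENAI_API_KEY=...,PINECONE_API_KEY=...,PINECONE_ENVIRONMENT=...,PINECONE_INDEX=...

Once deployed, sanity-check it with a plain GET before wiring up the partial:

curl "https://YOUR-FUNCTION-URL/?query=What%20is%20MiloDocs%3F"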

Source Code

Help Wanted
If you know how to successfully separate this JS into its own file in assets/js, please submit a PR. It doesn’t work for me!
<div id="chatContainer" class="hidden sticky top-16 h-[calc(100vh-5rem)] flex flex-col flex justify-end">
    <div id="chat-messages" class="flex flex-col overflow-y-auto text-base">
    </div>
    <div id="chat-controls" class="flex flex-row text-xs mt-2">
        <form onsubmit="submitQuestion(event)" class="flex flex-row">
            <input id="question" type="text" aria-label="Question Input" placeholder="Ask the docs" class="h-10 border rounded-lg p-1 mr-1 focus:outline-none focus:ring-2 focus:ring-brand" />
            <button id="sendButton" aria-label="Send" class="flex items-center bg-brand my-1  hover:bg-black text-white p-1 mr-1 rounded-lg shadow-lg transition duration-300"><img src="/icons/send.svg" alt="Send" class="w-5 h-5"></button>
        </form>
        <button id="clearAll" aria-label="Delete All" onclick="clearConversation()" class="flex items-center bg-black my-1 hover:bg-red-600 text-white p-1 rounded-lg shadow-lg transition duration-300"><img src="/icons/delete.svg" alt="Delete" class="w-5 h-5"></button>
    </div>
</div>

<script>
// Define a function to handle form submission
function submitQuestion(event) {
    event.preventDefault();
    const questionInput = document.getElementById('question');
    const questionText = questionInput.value.trim();
    if (!questionText) return;  // Exit if the question is empty
    questionInput.value = '';  // Clear the input field
    addChatBubble(questionText, 'user');
    fetchAnswer(questionText);
}

// Define a function to fetch an answer from the API
async function fetchAnswer(question) {
    try {
        const response = await fetch(`https://milodocs-lc4762co7a-uc.a.run.app/?query=${encodeURIComponent(question)}`);
        const data = await response.json();
        addChatBubble(data.answer || 'Sorry, I could not fetch the answer.', 'bot');
    } catch (error) {
        addChatBubble('Sorry, something went wrong fetching the answer.', 'bot');
    }
}

// Define a function to add chat bubble
function addChatBubble(text, sender) {
    const chatMessages = document.getElementById('chat-messages');
    let pair = chatMessages.lastElementChild;
    if (!pair || !pair.classList.contains('chat-pair') || sender === 'user') {
        pair = document.createElement('div');
        pair.className = 'chat-pair bg-zinc-100 flex flex-col my-2 p-2 rounded-lg';
        chatMessages.appendChild(pair);
    }
    const bubble = document.createElement('div');
    bubble.className = `chat-bubble ${sender} p-2 rounded-lg text-black ${sender === 'user' ? 'font-brand font-semibold' : 'font-brand font-regular'}`;
    bubble.innerText = text;
    pair.appendChild(bubble);
    if (sender === 'user') {
        bubble.classList.add('animate-pulse');  // Add pulsing animation to user bubble
    } else {
        const userBubble = pair.querySelector('.user');
        if (userBubble) userBubble.classList.remove('animate-pulse');  // Remove pulsing animation when bot responds
        const deleteButtonWrapper = document.createElement('div');
        deleteButtonWrapper.className = 'w-full flex justify-end';

        const deleteButton = document.createElement('button');
        deleteButton.className = 'w-fit p-2 rounded bg-zinc-200 text-xs lowercase hover:bg-red-600 hover:text-white transition duration-300 text-black';
        deleteButton.innerText = 'Delete';
        deleteButton.addEventListener('click', () => {
            chatMessages.removeChild(pair);
            saveChatHistory();
        });

        deleteButtonWrapper.appendChild(deleteButton);
        pair.appendChild(deleteButtonWrapper);
    }
    
    // Scroll to the bottom of the chat container
    chatMessages.scrollTop = chatMessages.scrollHeight;

    saveChatHistory();
}

// Define a function to clear conversation
function clearConversation() {
    const chatMessages = document.getElementById('chat-messages');
    chatMessages.innerHTML = '';
    saveChatHistory();
}

// Define a function to save chat history
function saveChatHistory() {
    const chatMessages = Array.from(document.getElementById('chat-messages').children);
    const chatHistory = chatMessages.map(pair => {
        const bubbles = Array.from(pair.children);
        const texts = bubbles.map(bubble => bubble.innerText);
        return {
            user: texts[0],
            bot: texts[1]
        };
    });
    localStorage.setItem('chatHistory', JSON.stringify(chatHistory));
}

// Define a function to load chat history
function loadChatHistory() {
    const chatHistory = JSON.parse(localStorage.getItem('chatHistory'));
    if (chatHistory) {
        const chatMessages = document.getElementById('chat-messages');
        chatMessages.innerHTML = '';  // Clear any existing messages
        for (const pair of chatHistory) {
            addChatBubble(pair.user, 'user');
            if (pair.bot) addChatBubble(pair.bot, 'bot');  // skip pairs saved before the bot replied
        }
    }
}

// Load chat history on page load
document.addEventListener('DOMContentLoaded', loadChatHistory);

</script>