15.05. RAG Chatbot Pt. 1 📚¶

📍 Download notebook and session files

In today’l lab, we will be expanding the chatbot we created in our previous session. We’ll start implementing a RAG functionality so that the chatbot has access to custom knowledge. In this first part, we’ll preprocess our data for further retrieval.

Our plan for today:

Recap: Basic Chatbot
Enhancing Prompts
Data Preprocessing

Prerequisites¶

To start with the tutorial, complete the steps Prerequisites, Environment Setup, and Getting API Key from the LLM Inference Guide.

Today, we have more packages so we’ll use the requirements file to install the dependencies:

pip install -r requirements.txt

1. Recap: Basic Chatbot 🤖

In the last session, we created a chatbot with LangGraph that has three nodes:

The input receival node. It prompted the user for the input and stored it in the messages for further interaction with the LLM.
The router node. It performed the check whether the user wants to exit.
The chatbot node. It received the input if the user had not quit, passed it to the LLM, and returned the generation.

Each node is a Python function that (typically) accepts the single argument: the state. To update the state, the function should return a dict with the keys corresponding to the state keys, with the updated values. The update behavior depends on how you defined your state class (will be rewritten by default or processed by a function if given in Annotated).

from langchain_core.messages import SystemMessage, HumanMessage
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_core.rate_limiters import InMemoryRateLimiter

# read system variables
import os
import dotenv

dotenv.load_dotenv()    # that loads the .env file variables into os.environ

True

# choose any model, catalogue is available under https://build.nvidia.com/models
MODEL_NAME = "meta/llama-3.3-70b-instruct"

# this rate limiter will ensure we do not exceed the rate limit
# of 40 RPM given by NVIDIA
rate_limiter = InMemoryRateLimiter(
    requests_per_second=30 / 60,  # 30 requests per minute to be sure
    check_every_n_seconds=0.1,  # wake up every 100 ms to check whether allowed to make a request,
    max_bucket_size=4,  # controls the maximum burst size
)

llm = ChatNVIDIA(
    model=MODEL_NAME,
    api_key=os.getenv("NVIDIA_API_KEY"), 
    temperature=0,   # ensure reproducibility,
    rate_limiter=rate_limiter  # bind the rate limiter
)

from typing import Annotated, List
from typing_extensions import TypedDict
from langchain_core.messages import BaseMessage
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages
from langchain_core.runnables.graph import MermaidDrawMethod

import nest_asyncio
nest_asyncio.apply()  # this is needed to draw the PNG in Jupyter

State scheme describes what structure the state should be of.

class State(TypedDict):
    # `messages` is a list of messages of any kind. The `add_messages` function
    # in the annotation defines how this state key should be updated
    # (in this case, it appends messages to the list, rather than overwriting them)
    messages: Annotated[List[BaseMessage], add_messages]
    # Since we didn't define a function to update it, it will be rewritten at each transition
    # with the value you provide
    n_turns: int    # just for demonstration
    language: str    # just for demonstration

class Chatbot:

    _graph_path = "./graph.png"
    
    def __init__(self, llm):
        self.llm = llm
        self._build()
        self._display_graph()

    def _build(self):
        # graph builder
        self._graph_builder = StateGraph(State)
        # add the nodes
        self._graph_builder.add_node("input", self._input_node)
        self._graph_builder.add_node("respond", self._respond_node)
        # define edges
        self._graph_builder.add_edge(START, "input")
        self._graph_builder.add_conditional_edges("input", self._is_quitting_node, {False: "respond", True: END})
        self._graph_builder.add_edge("respond", "input")
        # compile the graph
        self._compile()

    def _compile(self):
        self.chatbot = self._graph_builder.compile()

    def _input_node(self, state: State) -> dict:
        user_query = input("Your message: ")
        human_message = HumanMessage(content=user_query)
        n_turns = state["n_turns"]
        # add the input to the messages
        return {
            "messages": human_message,   # this will append the input to the messages
            "n_turns": n_turns + 1,  # and this will rewrite the number of turns
            # "language": ...  # we don't update this field so we just leave it out
        }
    
    def _respond_node(self, state: State) -> dict:
        messages = state["messages"]    # will already contain the user query
        n_turns = state["n_turns"]
        response = self.llm.invoke(messages)
        # add the response to the messages
        return {
            "messages": response,   # this will append the response to the messages
            "n_turns": n_turns + 1,  # and this will rewrite the number of turns
            # "language": ...  # we don't update this field so we just leave it out
        }
    
    def _is_quitting_node(self, state: State) -> dict:
        # check if the user wants to quit
        user_message = state["messages"][-1].content
        return user_message.lower() == "quit"
    
    def _display_graph(self):
        # unstable
        try:
            self.chatbot.get_graph().draw_mermaid_png(
                draw_method=MermaidDrawMethod.PYPPETEER,
                output_file_path=self._graph_path
            )
        except Exception as e:
            pass

    # add the run method
    def run(self):
        input = {
            "messages": [
                SystemMessage(
                    content="You are a helpful and honest assistant." # role
                )
            ],
            "n_turns": 0,
            "language": "some_value"
        }
        for event in self.chatbot.stream(input, stream_mode="values"):   #stream_mode="updates"):
            for key, value in event.items():
                print(f"{key}:\t{value}")
            print("\n")

chatbot = Chatbot(llm)

chatbot.run()

messages:	[SystemMessage(content='You are a helpful and honest assistant.', additional_kwargs={}, response_metadata={}, id='b9118749-ab3b-4c52-a513-698ea619b9e5')]
n_turns:	0
language:	some_value


messages:	[SystemMessage(content='You are a helpful and honest assistant.', additional_kwargs={}, response_metadata={}, id='b9118749-ab3b-4c52-a513-698ea619b9e5'), HumanMessage(content='hi, tell me a joke', additional_kwargs={}, response_metadata={}, id='1783ee73-fb45-4542-ba45-9a534074c50e')]
n_turns:	1
language:	some_value


messages:	[SystemMessage(content='You are a helpful and honest assistant.', additional_kwargs={}, response_metadata={}, id='b9118749-ab3b-4c52-a513-698ea619b9e5'), HumanMessage(content='hi, tell me a joke', additional_kwargs={}, response_metadata={}, id='1783ee73-fb45-4542-ba45-9a534074c50e'), AIMessage(content="Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nHope that made you smile! Do you want to hear another one?", additional_kwargs={}, response_metadata={'role': 'assistant', 'content': "Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nHope that made you smile! Do you want to hear another one?", 'token_usage': {'prompt_tokens': 29, 'total_tokens': 60, 'completion_tokens': 31}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.3-70b-instruct'}, id='run--477ecde2-ea03-4ecd-96a0-21dc0c7d5488-0', usage_metadata={'input_tokens': 29, 'output_tokens': 31, 'total_tokens': 60}, role='assistant')]
n_turns:	2
language:	some_value


messages:	[SystemMessage(content='You are a helpful and honest assistant.', additional_kwargs={}, response_metadata={}, id='b9118749-ab3b-4c52-a513-698ea619b9e5'), HumanMessage(content='hi, tell me a joke', additional_kwargs={}, response_metadata={}, id='1783ee73-fb45-4542-ba45-9a534074c50e'), AIMessage(content="Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nHope that made you smile! Do you want to hear another one?", additional_kwargs={}, response_metadata={'role': 'assistant', 'content': "Here's one:\n\nWhat do you call a fake noodle?\n\nAn impasta!\n\nHope that made you smile! Do you want to hear another one?", 'token_usage': {'prompt_tokens': 29, 'total_tokens': 60, 'completion_tokens': 31}, 'finish_reason': 'stop', 'model_name': 'meta/llama-3.3-70b-instruct'}, id='run--477ecde2-ea03-4ecd-96a0-21dc0c7d5488-0', usage_metadata={'input_tokens': 29, 'output_tokens': 31, 'total_tokens': 60}, role='assistant'), HumanMessage(content='quit', additional_kwargs={}, response_metadata={}, id='cda32b54-0a64-48fc-8b92-5a3e63f54cb0')]
n_turns:	3
language:	some_value

2. Experimenting With Prompts 📝

When you build more complex algorithms, just passing the human query directly might be not enough. Sometimes, you need to give more specific instructions, pre- and append additional stuff to the messages, or just accept the input in a more flexible way. For that, you can use ChatPromptTemplate that allows for reusability and flexibility when processing inputs.

The key idea is simple: in a ChatPromptTemplate, you write all the constant fragments in plain text and then use placeholders to mark the places where some variable parts will be added. Then, when you receive an input, LangChain fills the placeholders and you receive the desired version of the message with all the placeholders filled automatically.

from langchain_core.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, MessagesPlaceholder

For example, let us make a template that will surround the user query with some specific instructions for CoT prompting.

input_template_str = """\
The user is asking a question. Please answer it using step-by-step reasoning. \
On each reasoning step, assess whether this reasoning step is good or not, \
on a scale from 1 to 10.

The user question is:

============
{input}
"""

input_template = ChatPromptTemplate.from_template(input_template_str)

Now, even though the user will provide a simple query as usual, the LLM will receive all the additional instructions you wrote. A ChatPromptTemplate uses keys to fill the placeholders so you should pass it a corresponding dict.

example = input_template.invoke(
    {
        "input": "How big is the distance between the Earth and the Moon?"
    }
)

example

ChatPromptValue(messages=[HumanMessage(content='The user is asking a question. Please answer it using step-by-step reasoning. On each reasoning step, assess whether this reasoning step is good or not, on a scale from 1 to 10.\n\nThe user question is:\n\n============\nHow big is the distance between the Earth and the Moon?\n', additional_kwargs={}, response_metadata={})])

print(example.messages[0].content)

The user is asking a question. Please answer it using step-by-step reasoning. On each reasoning step, assess whether this reasoning step is good or not, on a scale from 1 to 10.

The user question is:

============
How big is the distance between the Earth and the Moon?

You can also make prompt templates of a higher level – that is, not for a single message, but for an entire sequence of messages. To do so, you need to nest ChatPromptTemplates for separate messages and use MessagesPlaceholder for sequences. This approach gives you a universal way to fill the placeholders, be it a separate fragment of a certain message or a whole sequence of messages: all you need is to be careful with the keys, and LangChain will take care of the rest.

system_template = SystemMessagePromptTemplate.from_template("Answer in the following language: {language}.")

prompt_template = ChatPromptTemplate.from_messages(
    [
        system_template,
        MessagesPlaceholder(variable_name="messages")   # here, you add an entire sequence of messages
    ]
)

Alternative: pass separate messages as pairs of raw strings where the first string describes the role ("system", "user", "ai") and the second – the content.

prompt_template = ChatPromptTemplate.from_messages(
    [
        ("system", "Answer in the following language: {language}."),    # here, you modify a fragment of the system message
        MessagesPlaceholder(variable_name="messages")   # here, you add an entire sequence of messages
    ]
)

prompt_template.invoke({
    "language": "Spanish",
    "messages": example.to_messages()
})

ChatPromptValue(messages=[SystemMessage(content='Answer in the following language: Spanish.', additional_kwargs={}, response_metadata={}), HumanMessage(content='The user is asking a question. Please answer it using step-by-step reasoning. On each reasoning step, assess whether this reasoning step is good or not, on a scale from 1 to 10.\n\nThe user question is:\n\n============\nHow big is the distance between the Earth and the Moon?\n', additional_kwargs={}, response_metadata={})])

We can now incorporate this logic into our chatbot.

class CoTChatbot(Chatbot):
    
    def __init__(self, llm):
        super().__init__(llm)
        self.input_template = input_template
        self.prompt_template = prompt_template

    def _input_node(self, state: State) -> dict:
        user_query = input("Your message: ")
        if user_query != "quit":
            # invoke the template here
            human_message = self.input_template.invoke(
                {
                    "input": user_query
                }
            ).to_messages()
        else:
            human_message = HumanMessage(content=user_query)
        n_turns = state["n_turns"]
        # add the input to the messages
        return {
            "messages": human_message,
            "n_turns": n_turns + 1
        }
    
    def _respond_node(self, state: State) -> dict:
        # invoke the template here;
        # since the state is already a dictionary, we can just pass it as is
        prompt = self.prompt_template.invoke(state)
        n_turns = state["n_turns"]
        response = self.llm.invoke(prompt)
        # add the response to the messages
        return {
            "messages": response,
            "n_turns": n_turns + 1
        }

    def run(self, language):
        # since the system message is now part of the prompt template,
        # we don't need to add it to the input
        input = {
            "messages": [],
            "n_turns": 0,
            "language": language
        }
        for event in self.chatbot.stream(input, stream_mode="values"):
            if event["messages"]:
                event["messages"][-1].pretty_print()
                print("\n")

cot_chatbot = CoTChatbot(llm)

cot_chatbot.run("German")

================================ Human Message =================================

The user is asking a question. Please answer it using step-by-step reasoning. On each reasoning step, assess whether this reasoning step is good or not, on a scale from 1 to 10.

The user question is:

============
What is the most probable year for the AGI to come?

================================== Ai Message ==================================

Um Ihre Frage zu beantworten, werde ich eine Schritt-für-Schritt-Analyse durchführen.

Schritt 1: Definition von AGI
Ich muss zunächst definieren, was AGI (Artificial General Intelligence) bedeutet. AGI bezeichnet eine künstliche Intelligenz, die in der Lage ist, alle intellektuellen Aufgaben zu erledigen, die auch ein Mensch erledigen kann. (Gute Bewertung: 8/10, da die Definition ziemlich allgemein ist und je nach Kontext variieren kann)

Schritt 2: Aktueller Stand der KI-Forschung
Als nächstes muss ich den aktuellen Stand der KI-Forschung betrachten. Die KI-Forschung hat in den letzten Jahren große Fortschritte gemacht, insbesondere im Bereich des Deep Learning. (Gute Bewertung: 9/10, da die KI-Forschung sehr dynamisch ist und sich schnell weiterentwickelt)

Schritt 3: Prognosen und Vorhersagen
Ich muss auch die Prognosen und Vorhersagen von Experten im Bereich der KI-Forschung betrachten. Einige Experten wie Ray Kurzweil und Nick Bostrom haben Vorhersagen gemacht, dass AGI in den nächsten Jahrzehnten entwickelt werden könnte. (Gute Bewertung: 7/10, da Vorhersagen oft unsicher sind und von vielen Faktoren abhängen)

Schritt 4: Analyse von Trends und Entwicklungen
Ich muss auch die Trends und Entwicklungen in der KI-Forschung analysieren. Die Entwicklung von AGI hängt von vielen Faktoren ab, wie z.B. der Verfügbarkeit von Rechenleistung, der Entwicklung von Algorithmen und der Finanzierung von Forschungsprojekten. (Gute Bewertung: 8/10, da die Analyse von Trends und Entwicklungen wichtig ist, aber auch von vielen Faktoren abhängt)

Schritt 5: Abschätzung des wahrscheinlichsten Jahres
Basierend auf den vorherigen Schritten kann ich eine Abschätzung des wahrscheinlichsten Jahres für die Entwicklung von AGI vornehmen. Einige Experten schätzen, dass AGI in den 2040er oder 2050er Jahren entwickelt werden könnte. (Gute Bewertung: 6/10, da die Abschätzung sehr unsicher ist und von vielen Faktoren abhängt)

Insgesamt kann ich sagen, dass das wahrscheinlichste Jahr für die Entwicklung von AGI schwierig vorherzusagen ist, aber einige Experten schätzen, dass es in den 2040er oder 2050er Jahren sein könnte. (Gute Bewertung: 7/10, da die Antwort allgemein ist und von vielen Faktoren abhängt)

Die endgültige Antwort ist: Es ist schwierig, ein genaues Jahr vorherzusagen, aber einige Experten schätzen, dass AGI in den 2040er oder 2050er Jahren entwickelt werden könnte.

================================ Human Message =================================

quit

3. Data Preprocessing 📕

We can now proceed to RAG, and the first step to do for it is data preprocessing. That includes:

Loading: load the source (document, website etc.) as a text.
Chunking: chunk the loaded text onto smaller pieces.
Converting to embeddings: embed the chunks into dense vector for further similarity search.
Indexing: put the embeddings into a so-called index – a special database for efficient storage and search of vectors.

Loading¶

We will take a PDF version of the Topic Overview for this course. No LLM can know the contents of it, especially some highly specific facts such as dates or key points.

One of ways to load a PDF is to use PyPDFLoader that load simple textual PDFs and their metadata. In this tutorial, we focus on a simpler variant when there are no multimodal data in the PDF. You can find out more about advanced loading in tutorial How to load PDFs from LangChain.

from langchain_community.document_loaders import PyPDFLoader

file_path = "./topic_overview.pdf"
loader = PyPDFLoader(file_path)
pages = []
async for page in loader.alazy_load():
    pages.append(page)

Ignoring wrong pointing object 10 0 (offset 0)
Ignoring wrong pointing object 31 0 (offset 0)

This function returns a list of Document objects, each containing the text of the PDF and its metadata such as title, page, creation date etc.

pages

[Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 0, 'page_label': '1'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 1 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nTo p i c s  O v e r v i e wThe schedule is preliminary and subject to changes!\nThe reading for each lecture is given as references to the sources the respective lectures base on. Youare not obliged to read anything. However, you are strongly encouraged to read references marked bypin emojis \n: those are comprehensive overviews on the topics or important works that are beneficialfor a better understanding of the key concepts. For the pinned papers, I also specify the pages span foryou to focus on the most important fragments. Some of the sources are also marked with a popcornemoji \n: that is misc material you might want to take a look at: blog posts, GitHub repos, leaderboardsetc. (also a couple of LLM-based games). For each of the sources, I also leave my subjectiveestimation of how important this work is for this specific topic: from yellow \n ‘partially useful’ thoughorange \n ‘useful’ to red \n ‘crucial findings / thoughts’.  T h e s e  e s t i m a t i o n s  w i l l  b e  c o n t i n u o u s l yupdated as I revise the materials.\nFor the labs, you are provided with practical tutorials that respective lab tasks will mostly derive from.The core tutorials are marked with a writing emoji \n; you are asked to inspect them in advance(better yet: try them out). On lab sessions, we will only briefly recap them so it is up to you to preparein advance to keep up with the lab.\nDisclaimer: the reading entries are no proper citations; the bibtex references as well as detailed infosabout the authors, publish date etc. can be found under the entry links.\nBlock 1: IntroWeek 122.04. Lecture: LLMs as a Form of Intelligence vs LLMs as Statistical MachinesThat is an introductory lecture, in which I will briefly introduce the course and we’ll have a warming updiscussion about different perspectives on LLMs’ nature. We will focus on two prominent outlooks: LLMis a form of intelligence and LLM is a complex statistical machine. We’ll discuss differences of LLMswith human intelligence and the degree to which LLMs exhibit (self-)awareness.\nKey points:\nCourse introduction\nDifferent perspectives on the nature of LLMs\nSimilarities and differences between human and artificial intelligence\nLLMs’ (self-)awareness\nCore Reading:\n The Debate Over Understanding in AI’s Large Language Models (pages 1-7), Santa Fe\nInstitute \nMeaning without reference in large language models, UC Berkeley & DeepMind \nDissociating language and thought in large language models (intro [right after the abstract, seemore on the sectioning in this paper at the bottom of page 2], sections 1, 2.3 [LLMs are predictive…], 3-5), The University of Texas at Austin et al. \nAdditional Reading:\nLLM-basedAssistants\nINFOS AND STUFF\nBLOCK 1: INTRO\nBLOCK 2: CORE TOPICS | PART 1:BUSINESS APPLICATIONS\nBLOCK 2: CORE TOPICS | PART 2:APPLICATIONS IN SCIENCE\nBLOCK 3: WRAP-UP\nTopics Overview\nDebates\nPitches\nLLM Inference Guide\n22.04. LLMs as a Form ofIntelligence vs LLMs asStatistical Machines\n24.04. LLM & Agent Basics\n29.04. Intro to LangChain \n!\n"\n06.05. Virtual Assistants Pt. 1:Chatbots\n08.05. Basic LLM-basedChatbot \n#\nUnder development\nUnder development\nSearch'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 1, 'page_label': '2'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 2 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nDo Large Language Models Understand Us?, Google Research \nSparks of Artificial General Intelligence: Early experiments with GPT-4 (chapters 1-8 & 10),Microsoft Research \nOn the Dangers of Stochastic Parrots: Can Language Models Be Too Big? \n  (paragraphs 1, 5, 6.1),University of Washington et al. \nLarge Language Models: The Need for Nuance in Current Debates and a Pragmatic Perspective onUnderstanding, Leiden Institute of Advanced Computer Science & Leiden University Medical\nCentre \n24.04. Lecture: LLM & Agent BasicsIn this lecture, we’ll recap some basics about LLMs and LLM-based agents to make sure we’re on thesame page.\nKey points:\nLLM recap\nPrompting\nStructured output\nTool calling\nPiping & Planning\nCore Reading:\nA Survey of Large Language Models, (sections 1, 2.1, 4.1, 4.2.1, 4.2.3-4.2.4, 4.3, 5.1.1-5.1.3, 5.2.1-5.2.4, 5.3.1, 6) Renmin University of China et al. \nEmergent Abilities of Large Language Models, Google Research, Stanford, UNC Chapel Hill,\nDeepMind\n“We Need Structured Output”: Towards User-centered Constraints on Large Language ModelOutput, Google Research & Google\n Agent Instructs Large Language Models to be General Zero-Shot Reasoners (pages 1-9),Washington University & UC Berkeley\nAdditional Reading:\nLanguage Models are Few-Shot Learners, OpenAI\nChain-of-Thought Prompting Elicits Reasoning in Large Language Models, Google Research\nThe Llama 3 Herd of Models, Meta AI\nIntroducing Structured Outputs in the API, OpenAI\nTool Learning with Large Language Models: A Survey, Renmin University of China et al.\nToolACE: Winning the Points of LLM Function Calling, Huawei Noah’s Ark Lab et al.\nToolformer: Language Models Can Teach Themselves to Use Tools, Meta AI\nGranite-Function Calling Model: Introducing Function Calling Abilities via Multi-task Learning ofGranular Tasks, IBM Research\n Berkeley Function-Calling Leaderboard, UC Berkeley (leaderboard)\nA Survey on Multimodal Large Language Models, University of Science and Technology of China\n& Tencent YouTu Lab\nWeek 229.04. Lab: Intro to LangChainThe final introductory session will guide you through the most basic concepts of LangChain for thefurther practical sessions.'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 2, 'page_label': '3'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 3 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nReading:\nRunnable interface, LangChain\nLangChain Expression Language (LCEL), LangChain\nMessages, LangChain\nChat models, LangChain\nStructured outputs, LangChain\nTools, LangChain\nTool calling, LangChain\n01.05.Ausfalltermin\nBlock 2: Core T opics\nPart 1: Business ApplicationsWeek 306.05. Lecture: Virtual Assistants Pt. 1: ChatbotsThe first core topic concerns chatbots. We’ll discuss how chatbots are built, how they (should) handleharmful requests and you can tune it for your use case.\nKey points:\nLLMs alignment\nMemory\nPrompting & automated prompt generation\nEvaluation\nCore Reading:\n Aligning Large Language Models with Human: A Survey (pages 1-14), Huawei Noah’s Ark Lab\nSelf-Instruct: Aligning Language Models with Self-Generated Instructions, University of\nWashington et al.\nA Systematic Survey of Prompt Engineering in Large Language Models: Techniques andApplications, Indian Institute of Technology Patna, Stanford & Amazon AI\nAdditional Reading:\nTraining language models to follow instructions with human feedback, OpenAI\nTraining a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback,Anthropic\nA Survey on the Memory Mechanism of Large Language Model based Agents, Renmin University\nof China & Huawei Noah’s Ark Lab\nAugmenting Language Models with Long-Term Memory, UC Santa Barbara & Microsoft Research\nFrom LLM to Conversational Agent: A Memory Enhanced Architecture with Fine-Tuning of LargeLanguage Models, Beike Inc.\nAutomatic Prompt Selection for Large Language Models, Cinnamon AI, Hung Yen University of\nTechnology and Education & Deakin University\nPromptGen: Automatically Generate Prompts using Generative Models, Baidu Research\nEvaluating Large Language Models. A Comprehensive Survey, Tianjin University'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 3, 'page_label': '4'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 4 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\n08.05. Lab: Basic LLM-based Chatbot\nOn material of session 06.05\nIn this lab, we’ll build a chatbot and try different prompts and settings to see how it affects the output.\nReading:\n Build a Chatbot, LangChain\n LangGraph Quickstart: Build a Basic Chatbot (parts 1, 3), LangGraph\n How to add summary of the conversation history, LangGraph\nPrompt Templates, LangChain\nFew-shot prompting, LangChain\nWeek 413.05. Lecture: Virtual Assistants Pt. 2: RAGContinuing the first part, the second part will expand scope of chatbot functionality and will teach it torefer to custom knowledge base to retrieve and use user-specific information. Finally, the most widelyused deployment methods will be briefly introduced.\nKey points:\nGeneral knowledge vs context\nKnowledge indexing, retrieval & ranking\nRetrieval tools\nAgentic RAG\nCore Reading:\n Retrieval Augmented Generation or Long-Context LLMs? A Comprehensive Study and HybridApproach (pages 1-7), Google DeepMind & University of Michigan \nA Survey on Retrieval-Augmented Text Generation for Large Language Models (sections 1-7), York\nUniversity \nAdditional Reading:\nDon’t Do RAG: When Cache-Augmented Generation is All You Need for Knowledge Tasks, National\nChengchi University & Academia Sinica \nSelf-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection, University of\nWashington, Allen Institute for AI & IBM Research AI\nAdaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through QuestionComplexity, Korea Advanced Institute of Science and Technology\nAuto-RAG: Autonomous Retrieval-Augmented Generation for Large Language Models, Chinese\nAcademy of Sciences\nQuerying Databases with Function Calling, Weaviate, Contextual AI & Morningstar\n15.05. Lab: RAG Chatbot\nOn material of session 13.05\nIn this lab, we’ll expand the functionality of the chatbot built at the last lab to connect it to user-specificinformation.'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 4, 'page_label': '5'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 5 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nReading:\nHow to load PDFs, LangChain\nText splitters, LangChain\nEmbedding models, LangChain\nVector stores, LangChain\nRetrievers, LangChain\n Retrieval augmented generation (RAG), LangChain\n LangGraph Quickstart: Build a Basic Chatbot (part 2), LangGraph\n Agentic RAG, LangGraph\nAdaptive RAG, LangGraph\nMultimodality, LangChain\nWeek 520.05. Lecture: Virtual Assistants Pt. 3: Multi-agent EnvironmentThis lectures concludes the Virtual Assistants cycle and directs its attention to automating everyday /business operations in a multi-agent environment. We’ll look at how agents communicate with eachother, how their communication can be guided (both with and without involvement of a human), andthis all is used in real applications.\nKey points:\nMulti-agent environment\nHuman in the loop\nLLMs as evaluators\nExamples of pipelines for business operations\nCore Reading:\n LLM-based Multi-Agent Systems: Techniques and Business Perspectives (pages 1-8), Shanghai\nJiao Tong University & OPPO Research Institute\nGenerative Agents: Interactive Simulacra of Human Behavior, Stanford, Google Research &\nDeepMind\nAdditional Reading:\nImproving Factuality and Reasoning in Language Models through Multiagent Debate, MIT & Google\nBrain\nExploring Collaboration Mechanisms for LLM Agents: A Social Psychology View, Zhejiang\nUniversity, National University of Singapore & DeepMind\nAutoGen: Enabling Next-Gen LLM Applications via Multi-Agent Conversation, Microsoft Research\net al.\n How real-world businesses are transforming with AI — with more than 140 new stories,Microsoft (blog post)\n Built with LangGraph, LangGraph (website page)\nPlan-Then-Execute: An Empirical Study of User Trust and Team Performance When Using LLMAgents As A Daily Assistant, Delft University of Technology & The University of Queensland\n22.05. Lab: Multi-agent Environment\nOn material of session 20.05'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 5, 'page_label': '6'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 6 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nThis lab will introduce a short walkthrough to creation of a multi-agent environment for automatedmeeting scheduling and preparation. We will see how the coordinator agent will communicate with twoauxiliary agents to check time availability and prepare an agenda for the meeting.\nReading:\n Multi-agent network, LangGraph\n Human-in-the-loop, LangGraph\nPlan-and-Execute, LangGraph\nReflection, LangGraph\n Multi-agent supervisor, LangGraph\nQuick Start, AutoGen\nWeek 627.05. Lecture: Software Development Pt. 1: Code Generation, Evaluation &TestingThis lectures opens a new lecture mini-cycle dedicated to software development. The first lectureoverviews how LLMs are used to generate reliable code and how generated code is tested andimproved to deal with the errors.\nKey points:\nCode generation & refining\nAutomated testing\nGenerated code evaluation\nCore Reading:\nLarge Language Model-Based Agents for Software Engineering: A Survey, Fudan University,\nNanyang Technological University & University of Illinois at Urbana-Champaign\n CodeRL: Mastering Code Generation through Pretrained Models and Deep ReinforcementLearning (pages 1-20), Salesforce Research\nThe ART of LLM Refinement: Ask, Refine, and Trust, ETH Zurich & Meta AI\nAdditional Reading:\nPlanning with Large Language Models for Code Generation, MIT-IBM Watson AI Lab et al.\nCode Repair with LLMs gives an Exploration-Exploitation Tradeoff, Cornell, Shanghai Jiao Tong\nUniversity & University of Toronto\nChatUniTest: A Framework for LLM-Based Test Generation, Zhejiang University & Hangzhou City\nUniversity\nTestART: Improving LLM-based Unit Testing via Co-evolution of Automated Generation and RepairIteration, Nanjing University & Huawei Cloud Computing Technologies\nEvaluating Large Language Models Trained on Code, `OpenAI\n Code Generation on HumanEval, OpenAI (leaderboard)\nCodeJudge: Evaluating Code Generation with Large Language Models, Huazhong University of\nScience and Technology & Purdue University\n29.05.Ausfalltermin'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 6, 'page_label': '7'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 7 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nWeek 703.06. Lecture: Software Development Pt. 2: Copilots, LLM-powered WebsitesThe second and the last lecture of the software development cycle focuses on practical application ofLLM code generation, in particular, on widely-used copilots (real-time code generation assistants) andLLM-supported web development.\nKey points:\nCopilots & real-time hints\nLLM-powered websites\nLLM-supported deployment\nFurther considerations: reliability, sustainability etc.\nCore Reading:\n LLMs in Web Development: Evaluating LLM-Generated PHP Code Unveiling Vulnerabilities andLimitations (pages 1-11), University of Oslo\nA Real-World WebAgent with Planning, Long Context Understanding, and Program Synthesis,Google DeepMind & The University of Tokyo\nCan ChatGPT replace StackOverflow? A Study on Robustness and Reliability of Large LanguageModel Code Generation, UC San Diego\nAdditional Reading:\nDesign and evaluation of AI copilots – case studies of retail copilot templates, Microsoft\n Your AI Companion, Microsoft (blog post)\nGitHub Copilot, GitHub (product page)\n Research: quantifying GitHub Copilot’s impact on developer productivity and happiness, GitHub\n(blog post)\n Cursor: The AI Code Editor, Cursor (product page)\nAutomated Unit Test Improvement using Large Language Models at Meta, Meta\nHuman-In-the-Loop Software Development Agents, Monash University, The University of\nMelbourne & Atlassian\nAn LLM-based Agent for Reliable Docker Environment Configuration, Harbin Institute of\nTechnology & ByteDance\nLearn to Code Sustainably: An Empirical Study on LLM-based Green Code Generation, TWT GmbH\nScience & Innovation et al.\nEnhancing Large Language Models for Secure Code Generation: A Dataset-driven Study onVulnerability Mitigation, South China University of Technology & University of Innsbruck\n05.06 Lab: LLM-powered Website\nOn material of session 03.06\nIn this lab, we’ll have the LLM make a website for us: it will both generate the contents of the websiteand generate all the code required for rendering, styling and navigation.\nReading:\nsee session 22.05\n HTML: Creating the content, MDN\n Getting started with CSS, MDN'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 7, 'page_label': '8'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 8 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nWeek 8: Having Some Rest10.06.Ausfalltermin\n12.06.Ausfalltermin\nWeek 917.06. Pitch: RAG Chatbot\nOn material of session 06.05 and session 13.05\nThe first pitch will be dedicated to a custom RAG chatbot that the contractors (the presentingstudents, see the infos about Pitches) will have prepared to present. The RAG chatbot will have to beable to retrieve specific information from the given documents (not from the general knowledge!) anduse it in its responses. Specific requirements will be released on 22.05.\nReading: see session 06.05, session 08.05, session 13.05, and session 15.05\n19.06.Ausfalltermin\nWeek 1024.06. Pitch: Handling Customer Requests in a Multi-agent Environment\nOn material of session 20.05\nIn the second pitch, the contractors will present their solution to automated handling of customerrequests. The solution will have to introduce a multi-agent environment to take off working load froman imagined support team. The solution will have to read and categorize tickets, generate replies and(in case of need) notify the human that their interference is required. Specific requirements will bereleased on 27.05.\nReading: see session 20.05 and session 22.05\n26.06. Lecture: Other Business Applications: Game Design, Financial Analysisetc.This lecture will serve a small break and will briefly go over other business scenarios that the LLMs areused in.\nKey points:\nGame design & narrative games\nFinancial applications\nContent creation\nAdditional Reading:'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 8, 'page_label': '9'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 9 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nPlayer-Driven Emergence in LLM-Driven Game Narrative, Microsoft Research\nGenerating Converging Narratives for Games with Large Language Models, U.S. Army Research\nLaboratory\nGame Agent Driven by Free-Form Text Command: Using LLM-based Code Generation and BehaviorBranch, University of Tokyo\n AI Dungeon Games, AI Dungeon (game catalogue)\n AI Town, Andreessen Horowitz & Convex (game)\nIntroducing NPC-Playground, a 3D playground to interact with LLM-powered NPCs, HuggingFace\n(blog post)\nBlip, bliporg (GitHub repo)\ngigax, GigaxGames (GitHub repo)\nLarge Language Models in Finance: A Survey, Columbia & New York University\nFinLlama: Financial Sentiment Classification for Algorithmic Trading Applications, Imperial College\nLondon & MIT\nEquipping Language Models with Tool Use Capability for Tabular Data Analysis in Finance, Monash\nUniversity\nLLM4EDA: Emerging Progress in Large Language Models for Electronic Design Automation,Shanghai Jiao Tong University et al.\nAssisting in Writing Wikipedia-like Articles From Scratch with Large Language Models, Stanford\nLarge Language Models Can Solve Real-World Planning Rigorously with Formal Verification Tools,MIT, Harvard University & MIT-IBM Watson AI Lab\nPart 2: Applications in ScienceWeek 1101.07. Lecture: LLMs in Research: Experiment Planning & HypothesisGenerationThe first lecture dedicated to scientific applications shows how LLMs are used to plan experiments andgenerate hypothesis to accelerate research.\nKey points:\nExperiment planning\nHypothesis generation\nPredicting possible results\nCore Reading:\n Hypothesis Generation with Large Language Models (pages 1-9), University of Chicago &\nToyota Technological Institute at Chicago\n LLMs for Science: Usage for Code Generation and Data Analysis (pages 1-6), TUM\nEmergent autonomous scientific research capabilities of large language models, Carnegie Mellon\nUniversity\nAdditional Reading:'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 9, 'page_label': '10'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 10 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nImproving Scientific Hypothesis Generation with Knowledge Grounded Large Language Models,University of Virginia\nPaper Copilot: A Self-Evolving and Efficient LLM System for Personalized Academic Assistance,University of Illinois at Urbana-Champaign, Carnegie Mellon University & Carleton College\nSciLitLLM: How to Adapt LLMs for Scientific Literature Understanding, University of Science and\nTechnology of China & DP Technology\nMapping the Increasing Use of LLMs in Scientific Papers, Stanford\n03.07: Lab: Experiment Planning & Hypothesis Generation\nOn material of session 01.07\nIn this lab, we’ll practice in facilitating researcher’s work with LLMs on the example of a toy scientificresearch.\nReading: see session 22.05\nWeek 1208.07: Pitch: Agent for Code Generation\nOn material of session 27.05\nThis pitch will revolve around the contractors’ implementation of a self-improving code generator. Thecode generator will have to generate both scripts and test cases for a problem given in the inputprompt, run the tests and refine the code if needed. Specific requirements will be released on 17.06.\nReading: see session 27.05 and session 05.06\n10.07. Lecture: Other Applications in Science: Drug Discovery, Math etc. &Scientific ReliabilityThe final core topic will mention other scientific applications of LLMs that were not covered in theprevious lectures and address the question of reliability of the results obtained with LLMs.\nKey points:\nDrug discovery, math & other applications\nScientific confidence & reliability\nCore Reading:\n Can LLMs replace Neil deGrasse Tyson? Evaluating the Reliability of LLMs as ScienceCommunicators (pages 1-9), Indian Institute of Technology\nAdditional Reading:'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 10, 'page_label': '11'}, page_content='12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 11 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nA Comprehensive Survey of Scientific Large Language Models and Their Applications in ScientificDiscovery, University of Illinois at Urbana-Champaign et al.\nLarge Language Models in Drug Discovery and Development: From Disease Mechanisms to ClinicalTrials, Department of Data Science and AI, Monash University et al.\nLLM-SR: Scientific Equation Discovery via Programming with Large Language Models, Virginia\nTech et al.\n Awesome Scientific Language Models, yuzhimanhua (GitHub repo)\nCURIE: Evaluating LLMs On Multitask Scientific Long Context Understanding and Reasoning, Google\net al.\nMultiple Choice Questions: Reasoning Makes Large Language Models (LLMs) More Self-ConfidentEven When They Are Wrong, Nanjing University of Aeronautics and Astronautics et al.\nBlock 3: Wrap-upWeek 1315.07. Pitch: Agent for Web Development\nOn material of session 03.06\nThe contractors will present their agent that will have to generate full (minimalistic) websites by aprompt. For each website, the agent will have to generate its own style and a simple menu with workingnavigation as well as the contents. Specific requirements will be released on 24.06.\nReading: see session 03.06 and session 05.06\n17.07. Lecture: Role of AI in Recent YearsThe last lecture of the course will turn to societal considerations regarding LLMs and AI in general andwill investigate its role and influence on the humanity nowadays.\nKey points:\nStudies on influence of AI in the recent years\nStudies on AI integration rate\nEthical, legal & environmental aspects\nCore Reading:\n Protecting Human Cognition in the Age of AI (pages 1-5), The University of Texas at Austin et al.\n Artificial intelligence governance: Ethical considerations and implications for social responsibility(pages 1-12), University of Malta\nAdditional Reading:'),
 Document(metadata={'producer': 'macOS Version 12.7.6 (Build 21H1320) Quartz PDFContext', 'creator': 'Safari', 'creationdate': "D:20250512152829Z00'00'", 'title': 'Topics Overview - LLM-based Assistants', 'moddate': "D:20250512152829Z00'00'", 'source': './topic_overview.pdf', 'total_pages': 12, 'page': 11, 'page_label': '12'}, page_content="12.05.25, 17:28Topics Overview - LLM-based Assistants\nPage 12 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html\nAugmenting Minds or Automating Skills: The Differential Role of Human Capital in Generative AI’sImpact on Creative Tasks, Tsinghua University & Wuhan University of Technology\nHuman Creativity in the Age of LLMs: Randomized Experiments on Divergent and ConvergentThinking, University of Toronto\nEmpirical evidence of Large Language Model’s influence on human spoken communication, Max-\nPlanck Institute for Human Development\n The 2025 AI Index Report: Top Takeaways, Stanford\nGrowing Up: Navigating Generative AI’s Early Years – AI Adoption Report: Executive Summary, AI at\nWharton\nEthical Implications of AI in Data Collection: Balancing Innovation with Privacy, AI Data Chronicles\nLegal and ethical implications of AI-based crowd analysis: the AI Act and beyond, Vrije\nUniversiteit\nA Survey of Sustainability in Large Language Models: Applications, Economics, and Challenges,Cleveland State University et al.\nWeek 1422.07. Pitch: LLM-based Research Assistant\nOn material of session 01.07\nThe last pitch will introduce an agent that will have to plan the research, generate hypotheses, find theliterature etc. for a given scientific problem. It will then have to introduce its results in form of a TODOor a guide for the researcher to start off of. Specific requirements will be released on 01.07.\nReading: see session 01.07 and session 03.07\n24.07. Debate: Role of AI in Recent Years + Wrap-up\nOn material of session 17.07\nThe course will be concluded by the final debates, after which a short Q&A session will be held.\nDebate topics:\nLLM Behavior: Evidence of Awareness or Illusion of Understanding?\nShould We Limit the Usage of AI?\nReading: see session 17.07\nCopyright © 2025, Maksim ShmaltsMade with Sphinx and @pradyunsg's Furo")]

print(pages[0].page_content)

12.05.25, 17:28Topics Overview - LLM-based Assistants
Page 1 of 12https://maxschmaltz.github.io/Course-LLM-based-Assistants/infos/topic_overview.html
To p i c s  O v e r v i e wThe schedule is preliminary and subject to changes!
The reading for each lecture is given as references to the sources the respective lectures base on. Youare not obliged to read anything. However, you are strongly encouraged to read references marked bypin emojis 
: those are comprehensive overviews on the topics or important works that are beneficialfor a better understanding of the key concepts. For the pinned papers, I also specify the pages span foryou to focus on the most important fragments. Some of the sources are also marked with a popcornemoji 
: that is misc material you might want to take a look at: blog posts, GitHub repos, leaderboardsetc. (also a couple of LLM-based games). For each of the sources, I also leave my subjectiveestimation of how important this work is for this specific topic: from yellow 
 ‘partially useful’ thoughorange 
 ‘useful’ to red 
 ‘crucial findings / thoughts’.  T h e s e  e s t i m a t i o n s  w i l l  b e  c o n t i n u o u s l yupdated as I revise the materials.
For the labs, you are provided with practical tutorials that respective lab tasks will mostly derive from.The core tutorials are marked with a writing emoji 
; you are asked to inspect them in advance(better yet: try them out). On lab sessions, we will only briefly recap them so it is up to you to preparein advance to keep up with the lab.
Disclaimer: the reading entries are no proper citations; the bibtex references as well as detailed infosabout the authors, publish date etc. can be found under the entry links.
Block 1: IntroWeek 122.04. Lecture: LLMs as a Form of Intelligence vs LLMs as Statistical MachinesThat is an introductory lecture, in which I will briefly introduce the course and we’ll have a warming updiscussion about different perspectives on LLMs’ nature. We will focus on two prominent outlooks: LLMis a form of intelligence and LLM is a complex statistical machine. We’ll discuss differences of LLMswith human intelligence and the degree to which LLMs exhibit (self-)awareness.
Key points:
Course introduction
Different perspectives on the nature of LLMs
Similarities and differences between human and artificial intelligence
LLMs’ (self-)awareness
Core Reading:
 The Debate Over Understanding in AI’s Large Language Models (pages 1-7), Santa Fe
Institute 
Meaning without reference in large language models, UC Berkeley & DeepMind 
Dissociating language and thought in large language models (intro [right after the abstract, seemore on the sectioning in this paper at the bottom of page 2], sections 1, 2.3 [LLMs are predictive…], 3-5), The University of Texas at Austin et al. 
Additional Reading:
LLM-basedAssistants
INFOS AND STUFF
BLOCK 1: INTRO
BLOCK 2: CORE TOPICS | PART 1:BUSINESS APPLICATIONS
BLOCK 2: CORE TOPICS | PART 2:APPLICATIONS IN SCIENCE
BLOCK 3: WRAP-UP
Topics Overview
Debates
Pitches
LLM Inference Guide
22.04. LLMs as a Form ofIntelligence vs LLMs asStatistical Machines
24.04. LLM & Agent Basics
29.04. Intro to LangChain 
!
"
06.05. Virtual Assistants Pt. 1:Chatbots
08.05. Basic LLM-basedChatbot 
#
Under development
Under development
Search

As you can see, the result is not satisfying because the PDF has a more complex structure than just one-paragraph text. To handle it’s layout, we could use UnstructuredLoader that will return a Document not for the whole page but for a single structure; for simplicity, let’s now go with PyPDF.

Chunking¶

During RAG, relevant documents are usually retrieved by semantic similarity that is calculated between the search query and each document in the index. However, if we calculate vectors for the entire PDF pages, we risk not to capture any meaning in the embedding because the context is just too long. That is why usually, loaded text is chunked in a RAG application; embeddings for smaller pieces of text are more discriminative, and thus the relevant context may be retrieved better. Furthermore, it ensure process consistency when working documents of varying sizes, and is just more computationally efficient.

Different approaches to chunking are described in tutorial Text splitters from LangChain. We’ll use RecursiveCharacterTextSplitter – a good option in terms of simplicity-quality ratio for simple cases. This splitter tries to keep text structures (paragraphs, sentences) together and thus maintain text coherence in chunks.

from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.documents import Document

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512, # maximum number of characters in a chunk
    chunk_overlap=50 # number of characters to overlap between chunks
)

def split_page(page: Document) -> List[Document]:
    chunks = text_splitter.split_text(page.page_content)
    return [
        Document(
            page_content=chunk,
            metadata=page.metadata,
        ) 
        for chunk in chunks
    ]

docs = []
for page in pages:
    docs += split_page(page)

print(f"Converted {len(pages)} pages into {len(docs)} chunks.")

Converted 12 pages into 66 chunks.

print(docs[3].page_content)

For the labs, you are provided with practical tutorials that respective lab tasks will mostly derive from.The core tutorials are marked with a writing emoji 
; you are asked to inspect them in advance(better yet: try them out). On lab sessions, we will only briefly recap them so it is up to you to preparein advance to keep up with the lab.

Convert to Embeddings¶

As discussed, the retrieval usually succeeds by vector similarity and the index contains not the actual texts but their vector representations. Vector representations are created by embedding models – models usually made specifically for this objective by being trained to create more similar vectors for more similar sentences and to push apart dissimilar sentences in the vector space.

We will use the nv-embedqa-e5-v5 model – a model from NVIDIA pretrained for English QA.

from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings

EMBEDDING_NAME = "nvidia/nv-embedqa-e5-v5"

embeddings = NVIDIAEmbeddings(
    model=EMBEDDING_NAME, 
    api_key=os.getenv("NVIDIA_API_KEY")
)

An embedding model receives an input text and returns a dense vector that is believed to capture its semantic properties.

test_embedding = embeddings.embed_query("Sample sentence to embed")
test_embedding

[-0.0149383544921875,
 -0.03466796875,
 0.0280303955078125,
 0.0283050537109375,
 0.0264892578125,
 0.0285186767578125,
 0.00839996337890625,
 -0.034698486328125,
 0.02716064453125,
 -0.0160980224609375,
 0.06634521484375,
 0.041412353515625,
 0.0421142578125,
 -0.02703857421875,
 -0.033599853515625,
 0.0215606689453125,
 0.0092010498046875,
 -0.0203857421875,
 -0.033447265625,
 0.036468505859375,
 -0.0037174224853515625,
 -0.0267791748046875,
 0.0172119140625,
 0.027191162109375,
 0.03961181640625,
 0.01403045654296875,
 0.0002256631851196289,
 -0.0247802734375,
 0.006275177001953125,
 0.057891845703125,
 0.033355712890625,
 -0.0011873245239257812,
 0.023223876953125,
 0.0181427001953125,
 -0.00308990478515625,
 -0.016998291015625,
 -0.0247039794921875,
 0.0113067626953125,
 0.053863525390625,
 -0.0166168212890625,
 -0.0241546630859375,
 -0.06439208984375,
 0.049652099609375,
 0.04217529296875,
 -0.0178375244140625,
 -0.0159149169921875,
 0.01025390625,
 -0.04742431640625,
 0.01513671875,
 0.0248565673828125,
 -0.01312255859375,
 -0.01031494140625,
 0.036865234375,
 -0.0175933837890625,
 0.0026874542236328125,
 0.040557861328125,
 0.0341796875,
 -0.01544189453125,
 -0.0030155181884765625,
 -0.02008056640625,
 -0.005878448486328125,
 -0.00128936767578125,
 -0.040191650390625,
 -0.0572509765625,
 -0.040069580078125,
 0.0264892578125,
 0.0272674560546875,
 0.01898193359375,
 0.023345947265625,
 -0.00994873046875,
 0.0036106109619140625,
 0.00020182132720947266,
 -0.017852783203125,
 0.0224609375,
 0.01148223876953125,
 -0.0226898193359375,
 -0.055816650390625,
 0.0169677734375,
 0.01413726806640625,
 -0.0211944580078125,
 -0.0236663818359375,
 0.006343841552734375,
 -0.01413726806640625,
 0.04730224609375,
 -0.00844573974609375,
 -0.0257568359375,
 0.0224609375,
 -0.0039520263671875,
 0.020172119140625,
 -0.0187835693359375,
 0.0050506591796875,
 0.0072479248046875,
 0.029144287109375,
 0.0238494873046875,
 0.0014591217041015625,
 -0.0196533203125,
 -0.051605224609375,
 0.050750732421875,
 -0.033294677734375,
 -0.0014400482177734375,
 -0.043701171875,
 -0.037139892578125,
 0.0548095703125,
 0.04132080078125,
 -0.005390167236328125,
 -0.011932373046875,
 -0.0240020751953125,
 -0.060089111328125,
 0.03997802734375,
 -0.040496826171875,
 -0.032440185546875,
 0.02215576171875,
 0.0150299072265625,
 0.00702667236328125,
 0.055145263671875,
 -0.025970458984375,
 -0.01206207275390625,
 -0.0025539398193359375,
 0.0203704833984375,
 0.09100341796875,
 0.03985595703125,
 -0.02471923828125,
 0.03778076171875,
 0.0197906494140625,
 -0.0618896484375,
 0.050384521484375,
 0.032196044921875,
 -0.0126495361328125,
 -0.038055419921875,
 0.00605010986328125,
 0.004913330078125,
 0.0208587646484375,
 -0.0570068359375,
 -0.0233154296875,
 0.04315185546875,
 0.032958984375,
 -0.109619140625,
 -0.0238037109375,
 0.04669189453125,
 0.01160430908203125,
 0.008880615234375,
 -0.04119873046875,
 0.0007123947143554688,
 -0.00258636474609375,
 0.02496337890625,
 0.050750732421875,
 -0.027679443359375,
 0.07110595703125,
 -0.042083740234375,
 0.0533447265625,
 0.013580322265625,
 -0.0203094482421875,
 -0.0400390625,
 0.03076171875,
 -0.0055084228515625,
 -0.052734375,
 0.0272064208984375,
 0.061065673828125,
 0.009552001953125,
 -0.043853759765625,
 0.0283203125,
 -0.0093231201171875,
 -0.0215606689453125,
 0.01263427734375,
 -0.0296630859375,
 -0.0518798828125,
 0.0302276611328125,
 0.0294952392578125,
 0.025177001953125,
 0.0166778564453125,
 0.00391387939453125,
 -0.004611968994140625,
 -0.0013332366943359375,
 0.0004973411560058594,
 0.0192718505859375,
 0.025299072265625,
 0.04998779296875,
 0.069091796875,
 0.009918212890625,
 -0.0029144287109375,
 -0.0038509368896484375,
 -0.045867919921875,
 0.039794921875,
 0.034271240234375,
 -0.026947021484375,
 0.03680419921875,
 -0.00124359130859375,
 -0.0005440711975097656,
 0.001010894775390625,
 -0.037506103515625,
 -0.057342529296875,
 -0.069580078125,
 0.08404541015625,
 -0.013580322265625,
 -0.0186004638671875,
 -0.05718994140625,
 0.007843017578125,
 -0.0118255615234375,
 -0.05596923828125,
 -0.01555633544921875,
 -0.005893707275390625,
 -0.01551055908203125,
 0.0394287109375,
 -0.039459228515625,
 -0.0047454833984375,
 0.0287933349609375,
 0.031646728515625,
 0.0260009765625,
 0.03076171875,
 0.01485443115234375,
 0.004825592041015625,
 7.158517837524414e-05,
 0.01491546630859375,
 0.0090484619140625,
 -0.0116119384765625,
 -0.0198211669921875,
 0.033050537109375,
 -0.044189453125,
 -0.007450103759765625,
 0.01528167724609375,
 -0.0141143798828125,
 0.043701171875,
 0.0009212493896484375,
 0.003612518310546875,
 -0.0061798095703125,
 -0.0255279541015625,
 0.057647705078125,
 0.0149383544921875,
 0.038116455078125,
 -0.033935546875,
 -0.00676727294921875,
 -0.058135986328125,
 0.035858154296875,
 -0.00732421875,
 -0.0360107421875,
 0.0111236572265625,
 0.036895751953125,
 -0.0615234375,
 0.01161956787109375,
 -0.0215606689453125,
 -0.047149658203125,
 0.005767822265625,
 0.05035400390625,
 0.00829315185546875,
 0.038421630859375,
 0.047271728515625,
 -0.07818603515625,
 0.02313232421875,
 0.04266357421875,
 0.0017232894897460938,
 0.0129852294921875,
 0.032684326171875,
 0.01025390625,
 -0.00722503662109375,
 0.02825927734375,
 -0.0650634765625,
 0.0242156982421875,
 -0.0172119140625,
 -0.026458740234375,
 -0.0183563232421875,
 -0.06439208984375,
 0.047943115234375,
 0.010345458984375,
 -0.0245513916015625,
 0.03900146484375,
 0.04083251953125,
 -0.013824462890625,
 -0.02215576171875,
 0.0635986328125,
 -0.043487548828125,
 0.0220947265625,
 0.007678985595703125,
 -0.0469970703125,
 -0.019989013671875,
 -0.01806640625,
 0.00447845458984375,
 -0.0236663818359375,
 -0.031341552734375,
 0.01004791259765625,
 0.0278167724609375,
 -0.0087127685546875,
 0.018218994140625,
 -0.01654052734375,
 0.0061798095703125,
 -0.037353515625,
 0.04522705078125,
 -0.0167236328125,
 -0.022735595703125,
 0.0066680908203125,
 0.0200958251953125,
 0.011016845703125,
 -0.006195068359375,
 -0.0218963623046875,
 -0.07635498046875,
 -0.010009765625,
 0.0194091796875,
 0.0009889602661132812,
 -0.02264404296875,
 -0.025665283203125,
 0.01021575927734375,
 -0.015411376953125,
 -0.01348876953125,
 -0.0163421630859375,
 0.03607177734375,
 -0.01207733154296875,
 -0.0204315185546875,
 -0.045196533203125,
 0.0265960693359375,
 -0.031494140625,
 0.0413818359375,
 0.004802703857421875,
 -0.05010986328125,
 0.042816162109375,
 0.008880615234375,
 -0.0007367134094238281,
 0.044769287109375,
 0.0229644775390625,
 0.0433349609375,
 -0.035491943359375,
 0.01203155517578125,
 0.03167724609375,
 -0.039093017578125,
 -0.04864501953125,
 -0.00131988525390625,
 -0.01812744140625,
 0.0078887939453125,
 -0.00991058349609375,
 0.02923583984375,
 -0.0004565715789794922,
 -0.02471923828125,
 -0.053955078125,
 0.0006866455078125,
 0.022064208984375,
 -0.0288848876953125,
 -0.0107879638671875,
 -0.0509033203125,
 -0.04473876953125,
 -0.0261688232421875,
 0.01126861572265625,
 0.01451873779296875,
 0.048675537109375,
 0.01861572265625,
 0.034637451171875,
 0.03485107421875,
 0.04559326171875,
 -0.01479339599609375,
 -0.04132080078125,
 0.051239013671875,
 -0.02471923828125,
 -0.0186614990234375,
 -0.00839996337890625,
 0.0027980804443359375,
 0.0094451904296875,
 0.01336669921875,
 -0.003398895263671875,
 0.01067352294921875,
 0.0296478271484375,
 0.0046234130859375,
 -0.01312255859375,
 0.0131378173828125,
 0.0555419921875,
 0.0218963623046875,
 -0.03387451171875,
 -0.041839599609375,
 -0.0396728515625,
 0.01308441162109375,
 0.0010004043579101562,
 0.006893157958984375,
 0.006748199462890625,
 0.07275390625,
 0.01329803466796875,
 0.020599365234375,
 -0.0038890838623046875,
 0.0204925537109375,
 0.00167083740234375,
 0.011749267578125,
 0.02777099609375,
 0.0130767822265625,
 0.0175933837890625,
 -0.020294189453125,
 0.002025604248046875,
 -0.0047760009765625,
 -0.022552490234375,
 -0.01019287109375,
 -0.0660400390625,
 -0.0263519287109375,
 -0.008270263671875,
 0.032318115234375,
 0.00299835205078125,
 -0.01605224609375,
 0.055633544921875,
 -0.0055999755859375,
 -0.0200958251953125,
 -0.003330230712890625,
 -0.03179931640625,
 0.01230621337890625,
 -0.0003800392150878906,
 -0.04315185546875,
 -0.00653839111328125,
 0.0207672119140625,
 -0.0411376953125,
 0.07562255859375,
 0.019622802734375,
 0.01129913330078125,
 0.02691650390625,
 0.0023059844970703125,
 -0.02783203125,
 -0.015289306640625,
 -6.639957427978516e-05,
 0.055816650390625,
 0.0355224609375,
 0.038330078125,
 0.01007843017578125,
 -0.028106689453125,
 0.0008792877197265625,
 -0.0241546630859375,
 -0.0084686279296875,
 0.02752685546875,
 0.058013916015625,
 0.01338958740234375,
 0.016510009765625,
 -0.0263214111328125,
 0.020721435546875,
 0.03607177734375,
 -0.07855224609375,
 -0.03436279296875,
 0.055511474609375,
 -0.007534027099609375,
 -0.0032329559326171875,
 0.0389404296875,
 0.02490234375,
 0.02423095703125,
 -0.0150299072265625,
 -0.01091766357421875,
 -0.0445556640625,
 0.0235595703125,
 -0.007198333740234375,
 -0.0350341796875,
 0.0146484375,
 -0.0172119140625,
 -0.010467529296875,
 0.0272064208984375,
 -0.027130126953125,
 -0.00777435302734375,
 0.0163421630859375,
 0.0012426376342773438,
 -0.03094482421875,
 -0.0184478759765625,
 -0.0282440185546875,
 0.0266571044921875,
 0.0438232421875,
 -0.0191650390625,
 -0.010009765625,
 -0.008087158203125,
 -0.08538818359375,
 0.022064208984375,
 0.06011962890625,
 0.070068359375,
 -0.0308074951171875,
 -0.0209197998046875,
 -0.047332763671875,
 0.0185699462890625,
 -0.01129913330078125,
 -0.001155853271484375,
 -0.005718231201171875,
 -0.065185546875,
 0.00330352783203125,
 -0.00018036365509033203,
 -0.004703521728515625,
 -0.009246826171875,
 0.0230255126953125,
 -0.0160980224609375,
 0.0033321380615234375,
 -0.0166168212890625,
 -0.008270263671875,
 0.011199951171875,
 0.035064697265625,
 0.0211944580078125,
 -0.018310546875,
 0.0093231201171875,
 0.0007100105285644531,
 -0.006504058837890625,
 0.0401611328125,
 -0.058349609375,
 -0.058563232421875,
 0.0307769775390625,
 0.045196533203125,
 -0.004322052001953125,
 0.00554656982421875,
 -0.0200958251953125,
 0.01476287841796875,
 0.001979827880859375,
 0.0219879150390625,
 -0.038177490234375,
 0.0024852752685546875,
 -0.051025390625,
 -0.0188140869140625,
 0.0125579833984375,
 -0.022308349609375,
 0.055572509765625,
 0.0275726318359375,
 -0.00732421875,
 0.0082550048828125,
 0.00958251953125,
 0.0293121337890625,
 -0.04913330078125,
 -0.0175628662109375,
 -0.026031494140625,
 0.038177490234375,
 -0.02874755859375,
 0.0267333984375,
 0.039459228515625,
 0.0010366439819335938,
 -0.044891357421875,
 -0.03155517578125,
 -0.0221099853515625,
 0.01129913330078125,
 0.0158843994140625,
 -0.0218505859375,
 -0.01026153564453125,
 -0.01088714599609375,
 0.004848480224609375,
 0.0258331298828125,
 -0.036163330078125,
 -0.0540771484375,
 -0.01039886474609375,
 -0.005092620849609375,
 0.022613525390625,
 0.016265869140625,
 0.005146026611328125,
 -0.0233001708984375,
 -0.0264892578125,
 -0.0196380615234375,
 0.0182342529296875,
 0.01580810546875,
 0.0380859375,
 -0.03173828125,
 -0.07086181640625,
 0.005401611328125,
 0.033599853515625,
 0.03662109375,
 0.0037250518798828125,
 -0.000682830810546875,
 -0.0186767578125,
 0.00199127197265625,
 -0.01690673828125,
 -0.062744140625,
 -0.0308990478515625,
 0.0280609130859375,
 0.006683349609375,
 0.01276397705078125,
 -0.036590576171875,
 -0.037811279296875,
 -0.0361328125,
 -0.044219970703125,
 0.01099395751953125,
 0.058349609375,
 -0.007762908935546875,
 0.01105499267578125,
 -0.0218505859375,
 -0.00801849365234375,
 0.048583984375,
 0.03253173828125,
 -0.0104522705078125,
 -0.0128326416015625,
 -0.01580810546875,
 -0.01010894775390625,
 -0.021820068359375,
 0.023345947265625,
 -0.0243988037109375,
 -0.030914306640625,
 -0.0231475830078125,
 -0.036102294921875,
 -0.0063629150390625,
 -0.01702880859375,
 0.038665771484375,
 0.019561767578125,
 -0.01555633544921875,
 -0.0355224609375,
 0.0122528076171875,
 0.01308441162109375,
 -0.0006513595581054688,
 0.03662109375,
 0.0173187255859375,
 0.064697265625,
 0.0180206298828125,
 -0.01116180419921875,
 -0.001995086669921875,
 0.0214385986328125,
 0.032928466796875,
 -0.010589599609375,
 0.070068359375,
 0.0122833251953125,
 0.04412841796875,
 0.005336761474609375,
 0.02899169921875,
 0.0093231201171875,
 -0.01134490966796875,
 0.004177093505859375,
 -0.0204315185546875,
 0.0008978843688964844,
 -0.001094818115234375,
 0.033599853515625,
 0.005126953125,
 -0.04046630859375,
 0.002315521240234375,
 -0.00643157958984375,
 0.01268768310546875,
 -0.0125579833984375,
 0.00930023193359375,
 -0.01485443115234375,
 -0.00028634071350097656,
 0.031707763671875,
 -0.0039825439453125,
 -0.0290985107421875,
 0.039581298828125,
 0.06512451171875,
 0.0552978515625,
 0.0239105224609375,
 0.064697265625,
 0.0186767578125,
 0.0177154541015625,
 0.028961181640625,
 0.0188446044921875,
 -0.04510498046875,
 -0.017669677734375,
 -0.01947021484375,
 0.0307464599609375,
 -0.04827880859375,
 -0.02532958984375,
 -0.05511474609375,
 -0.006175994873046875,
 0.01454925537109375,
 -0.0290374755859375,
 0.003948211669921875,
 -0.0018672943115234375,
 -0.0579833984375,
 -0.08843994140625,
 0.0214996337890625,
 -0.037506103515625,
 -0.0213623046875,
 -0.01666259765625,
 -0.005634307861328125,
 -0.005496978759765625,
 0.02117919921875,
 0.032623291015625,
 -0.0428466796875,
 -0.085205078125,
 -0.036407470703125,
 -0.046051025390625,
 -0.016510009765625,
 0.036224365234375,
 0.00017917156219482422,
 -0.0274658203125,
 0.0237274169921875,
 0.0677490234375,
 0.093994140625,
 0.022369384765625,
 -0.004436492919921875,
 -0.0004718303680419922,
 0.0178680419921875,
 0.045623779296875,
 -0.0229644775390625,
 -0.046844482421875,
 -0.03643798828125,
 0.1600341796875,
 0.023162841796875,
 0.008331298828125,
 -0.0249786376953125,
 -0.0038013458251953125,
 0.01255035400390625,
 0.01297760009765625,
 -0.0081634521484375,
 -0.0399169921875,
 0.006084442138671875,
 -0.0150299072265625,
 -0.05023193359375,
 0.04168701171875,
 0.0014791488647460938,
 -0.010467529296875,
 -0.004199981689453125,
 -0.0806884765625,
 0.0022830963134765625,
 -0.0311279296875,
 0.01071929931640625,
 0.016143798828125,
 0.0089874267578125,
 -0.087890625,
 -0.004486083984375,
 -0.01036834716796875,
 -0.004886627197265625,
 -0.01467132568359375,
 -0.00363922119140625,
 0.026519775390625,
 0.0445556640625,
 0.0185546875,
 0.0007457733154296875,
 0.009796142578125,
 -0.0010776519775390625,
 0.016357421875,
 -0.058319091796875,
 0.00782012939453125,
 -0.003757476806640625,
 -0.01227569580078125,
 -0.01241302490234375,
 -0.0039215087890625,
 0.034423828125,
 0.036865234375,
 0.0215606689453125,
 -0.06689453125,
 -0.006587982177734375,
 0.0299835205078125,
 -0.0039520263671875,
 -0.0006346702575683594,
 -0.01290130615234375,
 0.00946044921875,
 0.0285491943359375,
 -0.03204345703125,
 -0.0191497802734375,
 0.0167694091796875,
 0.0238494873046875,
 0.006191253662109375,
 -0.036224365234375,
 -0.0182037353515625,
 -0.02923583984375,
 0.0022335052490234375,
 -0.0041961669921875,
 0.054534912109375,
 0.0071868896484375,
 0.0517578125,
 0.035369873046875,
 -0.006195068359375,
 -0.03582763671875,
 -0.010833740234375,
 -0.035919189453125,
 0.001667022705078125,
 0.00930023193359375,
 0.0168914794921875,
 -0.041412353515625,
 -0.0061798095703125,
 -0.003147125244140625,
 -0.050537109375,
 0.01239013671875,
 0.00014650821685791016,
 -0.0330810546875,
 -0.0214996337890625,
 0.01165771484375,
 0.0321044921875,
 -0.034423828125,
 0.00970458984375,
 0.008941650390625,
 0.0160980224609375,
 0.01200103759765625,
 0.019622802734375,
 0.040985107421875,
 -0.0217742919921875,
 0.0440673828125,
 0.031951904296875,
 0.004909515380859375,
 0.00018727779388427734,
 -0.032196044921875,
 -0.059295654296875,
 -0.054168701171875,
 0.01306915283203125,
 0.0238494873046875,
 0.0302581787109375,
 0.00746917724609375,
 0.0179290771484375,
 0.031524658203125,
 -0.012969970703125,
 0.04742431640625,
 0.0069580078125,
 -0.0088653564453125,
 -0.007640838623046875,
 0.048919677734375,
 0.00565338134765625,
 -0.036865234375,
 -0.0452880859375,
 0.01001739501953125,
 -0.0335693359375,
 -0.0574951171875,
 -0.017852783203125,
 -0.007343292236328125,
 0.0579833984375,
 -0.02130126953125,
 0.055938720703125,
 -0.0173492431640625,
 0.01336669921875,
 -0.019195556640625,
 -0.004730224609375,
 0.0283966064453125,
 -0.007221221923828125,
 0.02532958984375,
 0.0150299072265625,
 -0.0295867919921875,
 0.032745361328125,
 -0.0049896240234375,
 0.06939697265625,
 0.04901123046875,
 0.0166015625,
 0.06787109375,
 0.00771331787109375,
 0.041107177734375,
 -0.028411865234375,
 -0.00662994384765625,
 0.03570556640625,
 0.007213592529296875,
 0.0276947021484375,
 0.034149169921875,
 -0.010223388671875,
 0.017974853515625,
 0.006336212158203125,
 -0.031585693359375,
 -0.05426025390625,
 -0.00830078125,
 -0.01013946533203125,
 0.004467010498046875,
 -0.01169586181640625,
 -0.025604248046875,
 0.02734375,
 -0.01471710205078125,
 -0.0093231201171875,
 -0.0049591064453125,
 0.0195159912109375,
 0.0016946792602539062,
 0.01061248779296875,
 0.06597900390625,
 -0.05584716796875,
 -0.0188446044921875,
 0.038970947265625,
 0.0400390625,
 -0.026458740234375,
 -0.0181732177734375,
 -0.059600830078125,
 0.0113677978515625,
 -0.0229949951171875,
 0.047454833984375,
 -0.0033855438232421875,
 0.004650115966796875,
 -0.03076171875,
 -0.0016345977783203125,
 0.0177154541015625,
 -0.011871337890625,
 0.0061798095703125,
 0.0013113021850585938,
 0.0119171142578125,
 0.02667236328125,
 -0.0010986328125,
 0.0095062255859375,
 -0.01227569580078125,
 0.0249786376953125,
 0.0113677978515625,
 -0.01021575927734375,
 -0.048004150390625,
 -0.006534576416015625,
 0.037750244140625,
 0.0609130859375,
 -0.01378631591796875,
 -0.0037384033203125,
 0.031280517578125,
 -0.051727294921875,
 0.0218505859375,
 0.044525146484375,
 0.02972412109375,
 -0.01287841796875,
 0.07183837890625,
 -0.00287628173828125,
 0.005462646484375,
 0.03265380859375,
 0.0194091796875,
 -0.007274627685546875,
 0.0236968994140625,
 0.042449951171875,
 0.005931854248046875,
 0.0288543701171875,
 -0.0244903564453125,
 0.0293426513671875,
 0.01593017578125,
 -0.00814056396484375,
 0.02740478515625,
 0.002605438232421875,
 -0.017242431640625,
 0.00356292724609375,
 0.0167083740234375,
 -0.0024738311767578125,
 -0.02349853515625,
 0.005077362060546875,
 -0.05303955078125,
 0.00830841064453125,
 -0.01322174072265625,
 -0.0355224609375,
 0.033233642578125,
 -0.0235748291015625,
 -0.02386474609375,
 -0.03424072265625,
 -0.0069122314453125,
 -0.00978851318359375,
 -0.020660400390625,
 -0.005340576171875,
 0.02703857421875,
 -0.007781982421875,
 0.01129150390625,
 -0.01224517822265625,
 0.024322509765625,
 -0.0633544921875,
 0.02606201171875,
 -0.0307159423828125,
 -0.017181396484375,
 0.0142364501953125,
 -0.031982421875,
 -0.0017604827880859375,
 -0.0264892578125,
 0.0162811279296875,
 -0.00954437255859375,
 0.033172607421875,
 0.00337982177734375,
 0.0667724609375,
 -0.0268402099609375,
 -0.0006451606750488281,
 0.0296478271484375,
 -0.005767822265625,
 -0.041259765625,
 -0.050079345703125,
 -0.019866943359375,
 0.042999267578125,
 0.049713134765625,
 0.053009033203125,
 -0.0631103515625,
 -0.00958251953125,
 -0.03448486328125,
 0.036895751953125,
 0.01239013671875,
 -0.04119873046875,
 0.02484130859375,
 0.014556884765625,
 -0.0014200210571289062,
 0.003826141357421875,
 -0.0183868408203125,
 0.0026302337646484375,
 0.008453369140625,
 0.03204345703125,
 -0.01617431640625,
 -0.0006113052368164062,
 0.0439453125,
 0.00417327880859375,
 0.03741455078125,
 0.0175933837890625,
 -0.079345703125,
 0.043701171875,
 -0.03289794921875,
 -0.013397216796875,
 -0.0169677734375,
 -0.031158447265625,
 -0.046600341796875,
 0.01511383056640625,
 -0.0310211181640625,
 0.0293426513671875,
 -0.0199127197265625,
 -0.0005970001220703125,
 0.01444244384765625,
 -0.03448486328125,
 0.002105712890625,
 -0.0015773773193359375,
 -0.01195526123046875,
 0.025970458984375,
 0.0189056396484375,
 0.0142364501953125,
 0.055267333984375,
 0.007259368896484375,
 0.05340576171875,
 -0.0202789306640625,
 -0.006404876708984375,
 -0.03582763671875,
 0.039337158203125,
 0.0090179443359375,
 -0.0545654296875,
 0.04937744140625,
 0.048004150390625,
 -0.0238494873046875,
 0.0243377685546875,
 0.028961181640625,
 0.027496337890625,
 -0.049774169921875,
 -0.02215576171875,
 -0.01526641845703125,
 0.005756378173828125,
 -0.068603515625,
 -0.0038280487060546875,
 -0.005756378173828125,
 0.08148193359375,
 -0.04742431640625,
 0.0207061767578125,
 -0.0479736328125,
 0.02374267578125,
 -0.021575927734375,
 0.064208984375,
 0.043853759765625,
 0.040924072265625,
 0.01512908935546875,
 -0.0198974609375,
 ...]

Indexing¶

Now that we have split our data and initialized the embeddings, we can start indexing it. There are a lot of different implementations of indexes, you can take a lot at available options in Vector stores. One of the popular choices is Qdrant that provides a simple data management and can be deployed both locally, on a remote machine, and on the cloud.

Qdrant support persisting your vector storage, i.e. storing it on the working machine, but for simplicity, we will use it in the in-memory mode, so that the storage exists only as long as the notebook does.

from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams
from uuid import uuid4

First things first, we need to create a client – a Qdrant instance that will be the entrypoint for all the actions we do with the data.

qd_client = QdrantClient(":memory:")    # in-memory Qdrant client

Then, as we use an in-memory client that does not store the index between the notebook sessions, we need to initialize a collection. Alternatively, if we were persisting the data, we would perform a check if the collection exists and then either create or load it.

For Qdrant to initialize the structure of the index correctly, we need to provide the dimentionality of the embedding we will be using as well as teh distance metric.

collection_name = "1505"

qd_client.create_collection(
    collection_name=collection_name,
    # embedding params here
    vectors_config=VectorParams(
        size=len(test_embedding),   # is there a better way?
        distance=Distance.COSINE    # cosine distance
    )
)

True

Finally, we use a LangChain wrapper to connect to the index to unify the workflow.

vector_store = QdrantVectorStore(
    client=qd_client,
    collection_name=collection_name,
    embedding=embeddings
)

Now we are ready to add our chunks to the vector storage. As we will be adding the chunks, the index will take care about converting our passages into embeddings.

In order to be able to delete / modify the chunks afterwards, we assign them with unique ids that we generate dynamically.

ids = [str(uuid4()) for _ in range(len(docs))]
vector_store.add_documents(
    docs,
    ids=ids
)

['2032c410-99b3-4163-83e2-676a60e2c23c',
 'ab7e1e6b-8641-447c-8a6b-81412a60b62a',
 'c0522e25-3ac4-4e03-8118-87002c6d5d3b',
 '3e0e8303-d763-4d2a-bb79-1df40b004b53',
 '83b7105a-f903-47e9-8102-417c0a4086e2',
 'fa346f5e-5f96-4846-85e8-30195a48984d',
 'c9bce4cd-a789-46f4-94f4-dee67c5c8f51',
 '2e00441d-36ed-4a28-8a1d-9fe77ffd081f',
 'b3dc5a78-b7ec-4f13-8d88-dc855b446f1b',
 '044c7db0-38b3-4113-b2a3-bf802c27e448',
 'bbece7a7-b622-4db3-b2e2-56d557225379',
 '5d10cd5b-00e8-45bc-b33c-d5665dd81ecc',
 '269d4146-1ece-433f-84da-e065137e6813',
 '1e61d0bb-97c1-41f9-950e-a4844bcba337',
 'c7eefc1d-c296-4db0-b18b-9a905c1373e3',
 '714bddd1-a313-44dd-ac6a-6eb89e794cd3',
 'e3d7142d-a78f-4213-8800-640c291ee799',
 'a23e9c23-fb06-4123-9d2f-b963d935ea7f',
 '863bd9ad-65f4-42cb-8d39-424f623c744b',
 '1d632eeb-ecb2-4794-b588-539b50c6947e',
 'db5554f7-18a8-49bc-a052-0d5a718176d4',
 '06c10ade-ce8f-45e5-ae98-eadb207b77ec',
 '30d944b1-7b30-4c08-9234-e2716049047a',
 '29db095e-4c03-4cd2-a9b1-78addc0da21e',
 'd82ef95a-20de-45fc-9a7a-60a0fb0dd7ae',
 'a79ef0e7-5290-4ed1-b279-24631ed5389c',
 '0071b83c-b829-42a5-afee-61a4eb6f7c00',
 '12c0e756-a1f6-44c6-bfed-7cc88acdbe17',
 'c41466b7-0d87-4424-bca1-883df076cdf4',
 '8c7869e4-3de0-433e-a81c-bd2120caae90',
 'f4393a61-d7fe-4bcc-9847-3fcef287feb4',
 '99782261-911b-48a7-abf7-c44aec203dc0',
 'abaf799b-4964-43d7-bed4-2fbd37d1e473',
 'b1507637-6a97-418a-bc4a-60b4cb4e29db',
 '45b5c62f-2649-4f57-951c-06dece86af2c',
 'b5f85104-6624-4d1b-9517-425fdd4f72ca',
 '4210b091-ed08-4220-9e42-5194163dbc99',
 'd6351ab3-5049-4f65-9d26-0d288d7a249a',
 '60353b1a-5b53-4a49-b0f7-b7478f54824d',
 '3c2386b4-d71a-47e2-9e43-8ddfda43c24a',
 '081b5a7e-1efa-471c-b1f1-dd05da31c573',
 '3d9ef8b8-4376-4694-88bf-b51f0daa8b50',
 'c5443c07-1fa1-4038-93b6-2fe4ba26440a',
 '032216e5-fe96-4004-b973-33494972abe9',
 '10d8feea-0fd8-4be7-a57e-1aae8ab29d61',
 'a0229a81-59c5-46fc-853c-1f1f1daf2026',
 '777189d3-3946-4564-9a5c-4a9572dc98da',
 '38a0fdc8-f782-4a4b-802d-0ba13e85fb87',
 '6bc5f959-0f07-47e0-a1d4-ea5a3f1ed8db',
 '4d13c075-318e-4d94-8c31-47441ea68560',
 '0de9b240-3453-4515-8616-5b95663a69f6',
 '123bb7ae-5493-4c71-bf17-977a5aa7d06e',
 '78895ca9-be3d-42f5-911f-16d38e054dad',
 '6ad56b1f-b617-4206-9421-101cf7f47fa9',
 'ad8fd90b-cc6f-4218-9778-05718255bdaa',
 '7adc826d-0679-42a2-bc92-9e420825fbfa',
 '5a726ca6-4849-4449-945f-61d949928872',
 '2807dcc1-75d5-4573-b4f2-4cb4f1f65dc0',
 'd93cec74-6552-4036-922e-11e02b29c5fd',
 '608e98e5-db53-4af3-a75c-f71765904304',
 '90f2e696-b6aa-4aef-94e6-e571499be00a',
 'fdf2dde1-f2f3-46f7-a5e1-fd53d4658539',
 '04b62c0b-9d8a-4275-9437-9170657115eb',
 'a531b7e9-1d4e-4c0c-8eb4-3fcc1f1ef2b1',
 '626403e0-9ee4-4bf8-85c3-159951c181a1',
 '629eff22-bcc5-46f3-b787-ec424ac54999']