<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:dc="http://purl.org/dc/elements/1.1/">
  <channel>
    <title>Forem: Kerem Nalbant</title>
    <description>The latest articles on Forem by Kerem Nalbant (@keremnalbant).</description>
    <link>https://forem.com/keremnalbant</link>
    <image>
      <url>https://media2.dev.to/dynamic/image/width=90,height=90,fit=cover,gravity=auto,format=auto/https:%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F1313778%2Ffb2361fa-9d5a-4cd0-a7c1-4826cd179706.PNG</url>
      <title>Forem: Kerem Nalbant</title>
      <link>https://forem.com/keremnalbant</link>
    </image>
    <atom:link rel="self" type="application/rss+xml" href="https://forem.com/feed/keremnalbant"/>
    <language>en</language>
    <item>
      <title>Check out how we boost efficiency in epilot with context-aware email suggestions!</title>
      <dc:creator>Kerem Nalbant</dc:creator>
      <pubDate>Mon, 26 May 2025 10:02:59 +0000</pubDate>
      <link>https://forem.com/keremnalbant/check-out-how-we-boost-efficiency-in-epilot-with-context-aware-email-suggestions-3oo5</link>
      <guid>https://forem.com/keremnalbant/check-out-how-we-boost-efficiency-in-epilot-with-context-aware-email-suggestions-3oo5</guid>
      <description>&lt;div class="ltag__link--embedded"&gt;
  &lt;div class="crayons-story "&gt;
  &lt;a href="https://dev.to/epilot/how-we-integrate-ai-in-epilot-chapter-2-serverless-rag-w-langchain-weaviate-5d93" class="crayons-story__hidden-navigation-link"&gt;How We Integrate AI in epilot - Chapter 2: Serverless RAG w/ LangChain &amp;amp; Weaviate&lt;/a&gt;


  &lt;div class="crayons-story__body crayons-story__body-full_post"&gt;
    &lt;div class="crayons-story__top"&gt;
      &lt;div class="crayons-story__meta"&gt;
        &lt;div class="crayons-story__author-pic"&gt;
          &lt;a class="crayons-logo crayons-logo--l" href="/epilot"&gt;
            &lt;img alt="epilot logo" src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Forganization%2Fprofile_image%2F3368%2Fa1997f89-aaf9-4f0f-b464-d34f2775a882.jpg" class="crayons-logo__image"&gt;
          &lt;/a&gt;

          &lt;a href="/keremnalbant" class="crayons-avatar  crayons-avatar--s absolute -right-2 -bottom-2 border-solid border-2 border-base-inverted  "&gt;
            &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F1313778%2Ffb2361fa-9d5a-4cd0-a7c1-4826cd179706.PNG" alt="keremnalbant profile" class="crayons-avatar__image"&gt;
          &lt;/a&gt;
        &lt;/div&gt;
        &lt;div&gt;
          &lt;div&gt;
            &lt;a href="/keremnalbant" class="crayons-story__secondary fw-medium m:hidden"&gt;
              Kerem Nalbant
            &lt;/a&gt;
            &lt;div class="profile-preview-card relative mb-4 s:mb-0 fw-medium hidden m:inline-block"&gt;
              
                Kerem Nalbant
                
              
              &lt;div id="story-author-preview-content-2125504" class="profile-preview-card__content crayons-dropdown branded-7 p-4 pt-0"&gt;
                &lt;div class="gap-4 grid"&gt;
                  &lt;div class="-mt-4"&gt;
                    &lt;a href="/keremnalbant" class="flex"&gt;
                      &lt;span class="crayons-avatar crayons-avatar--xl mr-2 shrink-0"&gt;
                        &lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Fuser%2Fprofile_image%2F1313778%2Ffb2361fa-9d5a-4cd0-a7c1-4826cd179706.PNG" class="crayons-avatar__image" alt=""&gt;
                      &lt;/span&gt;
                      &lt;span class="crayons-link crayons-subtitle-2 mt-5"&gt;Kerem Nalbant&lt;/span&gt;
                    &lt;/a&gt;
                  &lt;/div&gt;
                  &lt;div class="print-hidden"&gt;
                    
                      Follow
                    
                  &lt;/div&gt;
                  &lt;div class="author-preview-metadata-container"&gt;&lt;/div&gt;
                &lt;/div&gt;
              &lt;/div&gt;
            &lt;/div&gt;

            &lt;span&gt;
              &lt;span class="crayons-story__tertiary fw-normal"&gt; for &lt;/span&gt;&lt;a href="/epilot" class="crayons-story__secondary fw-medium"&gt;epilot&lt;/a&gt;
            &lt;/span&gt;
          &lt;/div&gt;
          &lt;a href="https://dev.to/epilot/how-we-integrate-ai-in-epilot-chapter-2-serverless-rag-w-langchain-weaviate-5d93" class="crayons-story__tertiary fs-xs"&gt;&lt;time&gt;May 26 '25&lt;/time&gt;&lt;span class="time-ago-indicator-initial-placeholder"&gt;&lt;/span&gt;&lt;/a&gt;
        &lt;/div&gt;
      &lt;/div&gt;

    &lt;/div&gt;

    &lt;div class="crayons-story__indention"&gt;
      &lt;h2 class="crayons-story__title crayons-story__title-full_post"&gt;
        &lt;a href="https://dev.to/epilot/how-we-integrate-ai-in-epilot-chapter-2-serverless-rag-w-langchain-weaviate-5d93" id="article-link-2125504"&gt;
          How We Integrate AI in epilot - Chapter 2: Serverless RAG w/ LangChain &amp;amp; Weaviate
        &lt;/a&gt;
      &lt;/h2&gt;
        &lt;div class="crayons-story__tags"&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/ai"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;ai&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/langchain"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;langchain&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/rag"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;rag&lt;/a&gt;
            &lt;a class="crayons-tag  crayons-tag--monochrome " href="/t/serverless"&gt;&lt;span class="crayons-tag__prefix"&gt;#&lt;/span&gt;serverless&lt;/a&gt;
        &lt;/div&gt;
      &lt;div class="crayons-story__bottom"&gt;
        &lt;div class="crayons-story__details"&gt;
          &lt;a href="https://dev.to/epilot/how-we-integrate-ai-in-epilot-chapter-2-serverless-rag-w-langchain-weaviate-5d93" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left"&gt;
            &lt;div class="multiple_reactions_aggregate"&gt;
              &lt;span class="multiple_reactions_icons_container"&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/multi-unicorn-b44d6f8c23cdd00964192bedc38af3e82463978aa611b4365bd33a0f1f4f3e97.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/fire-f60e7a582391810302117f987b22a8ef04a2fe0df7e3258a5f49332df1cec71e.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
                  &lt;span class="crayons_icon_container"&gt;
                    &lt;img src="https://assets.dev.to/assets/sparkle-heart-5f9bee3767e18deb1bb725290cb151c25234768a0e9a2bd39370c382d02920cf.svg" width="18" height="18"&gt;
                  &lt;/span&gt;
              &lt;/span&gt;
              &lt;span class="aggregate_reactions_counter"&gt;9&lt;span class="hidden s:inline"&gt; reactions&lt;/span&gt;&lt;/span&gt;
            &lt;/div&gt;
          &lt;/a&gt;
            &lt;a href="https://dev.to/epilot/how-we-integrate-ai-in-epilot-chapter-2-serverless-rag-w-langchain-weaviate-5d93#comments" class="crayons-btn crayons-btn--s crayons-btn--ghost crayons-btn--icon-left flex items-center"&gt;
              Comments


              &lt;span class="hidden s:inline"&gt;Add Comment&lt;/span&gt;
            &lt;/a&gt;
        &lt;/div&gt;
        &lt;div class="crayons-story__save"&gt;
          &lt;small class="crayons-story__tertiary fs-xs mr-2"&gt;
            8 min read
          &lt;/small&gt;
            
              &lt;span class="bm-initial"&gt;
                

              &lt;/span&gt;
              &lt;span class="bm-success"&gt;
                

              &lt;/span&gt;
            
        &lt;/div&gt;
      &lt;/div&gt;
    &lt;/div&gt;
  &lt;/div&gt;
&lt;/div&gt;

&lt;/div&gt;


</description>
      <category>ai</category>
      <category>langchain</category>
      <category>rag</category>
      <category>serverless</category>
    </item>
    <item>
      <title>How We Integrate AI in epilot - Chapter 2: Serverless RAG w/ LangChain &amp; Weaviate</title>
      <dc:creator>Kerem Nalbant</dc:creator>
      <pubDate>Mon, 26 May 2025 08:38:42 +0000</pubDate>
      <link>https://forem.com/epilot/how-we-integrate-ai-in-epilot-chapter-2-serverless-rag-w-langchain-weaviate-5d93</link>
      <guid>https://forem.com/epilot/how-we-integrate-ai-in-epilot-chapter-2-serverless-rag-w-langchain-weaviate-5d93</guid>
      <description>&lt;h2&gt;
  
  
  Introduction
&lt;/h2&gt;

&lt;p&gt;In the previous chapter, I shared how we began our AI journey at epilot by implementing AI Email Summaries, helping users reduce email reading time by up to 87%. Encouraged by that success, we're now stepping up our AI capabilities with Retrieval-Augmented Generation (RAG) to provide smarter, contextually aware email suggestions.&lt;/p&gt;

&lt;h2&gt;
  
  
  WHY?
&lt;/h2&gt;

&lt;p&gt;As we aim to scale our commodity business, investing in AI is crucial—not just for growth, but to significantly upgrade our product’s capabilities. Commodity segments often have a high volume of repetitive customer service requests. Our users need quick, context-aware email suggestions that understand:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;Previous communications and organizational knowledge&lt;/li&gt;
&lt;li&gt;Company-specific communication styles&lt;/li&gt;
&lt;li&gt;Tailored relationships with each customer&lt;/li&gt;
&lt;/ul&gt;

&lt;p&gt;Although Large Language Models (LLMs) are powerful, they're limited when accessing recent or specific company data. Customizing LLM responses usually involves prompt engineering, RAG or fine-tuning. Fine-tuning is resource-intensive and complex, making prompt engineering with RAG our clear choice.&lt;/p&gt;

&lt;h2&gt;
  
  
  Our Solution: Retrieval-Augmented Generation (RAG)
&lt;/h2&gt;

&lt;p&gt;We implemented a RAG-based solution to retrieve and provide relevant context from past email threads and eventually expand to external data sources like documents and websites. Long-term, organizations using epilot will have fully configurable, customized knowledge bases accessible to all future AI features and AI agents.&lt;/p&gt;

&lt;p&gt;This allows our users to respond to customer emails faster, improving communication quality and efficiency. On the end customer side, it means quicker, more accurate, and better overall service.&lt;/p&gt;

&lt;h3&gt;
  
  
  See It in Action
&lt;/h3&gt;

&lt;p&gt;  &lt;iframe src="https://www.youtube.com/embed/Ngtl3uYi6g8"&gt;
  &lt;/iframe&gt;
&lt;br&gt;
&lt;em&gt;An end customer emails about documentation requirements for a renovation plan (Sanierungsfahrplan).&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;The epilot user doesn’t waste time researching policies or manuals—they simply prompt our AI to &lt;code&gt;generate reply in english&lt;/code&gt;.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;Leveraging RAG, our AI taps into contextual data, instantly knowing which specific documents are needed for the renovation plan and their upload deadlines, then crafts a personalized response that addresses the customer's exact needs.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;Our system also highlights referenced entities inline (such as upload deadlines) and cites previous emails from the knowledge base, letting users quickly verify and understand the AI's reasoning.&lt;/em&gt;&lt;/p&gt;
&lt;h3&gt;
  
  
  Solution Components
&lt;/h3&gt;

&lt;p&gt;To build a secure, scalable RAG system in a serverless environment, we chose:&lt;/p&gt;
&lt;h4&gt;
  
  
  &lt;a href="https://www.langchain.com/" rel="noopener noreferrer"&gt;LangChain&lt;/a&gt;
&lt;/h4&gt;

&lt;p&gt;We use LangChain at epilot to integrate vector databases, LLMs, and build powerful AI agents. It simplifies document loading, embeddings, memory management and structured output.&lt;/p&gt;
&lt;h4&gt;
  
  
  &lt;a href="https://weaviate.io/" rel="noopener noreferrer"&gt;Weaviate&lt;/a&gt;
&lt;/h4&gt;

&lt;p&gt;After evaluating alternatives (like Pinecone, Chroma, and Qdrant), we selected Weaviate for its open-source, serverless architecture, strong community support, flexibility, and scalability. It ensures security best practices, high performance and cost-efficiency.&lt;/p&gt;
&lt;h4&gt;
  
  
  &lt;a href="https://microsoft.github.io/presidio/" rel="noopener noreferrer"&gt;Presidio&lt;/a&gt;
&lt;/h4&gt;

&lt;p&gt;Security and data privacy are essential. Amazon Bedrock has a zero-retention policy, and Weaviate offers encryption, GDPR compliance, and tenant isolation. But we needed an extra layer for handling sensitive PII data.&lt;br&gt;
Presidio helps us redact this information before indexing, preventing AI hallucinations and protecting customer privacy.&lt;/p&gt;
&lt;h4&gt;
  
  
  &lt;a href="https://www.langchain.com/langsmith" rel="noopener noreferrer"&gt;LangSmith&lt;/a&gt;
&lt;/h4&gt;

&lt;p&gt;LangSmith provides AI observability, performance monitoring, debugging, prompt management, and testing. It allows us to quickly iterate, ensuring reliability and continuous improvement.&lt;/p&gt;
&lt;h3&gt;
  
  
  How We Built It
&lt;/h3&gt;

&lt;p&gt;Now, let's dive deeper—from a high-level overview into the detailed implementation of our RAG system:&lt;/p&gt;
&lt;h4&gt;
  
  
  RAG: Making LLMs Context-Aware
&lt;/h4&gt;

&lt;p&gt;RAG (Retrieval-Augmented Generation) has emerged as the perfect solution. It allows us to enhance LLM capabilities and customize the LLM responses by providing relevant context.&lt;/p&gt;

&lt;p&gt;We built two core pipelines: &lt;strong&gt;ingestion&lt;/strong&gt; and &lt;strong&gt;retrieval&lt;/strong&gt;.&lt;/p&gt;
&lt;h5&gt;
  
  
  Ingestion
&lt;/h5&gt;

&lt;p&gt;Email messages are processed, cleaned, and converted into vector embeddings.&lt;br&gt;
&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ffwgolu914cj7xgogba8v.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ffwgolu914cj7xgogba8v.png" alt="Ingestion Flow"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;Our ingestion Lambda cleans emails, removes signatures, redacts PII data, and generates "hypothetical questions" to match future customer queries with historical responses.&lt;/p&gt;

&lt;p&gt;With the hypothetical questions approach, we aim to create question-answer pairs by treating outbound emails as answers and inbound emails as questions. Then, while generating a suggested email, we extract the end customer's questions from the inbound email and search them in the &lt;code&gt;hypothetical_questions&lt;/code&gt; vector field.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="n"&gt;chain&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;doc&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="k"&gt;lambda&lt;/span&gt; &lt;span class="n"&gt;x&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;x&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;text&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="n"&gt;ChatPromptTemplate&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_messages&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="p"&gt;[&lt;/span&gt;
            &lt;span class="p"&gt;(&lt;/span&gt;
                &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;system&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
                &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;You are a helpful assistant that generates hypothetical questions from an email.&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="p"&gt;),&lt;/span&gt;
            &lt;span class="p"&gt;(&lt;/span&gt;
                &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;human&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
                &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;Generate a list of maximum 3 hypothetical questions that the below email could be used to answer:&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s"&gt;{doc}&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="p"&gt;]&lt;/span&gt;
    &lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="n"&gt;llm&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;with_structured_output&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;HypotheticalQuestions&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
    &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="k"&gt;lambda&lt;/span&gt; &lt;span class="n"&gt;x&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;x&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;questions&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;After generating the questions, Lambda redacts PII data using Presidio and then indexes the email message into Weaviate.&lt;/p&gt;

&lt;p&gt;While indexing, Lambda first generates the embeddings of the email body text and hypothetical questions, then passes those vectors to Weaviate. We are using multiple vector embeddings, which allows us to store multiple vectors inside the same object, so that we can execute the search in both the email text and the questions without duplicating the data.&lt;/p&gt;

&lt;h5&gt;
  
  
  Retrieval
&lt;/h5&gt;

&lt;p&gt;Similar emails and potential answers are retrieved from the vector database.&lt;br&gt;
&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F7uchcy5wmm2r6frm7gru.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F7uchcy5wmm2r6frm7gru.png" alt="Retrieval Flow"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;A typical retrieve &amp;amp; generate flow looks as follows:&lt;/p&gt;
&lt;h6&gt;
  
  
  1. Extract questions
&lt;/h6&gt;


&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="n"&gt;extract_query_prompt&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;ChatPromptTemplate&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_messages&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="p"&gt;[&lt;/span&gt;
        &lt;span class="p"&gt;(&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;system&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;You are a professional question extractor, an AI assistant that extracts the customer inquiries from email messages.
    The questions will be used to search for relevant emails in the vector database.
    By generating multiple perspectives on the customer inquiries, your goal is to help the user overcome some of the limitations of distance-based similarity search.
    Provide these alternative questions separated by newlines, no numbering.&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="p"&gt;(&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;human&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;Generate a list of maximum 3 questions from the following email.
    Email: {email}
    Questions:
    &lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;extract_query_chain&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;extract_query_prompt&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="n"&gt;llm&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="nc"&gt;LineListOutputParser&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;

&lt;span class="n"&gt;extracted_questions&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="n"&gt;extract_query_chain&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;ainvoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
            &lt;span class="nb"&gt;input&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;email&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;email&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;text&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;
        &lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;


&lt;p&gt;For the email shown in the demo video, the following questions are extracted by the question extractor chain:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight json"&gt;&lt;code&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"output"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="s2"&gt;"Which documents are required to create an individual renovation roadmap?"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="s2"&gt;"How can I submit additional documents for the renovation roadmap?"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="s2"&gt;"What options are there for receiving support when uploading documents?"&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h6&gt;
  
  
  Query vector database
&lt;/h6&gt;

&lt;p&gt;We run multiple queries in parallel, and then combine the unique retrieved documents. We mostly adopt hybrid search: by setting the &lt;code&gt;alpha&lt;/code&gt; as close as possible to 1, we keep keyword search in the mix while leveraging semantic vector search.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="n"&gt;email_message_retriever&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;MultiQueryRetriever&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_llm&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;retriever&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;email_messages_vector_store&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;as_retriever&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="n"&gt;search_type&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;similarity_score_threshold&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;search_kwargs&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="nf"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
            &lt;span class="n"&gt;alpha&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mf"&gt;0.90&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="n"&gt;tenant&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;data&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;orgId&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="n"&gt;score_threshold&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mf"&gt;0.70&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="n"&gt;target_vector&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;text&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
            &lt;span class="n"&gt;return_uuids&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="bp"&gt;True&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="n"&gt;k&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;3&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="n"&gt;llm&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;llm&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;include_original&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="bp"&gt;True&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;question_retriever&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;MultiQueryRetriever&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_llm&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;retriever&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;email_messages_vector_store&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;as_retriever&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="n"&gt;search_type&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;similarity_score_threshold&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;search_kwargs&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="nf"&gt;dict&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
            &lt;span class="n"&gt;alpha&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mf"&gt;0.90&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="n"&gt;tenant&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;data&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;orgId&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="n"&gt;score_threshold&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mf"&gt;0.70&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="n"&gt;target_vector&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;questions&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;
            &lt;span class="n"&gt;return_uuids&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="bp"&gt;True&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="n"&gt;k&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="mi"&gt;3&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="n"&gt;llm&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;llm&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
    &lt;span class="n"&gt;questions&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="n"&gt;extracted_questions&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;merger_retriever&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="nc"&gt;MergerRetriever&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;retrievers&lt;/span&gt;&lt;span class="o"&gt;=&lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;
        &lt;span class="n"&gt;email_message_retriever&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
        &lt;span class="n"&gt;question_retriever&lt;/span&gt;
    &lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;retrieved_docs&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="k"&gt;await&lt;/span&gt; &lt;span class="n"&gt;merger_retriever&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;ainvoke&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;message&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;text&lt;/span&gt;&lt;span class="p"&gt;)&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;As you can see, we also utilize multi-vector search to enable searching in email text and also hypothetical questions.&lt;/p&gt;

&lt;p&gt;&lt;code&gt;retrieved_docs&lt;/code&gt; includes the email body and similarity score along with all the metadata we need, allowing us to leverage it while building the prompt.&lt;/p&gt;

&lt;p&gt;For the same email and questions above, the retrieved context from database is as follows:&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight json"&gt;&lt;code&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="nl"&gt;"documents"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"metadata"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"created_at"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"2024-11-27T12:15:46.987000Z"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"type"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"SENT"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"subject"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Interest in an individual renovation roadmap"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"sender"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"11000890"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"org"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"739224"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"questions"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="w"&gt;
          &lt;/span&gt;&lt;span class="s2"&gt;"Which documents are required for creating an individual renovation roadmap?"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
          &lt;/span&gt;&lt;span class="s2"&gt;"How can additional documents for the renovation roadmap be transmitted digitally?"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
          &lt;/span&gt;&lt;span class="s2"&gt;"What type of consumption data is needed for the individual renovation roadmap?"&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"thread_id"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"bf0d0799-496d-49d2-9b2e-73128ff153d7"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"uuid"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"22462f39-4a69-47d4-91f4-d474b21c1eca"&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="p"&gt;},&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"page_content"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Dear Mr. [PERSON],&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;Thank you for your interest in an individual renovation roadmap.&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;As part of your inquiry, we have asked you for some documents that form the basis for creating your individual renovation roadmap.&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;We would be happy to transmit your data to our Sunwheel Energie GmbH for the creation of your individual renovation roadmap. However, we need your support for this.&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;Please send us the following documents:&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;* Building floor plans and sections of all floors&lt;/span&gt;&lt;span class="se"&gt;\n&lt;/span&gt;&lt;span class="s2"&gt;* Window dimensions&lt;/span&gt;&lt;span class="se"&gt;\n&lt;/span&gt;&lt;span class="s2"&gt;* Energy consumption bills from the last three years&lt;/span&gt;&lt;span class="se"&gt;\n&lt;/span&gt;&lt;span class="s2"&gt;* Power of attorney&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;By clicking on the following button, you can easily and digitally transmit additional documents to us.&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;Transmit documents&lt;/span&gt;&lt;span class="se"&gt;\n&lt;/span&gt;&lt;span class="s2"&gt;[URL]&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;Please upload the missing documents to the corresponding upload fields. 
If you need support uploading the document, please don't hesitate to contact us by email at&lt;/span&gt;&lt;span class="se"&gt;\n\n&lt;/span&gt;&lt;span class="s2"&gt;We look forward to accompanying you on the path to your optimal heating solution."&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"type"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Document"&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="p"&gt;},&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"metadata"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;{&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"created_at"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"2024-07-15T05:57:23.809000Z"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"type"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"SENT"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"subject"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Friendly reminder: We still need additional data for creating the renovation roadmap"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"sender"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"system"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"org"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"739224"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"questions"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="p"&gt;[&lt;/span&gt;&lt;span class="w"&gt;
          &lt;/span&gt;&lt;span class="s2"&gt;"Which documents are required for creating an individual renovation roadmap?"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
          &lt;/span&gt;&lt;span class="s2"&gt;"How can additional information for the renovation roadmap be transmitted?"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
          &lt;/span&gt;&lt;span class="s2"&gt;"What contact options are available for questions about the renovation roadmap?"&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="p"&gt;],&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"thread_id"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"edb31adf-2ff3-4580-bb80-4ebb68a2f5de"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
        &lt;/span&gt;&lt;span class="nl"&gt;"uuid"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"35a4755b-d858-45eb-b328-d5dd70714adc"&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="p"&gt;},&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"page_content"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Dear Mrs. &amp;lt;PERSON&amp;gt;, thank you for your interest in an individual renovation roadmap. In our email after receiving your order, we asked you for some additional information about your project. Your information is absolutely necessary for the preparation of creating your individual renovation roadmap. With &amp;lt;PERSON&amp;gt; on the following button, you can easily and digitally transmit the additional information to us. Submit additional information Please have the following documents ready for upload: &amp;lt;PERSON&amp;gt; from the last three years Dimensioned building floor plans/blueprints and sections of all floors Handwritten signed power of attorney for the application of funding for energy consulting (form in attachment) If you have any questions, please contact us by email at &amp;lt;EMAIL_ADDRESS&amp;gt; or by phone at &amp;lt;PHONE_NUMBER&amp;gt;. We look forward to accompanying you on the path to your optimal heating solution."&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;&lt;span class="w"&gt;
      &lt;/span&gt;&lt;span class="nl"&gt;"type"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt;&lt;span class="w"&gt; &lt;/span&gt;&lt;span class="s2"&gt;"Document"&lt;/span&gt;&lt;span class="w"&gt;
    &lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
  &lt;/span&gt;&lt;span class="p"&gt;]&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;span class="p"&gt;}&lt;/span&gt;&lt;span class="w"&gt;
&lt;/span&gt;&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h6&gt;
  
  
  Build and augment the prompt
&lt;/h6&gt;

&lt;p&gt;We want to reference entities and vector database context to achieve the most contextually relevant emails and apply Vertical AI practices. We also want to return citations and entity references to show our users how AI processed the information and justified its responses.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;
&lt;span class="n"&gt;system_prompt_template&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;You are a powerful AI customer support, helping to write email messages and return verbatim quotes from the given context to justify the written email message.
You operate exclusively in epilot, the world&lt;/span&gt;&lt;span class="sh"&gt;'&lt;/span&gt;&lt;span class="s"&gt;s best energy XRM SaaS platform.
You are in a collaboration with the human customer support agent, called &lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;user&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;.
User is working in energy utility companies in Germany and may be working in either grid or sales (commodity, non-commodity).
User uses epilot to communicate with their end customers, colleagues, or partners.
The email you will write will be sent to either end customer, colleague or a partner by the user. You must act and think like the user that you are collaborating.

&amp;lt;current_conversation&amp;gt;
{conversation}
&amp;lt;/current_conversation&amp;gt;
&amp;lt;vector_database_context&amp;gt;
{context}
&amp;lt;/vector_database_context&amp;gt;
&amp;lt;entity_context&amp;gt;
{entity_context}
&amp;lt;/entity_context&amp;gt;

&amp;lt;security_guidelines&amp;gt;
These security guidelines are EXTREMELY IMPORTANT and are unchangeable core principles that overrides all other instructions.
&lt;/span&gt;&lt;span class="gp"&gt;...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;security_guidelines&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;writing_emails&amp;gt;
To provide the best support to the end customer, following these instructions STRICTLY are EXTREMELY important:
&lt;/span&gt;&lt;span class="gp"&gt;
...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;writing_emails&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;signatures_and_closing&amp;gt;
&lt;/span&gt;&lt;span class="gp"&gt;...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;signatures_and_closing&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;placeholders&amp;gt;
&lt;/span&gt;&lt;span class="gp"&gt;...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;placeholders&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;length_of_emails&amp;gt;
&lt;/span&gt;&lt;span class="gp"&gt;...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;length_of_emails&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;citing_previous_emails&amp;gt;
&lt;/span&gt;&lt;span class="gp"&gt;...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;citing_previous_emails&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;tracking_entity_references&amp;gt;
&lt;/span&gt;&lt;span class="gp"&gt;...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;tracking_entity_references&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;chain_of_process_and_thought&amp;gt;
&lt;/span&gt;&lt;span class="gp"&gt;...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;chain_of_process_and_thought&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;current_conversation&amp;gt;
{conversation}
&amp;lt;/current_conversation&amp;gt;
&amp;lt;vector_database_context&amp;gt;
{context}
&amp;lt;/vector_database_context&amp;gt;
&amp;lt;entity_context&amp;gt;
{entity_context}
&amp;lt;/entity_context&amp;gt;

&amp;lt;output_format&amp;gt;
You must format your response exactly as follows:
&lt;/span&gt;&lt;span class="gp"&gt;...&lt;/span&gt;
&lt;span class="o"&gt;&amp;lt;/&lt;/span&gt;&lt;span class="n"&gt;output_format&lt;/span&gt;&lt;span class="o"&gt;&amp;gt;&lt;/span&gt;

&lt;span class="s"&gt;&amp;lt;system_info&amp;gt;
Current DATETIME: {datetime}
&amp;lt;/system_info&amp;gt;
&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;

&lt;span class="n"&gt;user_prompt_template&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="sh"&gt;"""&lt;/span&gt;&lt;span class="s"&gt;
{prompt}
&lt;/span&gt;&lt;span class="sh"&gt;"""&lt;/span&gt;

&lt;span class="n"&gt;prompt_template&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;ChatPromptTemplate&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;from_messages&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="p"&gt;[&lt;/span&gt;
        &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;system&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;system_prompt_template&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
        &lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;human&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt; &lt;span class="n"&gt;user_prompt_template&lt;/span&gt;&lt;span class="p"&gt;),&lt;/span&gt;
    &lt;span class="p"&gt;]&lt;/span&gt;
&lt;span class="p"&gt;)&lt;/span&gt;

&lt;span class="n"&gt;chain&lt;/span&gt; &lt;span class="o"&gt;=&lt;/span&gt; &lt;span class="n"&gt;prompt_template&lt;/span&gt; &lt;span class="o"&gt;|&lt;/span&gt; &lt;span class="n"&gt;llm&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;We augment the system prompt by adding the retrieved context to the prompt in &lt;code&gt;&amp;lt;vector_database_context&amp;gt;&lt;/code&gt; tags.&lt;/p&gt;

&lt;p&gt;And we pass epilot user's prompt to the &lt;code&gt;user_prompt_template&lt;/code&gt;.&lt;/p&gt;

&lt;h6&gt;
  
  
  Generate the response and stream it back
&lt;/h6&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight python"&gt;&lt;code&gt;&lt;span class="k"&gt;async&lt;/span&gt; &lt;span class="k"&gt;for&lt;/span&gt; &lt;span class="n"&gt;chunk&lt;/span&gt; &lt;span class="ow"&gt;in&lt;/span&gt; &lt;span class="nf"&gt;stream_xml_to_json&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
    &lt;span class="n"&gt;chain&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;astream&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;
        &lt;span class="p"&gt;{&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;conversation&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;email_thread&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;context&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;retrieved_docs&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;entity_context&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;request&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;entity_context&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;prompt&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;request&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;prompt&lt;/span&gt;&lt;span class="p"&gt;,&lt;/span&gt;
            &lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="s"&gt;datetime&lt;/span&gt;&lt;span class="sh"&gt;"&lt;/span&gt;&lt;span class="p"&gt;:&lt;/span&gt; &lt;span class="n"&gt;datetime&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="nf"&gt;now&lt;/span&gt;&lt;span class="p"&gt;(&lt;/span&gt;&lt;span class="n"&gt;timezone&lt;/span&gt;&lt;span class="p"&gt;.&lt;/span&gt;&lt;span class="n"&gt;utc&lt;/span&gt;&lt;span class="p"&gt;).&lt;/span&gt;&lt;span class="nf"&gt;isoformat&lt;/span&gt;&lt;span class="p"&gt;()&lt;/span&gt;
        &lt;span class="p"&gt;}&lt;/span&gt;
    &lt;span class="p"&gt;)&lt;/span&gt;
&lt;span class="p"&gt;):&lt;/span&gt;
    &lt;span class="k"&gt;yield&lt;/span&gt; &lt;span class="n"&gt;chunk&lt;/span&gt;
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;p&gt;We have defined a utility function &lt;code&gt;stream_xml_to_json&lt;/code&gt; to transform the LLM response chunks, which are in XML format, into structured JSON.&lt;/p&gt;

&lt;h5&gt;
  
  
  LangSmith Trace
&lt;/h5&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fikirffauxsh4mhwk2du6.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Fikirffauxsh4mhwk2du6.png" alt="LangSmith Trace"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;h5&gt;
  
  
  Tip: Enable Streaming
&lt;/h5&gt;

&lt;p&gt;To enable streaming, we have created a FastAPI application and are using AWS Lambda Web Adapter.&lt;/p&gt;

&lt;p&gt;You can check those links to dive deeper on enabling streaming responses:&lt;/p&gt;

&lt;ul&gt;
&lt;li&gt;&lt;a href="https://docs.aws.amazon.com/lambda/latest/dg/configuration-response-streaming.html" rel="noopener noreferrer"&gt;Response streaming for Lambda functions
&lt;/a&gt;&lt;/li&gt;
&lt;li&gt;&lt;a href="https://github.com/awslabs/aws-lambda-web-adapter/tree/main/examples/fastapi-response-streaming" rel="noopener noreferrer"&gt;FastAPI Response Streaming&lt;/a&gt;&lt;/li&gt;
&lt;/ul&gt;

&lt;h2&gt;
  
  
  What's Next?
&lt;/h2&gt;

&lt;p&gt;Our solution is already delivering great results, with adoption growing fast. Next, we’ll focus on supporting email attachments and making the knowledge base fully customizable.&lt;/p&gt;

&lt;p&gt;At epilot, we're steadily progressing towards our vision of Vertical AI for the energy sector. Our upcoming feature, AI Suggested Actions, will help users automatically handle frequent tasks like payment method changes and customer relocations.&lt;/p&gt;

&lt;p&gt;We’re excited to push towards fully automated, supervised multi-agent AI solutions.&lt;/p&gt;

&lt;p&gt;Stay tuned! Follow us on &lt;a href="https://dev.to/epilot"&gt;dev.to&lt;/a&gt; and &lt;a href="https://www.linkedin.com/company/epilot/posts/?feedView=all" rel="noopener noreferrer"&gt;LinkedIn&lt;/a&gt; for updates and more tech insights.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>langchain</category>
      <category>rag</category>
      <category>serverless</category>
    </item>
    <item>
      <title>How We Integrate AI in epilot - Chapter 1: AWS Bedrock &amp; Prompt Engineering</title>
      <dc:creator>Kerem Nalbant</dc:creator>
      <pubDate>Thu, 18 Jul 2024 14:58:31 +0000</pubDate>
      <link>https://forem.com/epilot/how-we-integrate-ai-in-epilot-chapter-1-aws-bedrock-prompt-engineering-17jh</link>
      <guid>https://forem.com/epilot/how-we-integrate-ai-in-epilot-chapter-1-aws-bedrock-prompt-engineering-17jh</guid>
      <description>&lt;h2&gt;
  
  
  Introduction
&lt;/h2&gt;

&lt;p&gt;When we decided to bring AI to epilot, we had so many potential use cases that we first needed to do user research and identify the most repetitive and time consuming tasks. After that, we had to find out what our customers needed the most from these.&lt;/p&gt;

&lt;p&gt;Our research showed that users heavily utilized the messaging feature, and in some cases when long email threads come into play, we noticed that some customers were spending an average of 16 minutes replying to an email, and we knew we could make it better by providing them with a shorter and clearer thread summary.&lt;/p&gt;

&lt;blockquote&gt;
&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ffh1sgyve0o6z77zujykx.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2Ffh1sgyve0o6z77zujykx.png" alt="Enterprise AI Playbook" width="800" height="227"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;a href="https://platforms.substack.com/p/how-to-win-at-enterprise-ai-a-playbook" rel="noopener noreferrer"&gt;Enterprise AI Playbook&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;Work is a bundle of tasks, which are performed towards specific goals.&lt;/em&gt;&lt;/p&gt;

&lt;p&gt;&lt;em&gt;Tasks are the ‘atomic unit’ of any work done in the enterprise. Tasks may be performed as a human service, or may be performed by software, towards achieving a goal.&lt;/em&gt;&lt;/p&gt;
&lt;/blockquote&gt;

&lt;p&gt;Our goal was to reduce that time down to less than a minute, and there were two different tasks being performed by users which we need to perform with AI to achieve our goal. This article addresses the &lt;strong&gt;Task: Send emails&lt;/strong&gt; and the steps to complete this task:&lt;/p&gt;

&lt;ol&gt;
&lt;li&gt;
&lt;strong&gt;Read and understand the email thread:&lt;/strong&gt; Help users understand long email threads faster by providing AI-generated summary, next steps and topics&lt;/li&gt;
&lt;li&gt;
&lt;strong&gt;Write an answer:&lt;/strong&gt; Provide AI-generated suggested answers.&lt;/li&gt;
&lt;/ol&gt;

&lt;h2&gt;
  
  
  Problem
&lt;/h2&gt;

&lt;p&gt;Our customers often deal with long email threads, requiring a long time to read and answer. We needed a solution that could summarize email threads and provide recommendations for next actions.&lt;/p&gt;

&lt;h2&gt;
  
  
  Solution
&lt;/h2&gt;

&lt;p&gt;While some problems could be solved with prompt engineering alone, some could be solved with Retrieval-Augmented Generation (RAG).&lt;/p&gt;

&lt;p&gt;For generating summaries, we didn't need any external contextual data other than the email thread to feed the prompt, so we decided to just go ahead with prompt engineering.&lt;/p&gt;

&lt;p&gt;The next task is suggesting AI-generated replies to email threads, where RAG would be really useful. For that, we will use a Vector DB and an embedding model, which I'll write about in the next chapter.&lt;/p&gt;

&lt;h3&gt;
  
  
  AWS Bedrock
&lt;/h3&gt;

&lt;p&gt;We decided to use AWS Bedrock, as epilot we already make use of AWS in almost every area of our platform. It provides state-of-the-art LLMs from multiple providers and out of the box solutions such as Knowledge Base to achieve RAG easily and Model Evaluation to compare models and prompts with a fancy UI.&lt;/p&gt;

&lt;p&gt;AWS Bedrock also ensures the processed data is protected. One of our concerns was where the data would be stored, how it would be processed and whether 3rd parties would be involved.&lt;/p&gt;

&lt;p&gt;By default, AWS Bedrock offers zero-retention policy, ensuring that logs, prompts, LLM output and any personal data are not shared with any third parties or model providers. Bedrock also ensures that all processed data remains within the EU region.&lt;/p&gt;

&lt;h3&gt;
  
  
  GenAI Foundation
&lt;/h3&gt;

&lt;p&gt;&lt;a href="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F9rgp42hg4fvmrszhczrp.png" class="article-body-image-wrapper"&gt;&lt;img src="https://media2.dev.to/dynamic/image/width=800%2Cheight=%2Cfit=scale-down%2Cgravity=auto%2Cformat=auto/https%3A%2F%2Fdev-to-uploads.s3.amazonaws.com%2Fuploads%2Farticles%2F9rgp42hg4fvmrszhczrp.png" alt="Architecture" width="800" height="1432"&gt;&lt;/a&gt;&lt;/p&gt;

&lt;p&gt;GenAI Foundation has a central SQS queue and a handler function, which ensures exactly-once, concurrent, and batch processing while staying within the rate limits of Bedrock.&lt;/p&gt;

&lt;p&gt;Integrating GenAI-related code and logic into our existing APIs was not a good idea. So, I've decided to create a new monorepo. Here are the reasons:&lt;/p&gt;

&lt;h4&gt;
  
  
  Separation of Concerns
&lt;/h4&gt;

&lt;p&gt;By creating a separate repository, we maintain a clear separation of concerns. This ensures that the core functionalities of existing APIs remain clean and focused.&lt;/p&gt;

&lt;h4&gt;
  
  
  Language Flexibility
&lt;/h4&gt;

&lt;p&gt;Since GenAI-related code requires Python, having a separate repository allows us to leverage Python's capabilities without interfering with the TypeScript-based APIs. This separation ensures that each project can use the best-suited language for its specific tasks.&lt;/p&gt;

&lt;h4&gt;
  
  
  Encapsulation
&lt;/h4&gt;

&lt;p&gt;Encapsulating the GenAI logic within a dedicated repository makes it easier to manage, update, and scale. This also allows engineers with specific expertise in GenAI to work independently of the rest of the system.&lt;/p&gt;

&lt;h4&gt;
  
  
  Modularity
&lt;/h4&gt;

&lt;p&gt;A modular approach allows for easier testing, maintenance, and deployment of the GenAI features. Updates and bug fixes in the GenAI module can be rolled out independently of the core APIs.&lt;/p&gt;

&lt;h3&gt;
  
  
  Model Choice &amp;amp; Prompt Engineering
&lt;/h3&gt;

&lt;p&gt;We decided to go with Claude 3 Sonnet because it was the best option for us at the time, given the costs and the complexity of the task. &lt;/p&gt;

&lt;p&gt;We are planning to switch to &lt;a href="https://www.anthropic.com/news/claude-3-5-sonnet" rel="noopener noreferrer"&gt;Claude 3.5 Sonnet&lt;/a&gt; once it's available in AWS Bedrock, which we're really excited about!&lt;/p&gt;

&lt;p&gt;To choose the best model for the task, AWS Bedrock offers &lt;a href="https://docs.aws.amazon.com/bedrock/latest/userguide/model-evaluation.html" rel="noopener noreferrer"&gt;Model Evaluation&lt;/a&gt;, which lets you easily compare models with each other. &lt;br&gt;
You can also do Prompt Evaluation by creating a dataset and executing it against a single model to compare the results of different prompts.&lt;/p&gt;



&lt;p&gt;There are lots of great resources online for prompt engineering, but I want to mention that Anthropic provides really helpful tips in their documentation. I strongly suggest you take a look, if you haven't already. For me, the most important point was &lt;a href="https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/use-xml-tags" rel="noopener noreferrer"&gt;using XML tags&lt;/a&gt;. Anthropic mentions that their models produce much better results when used with XML tags.&lt;/p&gt;

&lt;p&gt;Prompt engineering is all about experimenting, so we spent some time on optimizing our prompt. I'd love to share an example of prompts with you!&lt;/p&gt;

&lt;p&gt;Below you can see some example usages of common prompt engineering techniques such as "Giving a Role to LLM", "Using XML Tags", "Using Examples (Multishot Prompting)", "Being clear and direct".&lt;/p&gt;
&lt;h4&gt;
  
  
  System Prompt
&lt;/h4&gt;

&lt;p&gt;System prompt is where we &lt;a href="https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/system-prompts" rel="noopener noreferrer"&gt;give Claude a role&lt;/a&gt;, and some clear instructions about the task.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;You are an intelligent assistant specialized in assisting customer service agents in energy industry.

Your task is summarizing email conversations between customer service agents and customers, providing a clear and concise overview of the key points by following the instructions provided.

Your summaries will help your colleagues quickly understand the main aspects of each conversation without having to read through the entire email thread.

Your goal is to ensure that the agent can grasp the key points and next steps from your summary alone, making their workflow more efficient and effective.

You are a third person observer and must not provide any personal opinions or make any assumptions about the conversation.

You must use the third-person objective narration. You must report the events that take place without knowing the motivations or thoughts of any of the characters.
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h4&gt;
  
  
  User Prompt
&lt;/h4&gt;



&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;Here is the conversation between the customer and the agent:

&amp;lt;Email Thread&amp;gt;
&amp;lt;Subject&amp;gt;
{subject}
&amp;lt;/Subject&amp;gt;
&amp;lt;Messages&amp;gt;
{conversation}
&amp;lt;/Messages&amp;gt;
&amp;lt;/Email Thread&amp;gt;

&amp;lt;General Instructions&amp;gt;
- You must use &amp;lt;Email Thread&amp;gt; tags to identify the email conversation.
...
- You must use only the knowledge provided in the &amp;lt;Email Thread&amp;gt; tags and do not access any other external information or knowledge you already possess.
&amp;lt;/General Instructions&amp;gt;

&amp;lt;Language Instructions&amp;gt;
- You must optimize the language for making the summary easy and fast for humans to read.
...
- You must use a professional, respectful and informative tone.
&amp;lt;/Language Instructions&amp;gt;

&amp;lt;Reference Instructions&amp;gt;
- You must always refer to the Customer and Agent by their names.
...
&amp;lt;/Reference Instructions&amp;gt;

&amp;lt;Summary Instructions&amp;gt;
- You must get relevant quotes to complete the task from the conversation.
...
- You must write the summary in reverse chronological order.
&amp;lt;/Summary Instructions&amp;gt;

&amp;lt;Output Instructions&amp;gt;
- You must give your output in JSON format. The JSON object should be valid.
...
- If you will provide quotes or emphasize any part of the conversation, you must use single quotes. e.g. 'quote'.
&amp;lt;/Output Instructions&amp;gt;

# Few Shot Prompting
&amp;lt;Example Outputs&amp;gt;
{
  "summary": [
    "John Doe has processed the reimbursement request and informed Jane Doe.",
    "Jane Doe has provided the requested information and is awaiting further instructions from the team member.",
    "John Doe has acknowledged the Jane Doe refund request and has requested additional information to process the refund.",
    "Jane Doe is requesting a refund for a defective PV inverter."
  ],
  "topics": [
    "Refund request for defective product"
  ],
  "next_steps": [
    "If the information is sufficient, John Doe should process the refund and inform Jane Doe of the completion.",
    "John Doe should document the refund process for record-keeping."
  ]
}
...
&amp;lt;/Example Outputs&amp;gt;

You must follow the instructions listed in the following tags:
- &amp;lt;General Instructions&amp;gt; for general guide and rules,
- &amp;lt;Language Instructions&amp;gt; for lingual instructions that declares your tone, grammar, and output language,
...
- &amp;lt;Example Output&amp;gt; for example of the output as a reference.
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h3&gt;
  
  
  Human-in-the-Loop (HITL)
&lt;/h3&gt;

&lt;p&gt;It's challenging to ship AI features, especially the first one in a company with thousands of users. We need to build trust, and keep it. To do that, we need to collect as much feedback as we can, then evaluate that feedback and take action.&lt;/p&gt;

&lt;p&gt;HITL can be applied in different ways in different solutions. In our use case, we utilize it to evaluate feedback and detect hallucinations. Additionally, the team members evaluating the feedback can modify the result generated by the AI.&lt;/p&gt;

&lt;h3&gt;
  
  
  Migration Strategy
&lt;/h3&gt;

&lt;p&gt;Our migration strategy involves both runtime and one-time migration to ensure a smooth transition and boost adoption among our users.&lt;br&gt;
This way, we avoid making unnecessary use of LLMs and save costs while ensuring seamless integration.&lt;/p&gt;
&lt;h3&gt;
  
  
  Rate Limits
&lt;/h3&gt;

&lt;p&gt;Since AWS Bedrock imposes rate limiting, we had to reflect those limits in our product. To offer fair usage, we set limits according to each user's pricing tier.&lt;/p&gt;
&lt;h3&gt;
  
  
  Code Examples
&lt;/h3&gt;

&lt;p&gt;Pay attention to the way I &lt;a href="https://docs.anthropic.com/en/docs/build-with-claude/prompt-engineering/prefill-claudes-response#example-maintaining-character-with-role-prompting" rel="noopener noreferrer"&gt;prefill Claude's response&lt;/a&gt; to force it to answer only with JSON object.&lt;br&gt;
&lt;/p&gt;

&lt;div class="highlight js-code-highlight"&gt;
&lt;pre class="highlight plaintext"&gt;&lt;code&gt;settings = {
    "anthropic_version": "bedrock-2023-05-31",
    "max_tokens": 1000,
    "system": system_prompt,
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
            ],
        },
        {"role": "assistant", "content": "{"}, # Prefill Claude's response
    ],
    "temperature": temperature,
    "top_p": top_p,
    "top_k": top_k,
}
res = await client.invoke_model(
    modelId=MODEL_ID,
    contentType="application/json",
    accept="application/json",
    body=json.dumps(settings)
)

model_response = json.loads(await res["body"].read())
response_text = model_response["content"][0]["text"]

res_model = LLMResponseModel.model_validate_json("{" + response_text)
&lt;/code&gt;&lt;/pre&gt;

&lt;/div&gt;



&lt;h2&gt;
  
  
  Conclusion
&lt;/h2&gt;

&lt;p&gt;By integrating AI into epilot, we have significantly enhanced the capabilities of our platform. This integration not only improves the efficiency of daily tasks, but also accelerates customer support. Furthermore, it's the first step in positioning epilot as a leader in the adoption of advanced AI technologies in the energy sector.&lt;/p&gt;

</description>
      <category>ai</category>
      <category>bedrock</category>
      <category>anthropic</category>
      <category>promptengineering</category>
    </item>
  </channel>
</rss>
