class CustomCallbackHandler()

in apps/chat/src/constants/code-apps.ts [116:203]


class CustomCallbackHandler(AsyncCallbackHandler):
    def __init__(self, choice: Choice):
        self._choice = choice

    async def on_llm_new_token(self, token: str, *args, **kwargs) -> None:
        self._choice.append_content(token)


class SimpleRAGApplication(ChatCompletion):
    async def chat_completion(
        self, request: Request, response: Response
    ) -> None:
        collection_name = str(uuid4())

        with response.create_single_choice() as choice:
            message = request.messages[-1]
            user_query = message.text()

            file_url = get_last_attachment_url(request.messages)
            file_abs_url = urljoin(f"{DIAL_URL}/v1/", file_url)

            if file_abs_url.endswith(".pdf"):
                loader = PyPDFLoader(file_abs_url)
            else:
                loader = WebBaseLoader(file_abs_url)

            # Create the download stage to show to the user the active process.
            # After the loading is complete, the stage will auto finished.
            with choice.create_stage("Downloading the document"):
                try:
                    documents = loader.load()
                except Exception:
                    msg = "Error while loading the document. Please check that the URL you provided is correct."
                    raise DIALException(
                        status_code=400, message=msg, display_message=msg
                    )

            # Show the user the total number of parts in the resource
            with choice.create_stage(
                "Splitting the document into chunks"
            ) as stage:
                texts = text_splitter.split_documents(documents)
                stage.append_content(f"Total number of chunks: {len(texts)}")

            # Show the user start of calculating embeddings stage
            with choice.create_stage("Calculating embeddings"):

                openai_embedding = AzureOpenAIEmbeddings(
                    model=EMBEDDINGS_MODEL,
                    azure_deployment=EMBEDDINGS_MODEL,
                    azure_endpoint=DIAL_URL,
                    # Header propagation automatically propagates the API key from the request headers.
                    api_key=SecretStr("-"),
                    api_version=API_VERSION,
                    # The check leads to tokenization of the input strings.
                    # Tokenized input is only supported by OpenAI embedding models.
                    # For other models, the check should be disabled.
                    check_embedding_ctx_length=False,
                )

                embeddings = CacheBackedEmbeddings.from_bytes_store(
                    openai_embedding,
                    embedding_store,
                    namespace=sanitize_namespace(openai_embedding.model),
                )

                docsearch = Chroma.from_documents(
                    texts, embeddings, collection_name=collection_name
                )

            # CustomCallbackHandler allows to pass tokens to the users as they are generated, so as not to wait for a complete response.
            llm = AzureChatOpenAI(
                azure_deployment=CHAT_MODEL,
                azure_endpoint=DIAL_URL,
                # Header propagation automatically propagates the API key from the request headers.
                api_key=SecretStr("-"),
                api_version=API_VERSION,
                temperature=0,
                streaming=True,
                callbacks=[CustomCallbackHandler(choice)],
            )

            await response.aflush()

            qa = RetrievalQA.from_chain_type(
                llm=llm,
                chain_type="stuff",
                retriever=docsearch.as_retriever(search_kwargs={"k": 15}),