From 6f3a661f45b433a07cfbc99f5e57b16faac1d54f Mon Sep 17 00:00:00 2001 From: David Oplatka Date: Sun, 19 Jan 2025 16:03:48 -0800 Subject: [PATCH] API Migration (#17545) --- docs/docs/examples/managed/vectaraDemo.ipynb | 155 ++-- .../Changelog.md | 11 + .../README.md | 31 +- .../indices/managed/vectara/base.py | 417 ++++++----- .../indices/managed/vectara/query.py | 43 +- .../indices/managed/vectara/retriever.py | 669 ++++++++++-------- .../pyproject.toml | 2 +- .../tests/test_indices_managed_vectara.py | 238 ++++++- .../llama-index-tools-vectara-query/README.md | 7 +- .../examples/vectara_query.ipynb | 6 +- .../llama_index/tools/vectara_query/base.py | 82 ++- .../pyproject.toml | 4 +- .../tests/test_tools_vectara_query.py | 92 ++- .../llama-index-packs-vectara-rag/README.md | 9 +- .../pyproject.toml | 4 +- 15 files changed, 1066 insertions(+), 704 deletions(-) create mode 100644 llama-index-integrations/indices/llama-index-indices-managed-vectara/Changelog.md diff --git a/docs/docs/examples/managed/vectaraDemo.ipynb b/docs/docs/examples/managed/vectaraDemo.ipynb index cf931208ffa36..f5e381b057bd9 100644 --- a/docs/docs/examples/managed/vectaraDemo.ipynb +++ b/docs/docs/examples/managed/vectaraDemo.ipynb @@ -14,7 +14,7 @@ "metadata": {}, "source": [ "# Vectara Managed Index\n", - "In this notebook we are going to show how to use [Vectara](https://vectara.com) with LlamaIndex.\n", + "In this notebook we are going to show how to use [Vectara](https://vectara.com) with LlamaIndex. Please note that this notebook is for Vectara ManagedIndex versions >=0.4.0.\n", "\n", "[Vectara](https://vectara.com/) is the trusted AI Assistant and Agent platform which focuses on enterprise readiness for mission-critical applications. \n", "\n", @@ -63,7 +63,7 @@ "source": [ "To get started with Vectara, [sign up](https://vectara.com/integrations/llamaindex) (if you haven't already) and follow our [quickstart guide](https://docs.vectara.com/docs/quickstart) to create a corpus and an API key.\n", "\n", - "Once you have these, you can provide them as environment variables `VECTARA_CUSTOMER_ID`, `VECTARA_CORPUS_ID`, and `VECTARA_API_KEY`. Make sure your API key has both query and index permissions." + "Once you have these, you can provide them as environment variables `VECTARA_CORPUS_KEY`, and `VECTARA_API_KEY`. Make sure your API key has both query and index permissions." ] }, { @@ -76,7 +76,7 @@ "There are a few ways you can index your data into Vectara, including:\n", "1. With the `from_documents()` or `insert_file()` methods of `VectaraIndex`\n", "2. Uploading files directly in the [Vectara console](https://console.vectara.com/)\n", - "3. Using Vectara's FILE_UPLOAD or standard indexing APIs\n", + "3. Using Vectara's [file upload](https://docs.vectara.com/docs/rest-api/upload-file) or [document index](https://docs.vectara.com/docs/rest-api/create-corpus-document) APIs\n", "4. Using [vectara-ingest](https://github.com/vectara/vectara-ingest), an open source crawler/indexer project\n", "5. Using one of our ingest integration partners like Airbyte, Unstructured or DataVolo.\n", "\n", @@ -152,7 +152,7 @@ { "data": { "text/plain": [ - "\"The risks of AI include biased data and discriminatory outcomes, opaque decision-making processes, and lack of public trust and understanding of algorithmic systems [1]. These risks can lead to harm to individuals and communities, including the potential for meaningful impact on people's rights, opportunities, or access [6]. 
To mitigate these risks, it is essential to identify and address potential harms before deployment, and to engage with impacted communities to understand the potential harms of technologies and build protection by design into future systems [1][6]. Additionally, strong safety regulations and measures to address harms when they occur can enhance innovation in the context of complex technologies [2]. The development of technical standards and practices tailored for particular sectors and contexts can also help to ensure safe and effective AI systems [7].\"" + "'The risks of AI include biased data and discriminatory outcomes, opaque decision-making processes, and lack of public trust and understanding of algorithmic systems [1]. These risks can have significant impacts on individuals and communities, particularly those who are directly affected by AI systems [5]. To mitigate these risks, it is essential to identify and address potential risks before deployment, and to implement ongoing monitoring and mitigation strategies [2][6]. This includes risk assessments, auditing mechanisms, and public consultation to ensure that AI systems are designed and used in a responsible and transparent manner [2][6]. Additionally, the development of AI systems should be guided by principles that prioritize lawfulness, accuracy, and transparency, and that are regularly monitored and accountable [7].'" ] }, "execution_count": null, @@ -162,7 +162,10 @@ ], "source": [ "qe = index.as_query_engine(\n", - " summary_enabled=True, summary_prompt_name=\"mockingbird-1.0-2024-07-16\"\n", + " n_sentences_before=1,\n", + " n_sentences_after=1,\n", + " summary_enabled=True,\n", + " summary_prompt_name=\"mockingbird-1.0-2024-07-16\",\n", ")\n", "qe.query(questions[0]).response" ] @@ -185,20 +188,21 @@ "name": "stdout", "output_type": "stream", "text": [ - "The risks of AI include biased data and discriminatory outcomes, opaque decision-making processes, and lack of public trust and understanding of algorithmic systems [1]. These risks can lead to harm to individuals and communities, including the potential for meaningful impact on people's rights, opportunities, or access [6]. To mitigate these risks, it is essential to identify and address potential harms before deployment, and to engage with impacted communities to understand the potential harms of technologies and build protection by design into future systems [1][6]. Additionally, strong safety regulations and measures to address harms when they occur can enhance innovation in the context of complex technologies [2]. The development of technical standards and practices tailored for particular sectors and contexts can also help to ensure safe and effective AI systems [7]." + "The risks of AI include biased data and discriminatory outcomes, opaque decision-making processes, and lack of public trust and understanding of algorithmic systems [1]. These risks can have significant impacts on individuals and communities, particularly those who are directly affected by AI systems [5]. To mitigate these risks, it is essential to identify and address potential risks before deployment, and to implement ongoing monitoring and mitigation strategies [2][6]. This includes risk assessments, auditing mechanisms, and public consultation to ensure that AI systems are designed and used in a responsible and transparent manner [2][6]. 
Additionally, the development of AI systems should be guided by principles that prioritize lawfulness, accuracy, and transparency, and that are regularly monitored and accountable [7]." ] } ], "source": [ "qe = index.as_query_engine(\n", + " n_sentences_before=1,\n", + " n_sentences_after=1,\n", " summary_enabled=True,\n", " summary_prompt_name=\"mockingbird-1.0-2024-07-16\",\n", " streaming=True,\n", ")\n", "response = qe.query(questions[0])\n", "\n", - "for chunk in response.response_gen:\n", - " print(chunk.delta or \"\", end=\"\", flush=True)" + "response.print_response_stream()" ] }, { @@ -220,7 +224,7 @@ "metadata": {}, "outputs": [], "source": [ - "ce = index.as_chat_engine()" + "ce = index.as_chat_engine(n_sentences_before=1, n_sentences_after=1)" ] }, { @@ -235,15 +239,15 @@ "text": [ "Question: What are the risks of AI?\n", "\n", - "Response: The risks of AI include potential biased data leading to discriminatory outcomes, opaque decision-making processes, lack of public trust, and understanding of algorithmic systems. Risks also involve safety concerns, such as AI systems violating safety regulations, causing harm, and impacting individuals' rights and opportunities. Furthermore, the misuse of AI, like AI-enabled technology for creating non-consensual images, poses significant risks, affecting individuals' lives and well-being. It is crucial to identify and mitigate risks before deployment, prioritize high-impact risks, and ensure that automated systems do not violate safety standards or harm individuals. Stakeholder engagement, transparency, and ongoing monitoring are essential to address these risks effectively.\n", + "Response: The risks of AI include potential biases and discriminatory outcomes due to biased data, opaque decision-making processes, and lack of public trust and understanding of algorithmic systems. Mitigating these risks involves ongoing transparency, participatory design, explanations for stakeholders, and public consultation [1]. Industry is developing innovative solutions like risk assessments, auditing mechanisms, and monitoring tools to ensure the safety and efficacy of AI systems [2]. Identifying and mitigating risks before deployment is crucial, focusing on impacts on rights, opportunities, and communities, as well as risks from misuse of the system [6]. The Executive Order on Trustworthy AI in the Federal Government outlines principles for lawful, purposeful, accurate, safe, understandable, responsible, monitored, transparent, and accountable AI use [7].\n", "\n", "Question: What should we do to prevent bad actors from using AI?\n", "\n", - "Response: To prevent bad actors from using AI, we should implement safeguards such as ensuring systems are safe, effective, and respect privacy [1]. Additionally, it is crucial to adhere to principles like legality, transparency, and accountability in AI development and usage [2]. Moreover, best practices should be followed to protect against algorithmic discrimination and ensure fairness in all aspects of people's lives [6]. Lastly, it is essential to evaluate, protect, and redress harms at both individual and community levels, especially for underserved communities, to promote equity and fair treatment for all [5].\n", + "Response: To prevent bad actors from using AI, we should implement a set of principles and practices to ensure the safe and effective use of AI systems. 
This includes adhering to specific principles such as legality, respect for values, accuracy, reliability, safety, transparency, and accountability in the design and use of AI [2]. Additionally, entities should follow privacy and security best practices to prevent data leaks and employ audits and impact assessments to identify and mitigate algorithmic discrimination [3][4]. It is crucial to involve the public in discussions about the promises and potential harms of AI technologies to shape policies that protect against discrimination and ensure fairness in the use of automated systems [1][6][7]. By promoting transparency, ongoing monitoring, and public consultation, we can work towards building trust, understanding, and ethical use of AI while safeguarding against misuse by bad actors.\n", "\n", "Question: What are the benefits?\n", "\n", - "Response: The benefits of AI include the potential to build innovative infrastructure, improve Americans' lives, provide faster customer care, enhance decision-making in various sectors, revolutionize industries, and make life better for everyone. AI can help farmers grow food efficiently, predict storm paths, identify diseases, and drive important decisions globally. Additionally, AI can be used to protect individuals from unsafe systems and ensure the accountability, transparency, and reliability of AI technologies. Overall, AI holds the promise to positively impact society and improve various aspects of life.\n", + "Response: The benefits of AI include the potential to build innovative infrastructure, improve customer service through faster responses, and enhance decision-making processes. AI can also lead to transformative improvements in people's lives, protect individuals from potential harms, and ensure the ethical use of automated systems. By incorporating principles for responsible stewardship and trustworthy AI, companies and government agencies can create safe, effective, and transparent AI systems that respect values, ensure accuracy, and promote accountability [1][4][6][7].\n", "\n" ] } @@ -270,7 +274,9 @@ "metadata": {}, "outputs": [], "source": [ - "ce = index.as_chat_engine(streaming=True)" + "ce = index.as_chat_engine(\n", + " n_sentences_before=1, n_sentences_after=1, streaming=True\n", + ")" ] }, { @@ -283,14 +289,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Artificial intelligence will not rule the government. The government is implementing principles and guidelines to ensure the ethical and responsible use of AI in various sectors, including the federal government. These measures focus on transparency, accountability, safety, and adherence to national values, ensuring that AI is used in a manner that upholds civil rights, democratic values, and national security. Sector-specific guidance will continue to evolve to address specific AI applications, but overall, the government remains committed to safeguarding civil liberties and democratic principles in the age of artificial intelligence." + "Artificial intelligence will not rule the government. The government has established principles and guidelines for the ethical use of AI, ensuring it is used responsibly, lawfully, and in alignment with the nation's values. These principles emphasize safety, accountability, transparency, and regular monitoring of AI systems within the federal government [1] [2]. 
Additionally, there are specific considerations for law enforcement and national security activities, highlighting the need for oversight and adherence to existing policies and safeguards [3]. The government is focused on promoting equity, fairness, civil rights, and racial justice through the use of AI, guided by principles that protect the American public [5]. Transparency and accountability are key aspects to ensure that AI technologies are used in ways that respect people's rights and expectations [7]." ] } ], "source": [ "response = ce.stream_chat(\"Will artificial intelligence rule the government?\")\n", - "for chunk in response.chat_stream:\n", - " print(chunk.delta or \"\", end=\"\", flush=True)" + "\n", + "response.print_response_stream()" ] }, { @@ -329,116 +335,65 @@ "name": "stdout", "output_type": "stream", "text": [ - "Initializing vectara-agentic version 0.1.16...\n", - "No observer set.\n", - "> Running step 26a91fbd-0027-42af-96f8-fcf19bd9bf06. Step input: What are the risks of AI? What are the benefits? Compare and contrast and provide a summary with arguments for and against from experts.\n", + "Failed to set up observer (No module named 'phoenix.otel'), ignoring\n", + "> Running step 21fe2d4d-c74c-45df-9921-94c7f9e4f670. Step input: What are the risks of AI? What are the benefits? Compare and contrast and provide a summary with arguments for and against from experts.\n", "\u001b[1;3;38;5;200mThought: The current language of the user is: English. I need to use a tool to help me answer the question.\n", "Action: query_ai\n", - "Action Input: {'query': 'risks and benefits of AI, expert opinions, arguments for and against'}\n", + "Action Input: {'query': 'risks and benefits of AI, expert opinions'}\n", "\u001b[0m\u001b[1;3;34mObservation: \n", - " Response: '''Here is a summary of the risks and benefits of AI, expert opinions, and arguments for and against, based on the provided sources:\n", - "\n", - "**Risks of AI:**\n", - "\n", - "* Bias and discriminatory outcomes due to biased data [1]\n", - "* Opaque decision-making processes [1]\n", - "* Lack of public trust and understanding of algorithmic systems [1]\n", - "* Potential for harm to individuals, organizations, and society [3]\n", - "\n", - "**Benefits of AI:**\n", - "\n", - "* Potential to build better and more innovative infrastructure [2]\n", - "* Ability to mitigate risks to safety and efficacy of AI systems [3]\n", - "* Transformative potential to improve Americans' lives [4]\n", - "\n", - "**Expert Opinions:**\n", - "\n", - "* Emphasis on designing non-discriminatory technology, explainable AI, and human-computer interaction with community participation [1]\n", - "* Importance of placing trust in people, not technologies, and engaging with impacted communities to understand potential harms [1]\n", - "* Need for ongoing transparency, value-sensitive and participatory design, and public consultation [1]\n", - "\n", - "**Arguments for and Against:**\n", - "\n", - "* Strong safety regulations and measures to address harms can enhance innovation in complex technologies [3]\n", - "* Balancing ownership rights, use rights, and community health, safety, and welfare is critical in the integration of technology [2]\n", - "* Examining lessons learned from urban planning can inform the development of AI-enabled systems [2]\n", - "\n", - "Overall, the sources highlight the importance of designing AI systems that are non-discriminatory, transparent, and trustworthy, while also considering the potential benefits and risks of 
AI.'''\n", + " Response: '''According to expert opinions, the risks of AI include biased data and discriminatory outcomes, opaque decision-making processes, and lack of public trust and understanding of algorithmic systems [1]. To mitigate these risks, experts emphasize the importance of ongoing transparency, value-sensitive and participatory design, explanations designed for relevant stakeholders, and public consultation [1]. Additionally, industry is providing innovative solutions to mitigate risks to the safety and efficacy of AI systems, including risk assessments, auditing mechanisms, and documentation procedures [3]. The National Institute of Standards and Technology (NIST) is developing a risk management framework to better manage risks posed to individuals, organizations, and society by AI [3]. Furthermore, the White House Office of Science and Technology Policy has led a year-long process to seek input from people across the country on the issue of algorithmic and data-driven harms and potential remedies [4].'''\n", " References:\n", - " [1]: page='56'; title='Blueprint for an AI Bill of Rights'; section='4'; CreationDate='1663695035'; Producer='iLovePDF'; Title='Blueprint for an AI Bill of Rights'; Creator='Adobe Illustrator 26.3 (Macintosh)'; ModDate='1664808078'; name='AI bill of rights'; year='2022'; framework='llama_index'.\n", - "[2]: page='56'; title='Blueprint for an AI Bill of Rights'; section='4'; CreationDate='1663695035'; Producer='iLovePDF'; Title='Blueprint for an AI Bill of Rights'; Creator='Adobe Illustrator 26.3 (Macintosh)'; ModDate='1664808078'; name='AI bill of rights'; year='2022'; framework='llama_index'.\n", - "[3]: page='1'; section='1'; CreationDate='1663695035'; Producer='iLovePDF'; Title='Blueprint for an AI Bill of Rights'; Creator='Adobe Illustrator 26.3 (Macintosh)'; ModDate='1664808078'; name='AI bill of rights'; year='2022'; framework='llama_index'; title='Blueprint for an AI Bill of Rights'.\n", - "[4]: page='1'; section='1'; CreationDate='1663695035'; Producer='iLovePDF'; Title='Blueprint for an AI Bill of Rights'; Creator='Adobe Illustrator 26.3 (Macintosh)'; ModDate='1664808078'; name='AI bill of rights'; year='2022'; framework='llama_index'; title='Blueprint for an AI Bill of Rights'.\n", + " [1]: CreationDate='1663695035'; Producer='iLovePDF'; Title='Blueprint for an AI Bill of Rights'; Creator='Adobe Illustrator 26.3 (Macintosh)'; ModDate='1664808078'; name='AI bill of rights'; year='2022'; framework='llama_index'; title='Blueprint for an AI Bill of Rights'.\n", + "[3]: CreationDate='1663695035'; Producer='iLovePDF'; Title='Blueprint for an AI Bill of Rights'; Creator='Adobe Illustrator 26.3 (Macintosh)'; ModDate='1664808078'; name='AI bill of rights'; year='2022'; framework='llama_index'; title='Blueprint for an AI Bill of Rights'.\n", + "[4]: CreationDate='1663695035'; Producer='iLovePDF'; Title='Blueprint for an AI Bill of Rights'; Creator='Adobe Illustrator 26.3 (Macintosh)'; ModDate='1664808078'; name='AI bill of rights'; year='2022'; framework='llama_index'; title='Blueprint for an AI Bill of Rights'.\n", "\n", " \n", - "\u001b[0m> Running step f9d86544-5c76-48c5-8efd-02c154945e08. Step input: None\n", + "\u001b[0m> Running step a2b4d751-9f91-4fd9-9004-e276da54b75f. Step input: None\n", "\u001b[1;3;38;5;200mThought: I can answer without using any more tools. 
I'll use the user's language to answer\n", - "Answer: Here is a summary of the risks and benefits of AI, along with expert opinions and arguments for and against:\n", + "Answer: The risks and benefits of AI are widely discussed among experts, and there are several key points to consider.\n", "\n", "**Risks of AI:**\n", - "- AI systems can produce biased and discriminatory outcomes if they are trained on biased data [1].\n", - "- The decision-making processes of AI can be opaque, making it difficult to understand how conclusions are reached [1].\n", - "- There is a lack of public trust and understanding of algorithmic systems, which can lead to skepticism and resistance [1].\n", - "- AI has the potential to cause harm to individuals, organizations, and society if not properly managed [3].\n", + "1. **Bias and Discrimination:** AI systems can perpetuate and even amplify biases present in the data they are trained on, leading to discriminatory outcomes.\n", + "2. **Opaque Decision-Making:** The decision-making processes of AI systems can be difficult to understand, leading to a lack of transparency.\n", + "3. **Public Trust:** There is often a lack of public trust and understanding of how AI systems work, which can hinder their acceptance and use.\n", + "\n", + "To mitigate these risks, experts suggest measures such as ensuring transparency, involving stakeholders in the design process, providing clear explanations, and conducting public consultations. Additionally, there are efforts to develop frameworks and guidelines, such as the National Institute of Standards and Technology (NIST) risk management framework, to manage these risks effectively.\n", "\n", "**Benefits of AI:**\n", - "- AI has the potential to build better and more innovative infrastructure, enhancing efficiency and effectiveness [2].\n", - "- It can help mitigate risks to the safety and efficacy of various systems, improving overall safety standards [3].\n", - "- AI holds transformative potential to improve the quality of life, offering advancements in various sectors [4].\n", + "1. **Efficiency and Productivity:** AI can automate repetitive tasks, leading to increased efficiency and productivity in various industries.\n", + "2. **Innovation:** AI drives innovation by enabling new applications and solutions that were not possible before.\n", + "3. 
**Improved Decision-Making:** AI can process large amounts of data quickly, providing insights that can improve decision-making processes.\n", "\n", "**Expert Opinions:**\n", - "- Experts emphasize the need for designing non-discriminatory technology, explainable AI, and fostering human-computer interaction with community participation [1].\n", - "- Trust should be placed in people rather than technologies, and it is crucial to engage with impacted communities to understand potential harms [1].\n", - "- Ongoing transparency, value-sensitive and participatory design, and public consultation are necessary for the responsible development of AI [1].\n", - "\n", - "**Arguments for and Against:**\n", - "- Implementing strong safety regulations and measures to address potential harms can enhance innovation in complex technologies [3].\n", - "- Balancing ownership rights, use rights, and community health, safety, and welfare is critical in the integration of AI technology [2].\n", - "- Lessons learned from urban planning can inform the development of AI-enabled systems, ensuring they are beneficial and sustainable [2].\n", - "\n", - "Overall, the sources highlight the importance of designing AI systems that are non-discriminatory, transparent, and trustworthy, while also considering the potential benefits and risks of AI.\n", + "Experts argue for the benefits of AI in terms of its potential to transform industries and improve quality of life. However, they also caution against the risks, emphasizing the need for responsible development and deployment of AI technologies. The balance between leveraging AI's benefits and managing its risks is crucial for its successful integration into society.\n", "\n", "References:\n", - "[1]: Blueprint for an AI Bill of Rights\n", - "[2]: Blueprint for an AI Bill of Rights\n", - "[3]: Blueprint for an AI Bill of Rights\n", - "[4]: Blueprint for an AI Bill of Rights\n", - "\u001b[0mTime taken: 21.543328046798706\n" + "- [Blueprint for an AI Bill of Rights](https://www.whitehouse.gov/ostp/ai-bill-of-rights/)\n", + "\u001b[0mTime taken: 20.452504634857178\n" ] }, { "data": { "text/markdown": [ - "Here is a summary of the risks and benefits of AI, along with expert opinions and arguments for and against:\n", + "The risks and benefits of AI are widely discussed among experts, and there are several key points to consider.\n", "\n", "**Risks of AI:**\n", - "- AI systems can produce biased and discriminatory outcomes if they are trained on biased data [1].\n", - "- The decision-making processes of AI can be opaque, making it difficult to understand how conclusions are reached [1].\n", - "- There is a lack of public trust and understanding of algorithmic systems, which can lead to skepticism and resistance [1].\n", - "- AI has the potential to cause harm to individuals, organizations, and society if not properly managed [3].\n", + "1. **Bias and Discrimination:** AI systems can perpetuate and even amplify biases present in the data they are trained on, leading to discriminatory outcomes.\n", + "2. **Opaque Decision-Making:** The decision-making processes of AI systems can be difficult to understand, leading to a lack of transparency.\n", + "3. 
**Public Trust:** There is often a lack of public trust and understanding of how AI systems work, which can hinder their acceptance and use.\n", + "\n", + "To mitigate these risks, experts suggest measures such as ensuring transparency, involving stakeholders in the design process, providing clear explanations, and conducting public consultations. Additionally, there are efforts to develop frameworks and guidelines, such as the National Institute of Standards and Technology (NIST) risk management framework, to manage these risks effectively.\n", "\n", "**Benefits of AI:**\n", - "- AI has the potential to build better and more innovative infrastructure, enhancing efficiency and effectiveness [2].\n", - "- It can help mitigate risks to the safety and efficacy of various systems, improving overall safety standards [3].\n", - "- AI holds transformative potential to improve the quality of life, offering advancements in various sectors [4].\n", + "1. **Efficiency and Productivity:** AI can automate repetitive tasks, leading to increased efficiency and productivity in various industries.\n", + "2. **Innovation:** AI drives innovation by enabling new applications and solutions that were not possible before.\n", + "3. **Improved Decision-Making:** AI can process large amounts of data quickly, providing insights that can improve decision-making processes.\n", "\n", "**Expert Opinions:**\n", - "- Experts emphasize the need for designing non-discriminatory technology, explainable AI, and fostering human-computer interaction with community participation [1].\n", - "- Trust should be placed in people rather than technologies, and it is crucial to engage with impacted communities to understand potential harms [1].\n", - "- Ongoing transparency, value-sensitive and participatory design, and public consultation are necessary for the responsible development of AI [1].\n", - "\n", - "**Arguments for and Against:**\n", - "- Implementing strong safety regulations and measures to address potential harms can enhance innovation in complex technologies [3].\n", - "- Balancing ownership rights, use rights, and community health, safety, and welfare is critical in the integration of AI technology [2].\n", - "- Lessons learned from urban planning can inform the development of AI-enabled systems, ensuring they are beneficial and sustainable [2].\n", - "\n", - "Overall, the sources highlight the importance of designing AI systems that are non-discriminatory, transparent, and trustworthy, while also considering the potential benefits and risks of AI.\n", + "Experts argue for the benefits of AI in terms of its potential to transform industries and improve quality of life. However, they also caution against the risks, emphasizing the need for responsible development and deployment of AI technologies. 
The balance between leveraging AI's benefits and managing its risks is crucial for its successful integration into society.\n", "\n", "References:\n", - "[1]: Blueprint for an AI Bill of Rights\n", - "[2]: Blueprint for an AI Bill of Rights\n", - "[3]: Blueprint for an AI Bill of Rights\n", - "[4]: Blueprint for an AI Bill of Rights" + "- [Blueprint for an AI Bill of Rights](https://www.whitehouse.gov/ostp/ai-bill-of-rights/)" ], "text/plain": [ "" @@ -456,10 +411,10 @@ " tool_name=\"query_ai\",\n", " data_description=\"AI regulations\",\n", " assistant_specialty=\"artificial intelligence\",\n", - " vectara_summary_num_results=5,\n", - " vectara_summarizer=\"mockingbird-1.0-2024-07-16\",\n", " vectara_reranker=\"mmr\",\n", " vectara_rerank_k=50,\n", + " vectara_summary_num_results=5,\n", + " vectara_summarizer=\"mockingbird-1.0-2024-07-16\",\n", " verbose=True,\n", ")\n", "\n", diff --git a/llama-index-integrations/indices/llama-index-indices-managed-vectara/Changelog.md b/llama-index-integrations/indices/llama-index-indices-managed-vectara/Changelog.md new file mode 100644 index 0000000000000..2d038347be523 --- /dev/null +++ b/llama-index-integrations/indices/llama-index-indices-managed-vectara/Changelog.md @@ -0,0 +1,11 @@ +# CHANGELOG — llama-index-indices-managed-vectara + +## [0.4.0] + +Implementation switched from using Vectara API v1 to API v2. +There are a number of breaking changes involved with this transition: + +1. The `vectara_customer_id` parameter was removed from `VectaraIndex`. You no longer need to specify this information when you instantiate an index nor provide the environment variable `VECTARA_CUSTOMER_ID`. +2. The `vectara_corpus_id` parameter was replaced with `vectara_corpus_key`. When creating a `VectaraIndex` object, please either specify `vectara_corpus_key` explicitly or add `VECTARA_CORPUS_KEY` to your environment. This should use the corpus key of your Vectara corpus rather than the corpus ID. +3. The `add_documents()` function was removed and replaced with two new functions for indexing documents. If you want to use the Structured Document type, use the new `add_document()` function. If you would like to use the Core Document type, use the new `add_nodes()` function. +4. For specifying reranker types, `"udf"` has been replaced with `"userfn"`. diff --git a/llama-index-integrations/indices/llama-index-indices-managed-vectara/README.md b/llama-index-integrations/indices/llama-index-indices-managed-vectara/README.md index 2db7ebaec9a59..f81b4b2b3cd22 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-vectara/README.md +++ b/llama-index-integrations/indices/llama-index-indices-managed-vectara/README.md @@ -17,30 +17,39 @@ Finally, set up your Vectara corpus. If you don't have a Vectara account, you ca ## Usage +Please note that this usage example is for versions >= 0.4.0 and will not be the same as for earlier versions of Vectara ManagedIndex. + First let's initialize the index with some sample documents. +Make sure to always specify a unique `id_` for every document you add to your index. +If you don't specify this parameter, a random id will be generated and the document will be separately added to your corpus every time you run your code. 
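+If you are upgrading from a release before 0.4.0, note that the index now reads only the corpus key and the API key (see the Changelog for the full list of breaking changes). A minimal sketch of the credential change, assuming you previously exported the v1-style variables:
+
+```python
+import os
+
+# Pre-0.4.0 (no longer read by VectaraIndex):
+# os.environ["VECTARA_CUSTOMER_ID"] = "<YOUR_VECTARA_CUSTOMER_ID>"
+# os.environ["VECTARA_CORPUS_ID"] = "<YOUR_VECTARA_CORPUS_ID>"
+
+# 0.4.0 and later:
+os.environ["VECTARA_CORPUS_KEY"] = "<YOUR_VECTARA_CORPUS_KEY>"
+os.environ["VECTARA_API_KEY"] = "<YOUR_VECTARA_API_KEY>"
+```
+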
```python import os os.environ["VECTARA_API_KEY"] = "" -os.environ["VECTARA_CORPUS_ID"] = "" -os.environ["VECTARA_CUSTOMER_ID"] = "" +os.environ["VECTARA_CORPUS_KEY"] = "" from llama_index.indices.managed.vectara import VectaraIndex -from llama_index.core.schema import Document +from llama_index.core.schema import Document, MediaResource docs = [ Document( - text=""" - This is test text for Vectara integration with LlamaIndex. - Users should love their experience with this integration - """, + id_="doc1", + text_resource=MediaResource( + text=""" + This is test text for Vectara integration with LlamaIndex. + Users should love their experience with this integration + """, + ), ), Document( - text=""" - The Vectara index integration with LlamaIndex implements Vectara's RAG pipeline. - It can be used both as a retriever and query engine. - """, + id_="doc2", + text_resource=MediaResource( + text=""" + The Vectara index integration with LlamaIndex implements Vectara's RAG pipeline. + It can be used both as a retriever and query engine. + """, + ), ), ] diff --git a/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/base.py b/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/base.py index 46a95590ef673..e040c4f6516a4 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/base.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/base.py @@ -9,8 +9,8 @@ import logging import os from concurrent.futures import ThreadPoolExecutor -from hashlib import blake2b -from typing import Any, Dict, List, Optional, Sequence, Type +from typing import Any, List, Optional, Sequence, Type, Dict +from functools import lru_cache import requests from llama_index.core.base.base_query_engine import BaseQueryEngine @@ -21,10 +21,8 @@ from llama_index.core.indices.managed.base import BaseManagedIndex, IndexType from llama_index.core.llms.utils import LLMType, resolve_llm from llama_index.core.schema import ( - BaseNode, Document, - MetadataMode, - TextNode, + Node, TransformComponent, ) from llama_index.core.settings import Settings @@ -63,11 +61,8 @@ class VectaraIndex(BaseManagedIndex): def __init__( self, show_progress: bool = False, - nodes: Optional[Sequence[BaseNode]] = None, - vectara_customer_id: Optional[str] = None, - vectara_corpus_id: Optional[str] = None, + vectara_corpus_key: Optional[str] = None, vectara_api_key: Optional[str] = None, - use_core_api: bool = False, parallelize_ingest: bool = False, x_source_str: str = "llama_index", **kwargs: Any, @@ -75,7 +70,7 @@ def __init__( """Initialize the Vectara API.""" self.parallelize_ingest = parallelize_ingest index_struct = VectaraIndexStruct( - index_id=str(vectara_corpus_id), + index_id=str(vectara_corpus_key), summary="Vectara Index", ) @@ -84,25 +79,19 @@ def __init__( index_struct=index_struct, **kwargs, ) - self._vectara_customer_id = vectara_customer_id or os.environ.get( - "VECTARA_CUSTOMER_ID" - ) - self._vectara_corpus_id = vectara_corpus_id or str( - os.environ.get("VECTARA_CORPUS_ID") + + self._vectara_corpus_key = vectara_corpus_key or str( + os.environ.get("VECTARA_CORPUS_KEY") ) + self._vectara_api_key = vectara_api_key or os.environ.get("VECTARA_API_KEY") - if ( - self._vectara_customer_id is None - or self._vectara_corpus_id is None - or self._vectara_api_key is None - ): + if self._vectara_corpus_key is None or 
self._vectara_api_key is None: _logger.warning( - "Can't find Vectara credentials, customer_id or corpus_id in " - "environment." + "Can't find Vectara credentials or corpus_key in environment." ) raise ValueError("Missing Vectara credentials") else: - _logger.debug(f"Using corpus id {self._vectara_corpus_id}") + _logger.debug(f"Using corpus key {self._vectara_corpus_key}") # identifies usage source for internal measurement self._x_source_str = x_source_str @@ -113,183 +102,216 @@ def __init__( adapter = requests.adapters.HTTPAdapter(max_retries=3) self._session.mount("https://", adapter) self.vectara_api_timeout = 90 - self.use_core_api = use_core_api self.doc_ids: List[str] = [] - # if nodes is specified, consider each node as a single document - # and use _build_index_from_nodes() to add them to the index - if nodes is not None: - self._build_index_from_nodes(nodes, use_core_api) - - def _build_index_from_nodes( - self, nodes: Sequence[BaseNode], use_core_api: bool = False - ) -> IndexDict: - docs = [ - Document( - text=node.get_content(metadata_mode=MetadataMode.NONE), - metadata=node.metadata, # type: ignore - id_=node.id_, # type: ignore - ) - for node in nodes - ] - self.add_documents(docs, use_core_api) - return self.index_struct - - def _get_corpus_id(self, corpus_id: str) -> str: + @lru_cache(maxsize=None) + def _get_corpus_key(self, corpus_key: str) -> str: """ - Get the corpus id to use for the index. - If corpus_id is provided, check if it is one of the valid corpus ids. - If not, use the first corpus id in the list. + Get the corpus key to use for the index. + If corpus_key is provided, check if it is one of the valid corpus keys. + If not, use the first corpus key in the list. """ - if corpus_id is not None: - if corpus_id in self._vectara_corpus_id.split(","): - return corpus_id - return self._vectara_corpus_id.split(",")[0] + if corpus_key is not None: + if corpus_key in self._vectara_corpus_key.split(","): + return corpus_key + return self._vectara_corpus_key.split(",")[0] def _get_post_headers(self) -> dict: """Returns headers that should be attached to each post request.""" return { "x-api-key": self._vectara_api_key, - "customer-id": self._vectara_customer_id, "Content-Type": "application/json", "X-Source": self._x_source_str, } - def _delete_doc(self, doc_id: str, corpus_id: Optional[str] = None) -> bool: + def _delete_doc(self, doc_id: str, corpus_key: Optional[str] = None) -> bool: """ Delete a document from the Vectara corpus. Args: - url (str): URL of the page to delete. doc_id (str): ID of the document to delete. - corpus_id (str): corpus ID to delete the document from. + corpus_key (str): corpus key to delete the document from. Returns: bool: True if deletion was successful, False otherwise. 
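+        Note:
+            A successful deletion returns HTTP 204 (No Content) from the v2 API,
+            which is the status code this method checks for.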
""" - valid_corpus_id = self._get_corpus_id(corpus_id) - body = { - "customerId": self._vectara_customer_id, - "corpusId": valid_corpus_id, - "documentId": doc_id, - } - response = self._session.post( - "https://api.vectara.io/v1/delete-doc", + valid_corpus_key = self._get_corpus_key(corpus_key) + body = {} + response = self._session.delete( + f"https://api.vectara.io/v2/corpora/{valid_corpus_key}/documents/{doc_id}", data=json.dumps(body), verify=True, headers=self._get_post_headers(), timeout=self.vectara_api_timeout, ) - if response.status_code != 200: + if response.status_code != 204: _logger.error( f"Delete request failed for doc_id = {doc_id} with status code " - f"{response.status_code}, reason {response.reason}, text " - f"{response.text}" + f"{response.status_code}, text {response.json()['messages'][0]}" ) return False return True - def _index_doc(self, doc: dict, corpus_id) -> str: - request: Dict[str, Any] = {} - request["customerId"] = self._vectara_customer_id - request["corpusId"] = corpus_id - request["document"] = doc - - if "parts" in doc: - api_url = "https://api.vectara.io/v1/core/index" - else: - api_url = "https://api.vectara.io/v1/index" - + def _index_doc(self, doc: dict, corpus_key) -> str: response = self._session.post( headers=self._get_post_headers(), - url=api_url, - data=json.dumps(request), + url=f"https://api.vectara.io/v2/corpora/{corpus_key}/documents", + data=json.dumps(doc), timeout=self.vectara_api_timeout, verify=True, ) status_code = response.status_code - result = response.json() - - status_str = result["status"]["code"] if "status" in result else None - if status_code == 409 and status_str and (status_str == "ALREADY_EXISTS"): - return "E_ALREADY_EXISTS" - elif status_code == 200 and status_str and (status_str == "INVALID_ARGUMENT"): - return "E_INVALID_ARGUMENT" - elif status_str and (status_str == "FORBIDDEN"): - return "E_NO_PERMISSIONS" - else: + if status_code == 201: return "E_SUCCEEDED" + result = response.json() + return result["messages"][0] + def _insert( self, - nodes: Sequence[BaseNode], - corpus_id: Optional[str] = None, - use_core_api: bool = False, + document: Optional[Document] = None, + nodes: Optional[Sequence[Node]] = None, + corpus_key: Optional[str] = None, **insert_kwargs: Any, ) -> None: - """Insert a set of documents (each a node).""" + """ + Insert a document into a corpus using Vectara's indexing API. - def gen_hash(s: str) -> str: - hash_object = blake2b(digest_size=32) - hash_object.update(s.encode("utf-8")) - return hash_object.hexdigest() + Args: + document (Document): a document to index using Vectara's Structured Document type. + nodes (Sequence[Node]): a list of nodes representing document parts to index a document using Vectara's Core Document type. + corpus_key (str): If multiple corpora are provided for this index, the corpus_key of the corpus you want to add the document to. 
+        """
+        if document:
+            # Use Structured Document type
+            metadata = document.metadata.copy()
+            metadata["framework"] = "llama_index"
+            doc = {
+                "id": document.id_,
+                "type": "structured",
+                "metadata": metadata,
+                "sections": [{"text": document.text_resource.text}],
+            }
+
+            if "title" in insert_kwargs and insert_kwargs["title"]:
+                doc["title"] = insert_kwargs["title"]
+
+            if "description" in insert_kwargs and insert_kwargs["description"]:
+                doc["description"] = insert_kwargs["description"]
 
-        docs = []
-        for node in nodes:
-            metadata = node.metadata.copy()
+            if (
+                "max_chars_per_chunk" in insert_kwargs
+                and insert_kwargs["max_chars_per_chunk"]
+            ):
+                doc["chunking_strategy"] = {
+                    "type": "max_chars_chunking_strategy",
+                    "max_chars_per_chunk": insert_kwargs["max_chars_per_chunk"],
+                }
+
+        elif nodes:
+            # Use Core Document type
+            metadata = insert_kwargs["doc_metadata"]
             metadata["framework"] = "llama_index"
 
-            section_key = "parts" if use_core_api else "section"
-            text = node.get_content(metadata_mode=MetadataMode.NONE)
-            doc_id = gen_hash(text)
             doc = {
-                "documentId": doc_id,
-                "metadataJson": json.dumps(node.metadata),
-                section_key: [{"text": text}],
+                "id": insert_kwargs["doc_id"],
+                "type": "core",
+                "metadata": metadata,
+                "document_parts": [
+                    {"text": node.text_resource.text, "metadata": node.metadata}
+                    for node in nodes
+                ],
             }
-            docs.append(doc)
 
-        valid_corpus_id = self._get_corpus_id(corpus_id)
+        else:
+            _logger.error(
+                "Error indexing document. Must provide either a document or a list of nodes."
+            )
+            return
+
+        valid_corpus_key = self._get_corpus_key(corpus_key)
         if self.parallelize_ingest:
             with ThreadPoolExecutor() as executor:
-                futures = [
-                    executor.submit(self._index_doc, doc, valid_corpus_id)
-                    for doc in docs
-                ]
-                for future in futures:
-                    ecode = future.result()
-                    if ecode != "E_SUCCEEDED":
-                        _logger.error(
-                            f"Error indexing document in Vectara with error code {ecode}"
-                        )
-            self.doc_ids.extend([doc["documentId"] for doc in docs])
-        else:
-            for doc in docs:
-                ecode = self._index_doc(doc, valid_corpus_id)
+                future = executor.submit(self._index_doc, doc, valid_corpus_key)
+                ecode = future.result()
                 if ecode != "E_SUCCEEDED":
                     _logger.error(
                         f"Error indexing document in Vectara with error code {ecode}"
                     )
-                self.doc_ids.append(doc["documentId"])
+                self.doc_ids.append(doc["id"])
+        else:
+            ecode = self._index_doc(doc, valid_corpus_key)
+            if ecode != "E_SUCCEEDED":
+                _logger.error(
+                    f"Error indexing document in Vectara with error code {ecode}"
+                )
+            self.doc_ids.append(doc["id"])
 
-    def add_documents(
+    def add_document(
         self,
-        docs: Sequence[Document],
-        corpus_id: Optional[str],
-        use_core_api: bool = False,
-        allow_update: bool = True,
+        doc: Document,
+        corpus_key: Optional[str] = None,
+        title: Optional[str] = None,
+        description: Optional[str] = None,
+        max_chars_per_chunk: Optional[int] = None,
     ) -> None:
-        nodes = [
-            TextNode(text=doc.get_content(), metadata=doc.metadata) for doc in docs  # type: ignore
-        ]
-        self._insert(nodes, corpus_id, use_core_api)
+        """
+        Indexes a document into a corpus using the Vectara Structured Document format.
+
+        Full API Docs: https://docs.vectara.com/docs/api-reference/indexing-apis/indexing#structured-document-object-definition
+
+        Args:
+            doc (Document): The document object to be indexed.
+                You should provide the value you want for the document id in the corpus as the id_ member of this object.
+                You should provide any document_metadata in the metadata member of this object.
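+                Provide the document text through the text_resource member (a MediaResource); this method reads document.text_resource.text.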
+ corpus_key (str): If multiple corpora are provided for this index, the corpus_key of the corpus you want to add the document to. + title (str): The title of the document. + description (str): The description of the document. + max_chars_per_chunk (int): The maximum number of characters per chunk. + """ + self._insert( + document=doc, + corpus_key=corpus_key, + title=title, + description=description, + max_chars_per_chunk=max_chars_per_chunk, + ) + + def add_nodes( + self, + nodes: Sequence[Node], + document_id: str, + document_metadata: Optional[Dict] = {}, + corpus_key: Optional[str] = None, + ) -> None: + """ + Indexes a document into a corpus using the Vectara Core Document format. + + Full API Docs: https://docs.vectara.com/docs/api-reference/indexing-apis/indexing#core-document-object-definition + + Args: + nodes (Sequence[Node]): The user-specified document parts. + You should provide any part_metadata in the metadata member of each node. + document_id (str): The document id (must be unique for the corpus). + document_metadata (Dict): The document_metadata to be associated with this document. + corpus_key (str): If multiple corpora are provided for this index, the corpus_key of the corpus you want to add the document to. + """ + self._insert( + nodes=nodes, + corpus_key=corpus_key, + doc_id=document_id, + doc_metadata=document_metadata, + ) def insert_file( self, file_path: str, metadata: Optional[dict] = None, - corpus_id: Optional[str] = None, + chunking_strategy: Optional[dict] = None, + enable_table_extraction: Optional[bool] = False, + filename: Optional[str] = None, + corpus_key: Optional[str] = None, **insert_kwargs: Any, ) -> Optional[str]: """ @@ -298,14 +320,17 @@ def insert_file( This method provides a way to use that API in Llama_index. # ruff: noqa: E501 - Full API Docs: https://docs.vectara.com/docs/api-reference/indexing-apis/ - file-upload/file-upload-filetypes + Full API Docs: https://docs.vectara.com/docs/rest-api/upload-file Args: file_path: local file path Files could be text, HTML, PDF, markdown, doc/docx, ppt/pptx, etc. 
see API docs for full list
-            metadata: Optional list of metadata associated with the file
+            metadata: Optional dict of metadata associated with the file
+            chunking_strategy: Optional dict specifying the chunking strategy (for example, a max_chars_chunking_strategy limiting the characters per chunk)
+            enable_table_extraction: Optional bool specifying whether or not to extract tables from the document
+            filename: Optional string to use as the file name in the corpus (defaults to the base name of file_path)
+
 
         Returns:
-            List of ids associated with each of the files indexed
+            The id of the indexed file, or None if the upload failed
 
         """
         if not os.path.exists(file_path):
             _logger.error(f"File {file_path} does not exist")
             return None
 
-        metadata = metadata or {}
-        metadata["framework"] = "llama_index"
-        files: dict = {
-            "file": (file_path, open(file_path, "rb")),
-            "doc_metadata": json.dumps(metadata),
-        }
+        if filename is None:
+            filename = file_path.split("/")[-1]
+
+        files = {"file": (filename, open(file_path, "rb"))}
+
+        if metadata:
+            metadata["framework"] = "llama_index"
+            files["metadata"] = (None, json.dumps(metadata), "application/json")
+
+        if chunking_strategy:
+            files["chunking_strategy"] = (
+                None,
+                json.dumps(chunking_strategy),
+                "application/json",
+            )
+
+        if enable_table_extraction:
+            files["table_extraction_config"] = (
+                None,
+                json.dumps({"extract_tables": enable_table_extraction}),
+                "application/json",
+            )
+
         headers = self._get_post_headers()
         headers.pop("Content-Type")
-        valid_corpus_id = self._get_corpus_id(corpus_id)
+        valid_corpus_key = self._get_corpus_key(corpus_key)
         response = self._session.post(
-            f"https://api.vectara.io/upload?c={self._vectara_customer_id}&o={valid_corpus_id}&d=True",
+            f"https://api.vectara.io/v2/corpora/{valid_corpus_key}/upload_file",
             files=files,
             verify=True,
             headers=headers,
@@ -332,35 +374,70 @@
         )
 
         res = response.json()
-        if response.status_code == 409:
-            _logger.info(
-                f"File {file_path} already exists on Vectara, skipping indexing"
-            )
-            return None
-        elif response.status_code == 200:
-            quota = res["response"]["quotaConsumed"]["numChars"]
-            if quota == 0:
-                _logger.warning(
-                    f"File Upload for {file_path} returned 0 quota consumed, please check your Vectara account quota"
-                )
-            doc_id = res["document"]["documentId"]
+        if response.status_code == 201:
+            doc_id = res["id"]
             self.doc_ids.append(doc_id)
             return doc_id
+        elif response.status_code == 400:
+            _logger.info(f"File upload failed with error message {res['field_errors']}")
+            return None
         else:
-            _logger.info(f"Error indexing file {file_path}: {res}")
+            _logger.info(f"File upload failed with error message {res['messages'][0]}")
             return None
 
     def delete_ref_doc(
-        self, ref_doc_id: str, delete_from_docstore: bool = False, **delete_kwargs: Any
+        self, ref_doc_id: str, delete_from_docstore: bool = True, **delete_kwargs: Any
     ) -> None:
-        raise NotImplementedError(
-            "Vectara does not support deleting a reference document"
-        )
+        """
+        Delete a document from a Vectara corpus.
+
+        Args:
+            ref_doc_id (str): ID of the document to delete
+            delete_from_docstore (bool): Whether to delete the document from the corpus.
+                If False, no change is made to the index or corpus.
+            corpus_key (str): corpus key to delete the document from.
+                This should be specified if there are multiple corpora in the index.
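+                If omitted, the first corpus key configured for the index is used.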
+        """
+        if delete_from_docstore:
+            if "corpus_key" in delete_kwargs:
+                self._delete_doc(
+                    doc_id=ref_doc_id, corpus_key=delete_kwargs["corpus_key"]
+                )
+            else:
+                self._delete_doc(doc_id=ref_doc_id)
 
     def update_ref_doc(self, document: Document, **update_kwargs: Any) -> None:
-        raise NotImplementedError(
-            "Vectara does not support updating a reference document"
-        )
+        """
+        Update a document's metadata in a Vectara corpus.
+
+        Args:
+            document (Document): The document to update.
+                Make sure to include the id_ argument for proper identification within the corpus.
+            corpus_key (str): corpus key of the corpus containing the document.
+                This should be specified if there are multiple corpora in the index.
+            metadata (dict): dictionary specifying any modifications or additions to the document's metadata.
+                If no metadata is provided, no update request is made.
+        """
+        if "metadata" not in update_kwargs:
+            return
+
+        if "corpus_key" in update_kwargs:
+            valid_corpus_key = self._get_corpus_key(update_kwargs["corpus_key"])
+        else:
+            valid_corpus_key = self._get_corpus_key(corpus_key=None)
+
+        doc_id = document.doc_id
+        body = {"metadata": update_kwargs["metadata"]}
+        response = self._session.patch(
+            f"https://api.vectara.io/v2/corpora/{valid_corpus_key}/documents/{doc_id}",
+            data=json.dumps(body),
+            verify=True,
+            headers=self._get_post_headers(),
+            timeout=self.vectara_api_timeout,
+        )
+
+        if response.status_code != 200:
+            _logger.error(
+                f"Update request failed for doc_id = {doc_id} with status code "
+                f"{response.status_code}, text {response.json()['messages'][0]}"
+            )
 
     def as_retriever(self, **kwargs: Any) -> BaseRetriever:
         """Return a Retriever for this managed index."""
@@ -422,12 +499,12 @@ def from_documents(
         **kwargs: Any,
     ) -> IndexType:
         """Build a Vectara index from a sequence of documents."""
-        nodes = [
-            TextNode(text=document.get_content(), metadata=document.metadata)  # type: ignore
-            for document in documents
-        ]
-        return cls(
-            nodes=nodes,
+        index = cls(
             show_progress=show_progress,
             **kwargs,
         )
+
+        for doc in documents:
+            index.add_document(doc)
+
+        return index
diff --git a/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/query.py b/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/query.py
index 44477eec48e46..9c3aa510f1c80 100644
--- a/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/query.py
+++ b/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/query.py
@@ -1,4 +1,4 @@
-from typing import Any, List, Optional
+from typing import Any, List, Dict, Optional
 
 from llama_index.core.base.base_query_engine import BaseQueryEngine
 from llama_index.core.base.base_retriever import BaseRetriever
@@ -15,10 +15,10 @@
     BaseChatEngine,
     StreamingAgentChatResponse,
 )
+
 from llama_index.core.base.response.schema import (
     RESPONSE_TYPE,
     Response,
-    StreamingResponse,
 )
 
 from llama_index.indices.managed.vectara.retriever import VectaraRetriever
@@ -44,7 +44,7 @@ def __init__(
         summary_enabled: bool = False,
         summary_response_lang: str = "eng",
         summary_num_results: int = 5,
-        summary_prompt_name: str = "vectara-summary-ext-24-05-sml",
+        summary_prompt_name: str = "vectara-summary-ext-24-05-med-omni",
         verbose: bool = False,
         **kwargs: Any,
     ) -> None:
@@ -112,21 +112,17 @@ def _query(self, query_bundle: QueryBundle) -> RESPONSE_TYPE:
         """Answer a query."""
         kwargs = (
             {
-                "summary_response_lang": self._summary_response_lang,
-                "summary_num_results": 
self._summary_num_results, - "summary_prompt_name": self._summary_prompt_name, + "response_language": self._summary_response_lang, + "max_used_search_results": self._summary_num_results, + "generation_preset_name": self._summary_prompt_name, } if self._summary_enabled else {} ) if self._streaming: - nodes = self.retrieve(query_bundle) - query_response = StreamingResponse( - response_gen=self._retriever._vectara_stream( - query_bundle, chat=False, verbose=self._verbose - ), - source_nodes=nodes, + query_response = self._retriever._vectara_stream( + query_bundle, chat=False, verbose=self._verbose ) else: nodes, response, _ = self._retriever._vectara_query( @@ -168,7 +164,7 @@ def __init__( streaming: bool = False, summary_response_lang: str = "eng", summary_num_results: int = 5, - summary_prompt_name: str = "vectara-summary-ext-24-05-sml", + summary_prompt_name: str = "vectara-summary-ext-24-05-med-omni", node_postprocessors: Optional[List[BaseNodePostprocessor]] = None, callback_manager: Optional[CallbackManager] = None, verbose: bool = False, @@ -214,9 +210,9 @@ def chat(self, message: str) -> AgentChatResponse: ) as query_event: kwargs = ( { - "summary_response_lang": self._summary_response_lang, - "summary_num_results": self._summary_num_results, - "summary_prompt_name": self._summary_prompt_name, + "response_language": self._summary_response_lang, + "max_used_search_results": self._summary_num_results, + "generation_preset_name": self._summary_prompt_name, } if self._summary_enabled else {} @@ -239,15 +235,18 @@ async def achat(self, message: str) -> AgentChatResponse: """Chat with the agent asynchronously.""" return await self.chat(message) + def set_chat_id(self, source_nodes: List, metadata: Dict) -> None: + """Callback function for setting the conv_id.""" + self.conv_id = metadata.get("chat_id", self.conv_id) + def stream_chat(self, message: str) -> StreamingAgentChatResponse: query_bundle = QueryBundle(message) - nodes = self._retriever.retrieve(query_bundle) - return StreamingAgentChatResponse( - chat_stream=self._retriever._vectara_stream( - query_bundle, chat=True, conv_id=self.conv_id - ), - source_nodes=nodes, + return self._retriever._vectara_stream( + query_bundle, + chat=True, + conv_id=self.conv_id, + callback_func=self.set_chat_id, ) async def astream_chat(self, message: str) -> StreamingAgentChatResponse: diff --git a/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/retriever.py b/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/retriever.py index 520f124bcb8af..fbfb134fac0f0 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/retriever.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-vectara/llama_index/indices/managed/vectara/retriever.py @@ -5,20 +5,18 @@ import json import logging -from typing import Any, List, Optional, Tuple, Dict +from typing import Any, List, Optional, Tuple, Dict, Callable, Union from enum import Enum -import urllib.parse from llama_index.core.base.base_retriever import BaseRetriever from llama_index.core.callbacks.base import CallbackManager from llama_index.core.indices.vector_store.retrievers.auto_retriever.auto_retriever import ( VectorIndexAutoRetriever, ) -from llama_index.core.schema import NodeWithScore, QueryBundle, TextNode +from llama_index.core.schema import NodeWithScore, QueryBundle, Node, MediaResource from llama_index.core.types 
import TokenGen
-from llama_index.core.llms import (
-    CompletionResponse,
-)
+from llama_index.core.base.response.schema import StreamingResponse
+
 from llama_index.core.vector_stores.types import (
     FilterCondition,
     MetadataFilters,
@@ -33,28 +31,16 @@
 
 _logger = logging.getLogger(__name__)
 
-MMR_RERANKER_ID = 272725718
-SLINGSHOT_RERANKER_ID = 272725719
-UDF_RERANKER_ID = 272725722
-
 
 class VectaraReranker(str, Enum):
     NONE = "none"
     MMR = "mmr"
     SLINGSHOT = "multilingual_reranker_v1"
     SLINGSHOT_ALT_NAME = "slingshot"
-    UDF = "udf"
+    UDF = "userfn"
     CHAIN = "chain"
 
 
-CHAIN_RERANKER_NAMES = {
-    VectaraReranker.MMR: "Maximum Marginal Relevance Reranker",
-    VectaraReranker.SLINGSHOT: "Rerank_Multilingual_v1",
-    VectaraReranker.SLINGSHOT_ALT_NAME: "Rerank_Multilingual_v1",
-    VectaraReranker.UDF: "User_Defined_Function_Reranker",
-}
-
-
 class VectaraRetriever(BaseRetriever):
     """
     Vectara Retriever.
@@ -62,69 +48,99 @@ class VectaraRetriever(BaseRetriever):
     Args:
         index (VectaraIndex): the Vectara Index
         similarity_top_k (int): number of top k results to return, defaults to 5.
-        lambda_val (float): for hybrid search.
+        offset (int): number of results to skip, defaults to 0.
+        lambda_val (Union[List[float], float]): for hybrid search.
             0 = neural search only.
             1 = keyword match only.
-            In between values are a linear interpolation
+            In between values are a linear interpolation.
+            Provide single value for one corpus or a list of values for each corpus.
+        semantics (Union[List[str], str]): Indicates whether the query is intended as a query or response.
+            Provide single value for one corpus or a list of values for each corpus.
+        custom_dimensions (Dict): Custom dimensions for the query.
+            See (https://docs.vectara.com/docs/learn/semantic-search/add-custom-dimensions)
+            for more details about usage.
+            Provide single dict for one corpus or a list of dicts for each corpus.
         n_sentences_before (int):
             number of sentences before the matched sentence to return in the node
         n_sentences_after (int):
            number of sentences after the matched sentence to return in the node
-        filter: metadata filter (if specified)
-        reranker (str): reranker to use: none, mmr, slingshot/multilingual_reranker_v1, udf, or chain.
-            Note that "multilingual_reranker_v1" is a Vectara Scale feature only.
-        rerank_k: number of results to fetch for Reranking, defaults to 50.
-        mmr_diversity_bias: number between 0 and 1 that determines the degree
+        filter (Union[List[str], str]): metadata filter (if specified). Provide single string for one corpus
+            or a list of strings to specify the filter for each corpus (if multiple corpora).
+        reranker (str): reranker to use: none, mmr, slingshot/multilingual_reranker_v1, userfn, or chain.
+        rerank_k (int): number of results to fetch for Reranking, defaults to 50.
+        rerank_limit (int): maximum number of results to return after reranking, defaults to 50.
+            Don't specify this for chain reranking. Instead, put the "limit" parameter in the dict for each individual reranker.
+        rerank_cutoff (float): minimum score threshold for results to include after reranking, defaults to 0.
+            Don't specify this for chain reranking. Instead, put the "cutoff" parameter in the dict for each individual reranker.
+        mmr_diversity_bias (float): number between 0 and 1 that determines the degree
            of diversity among the results with 0 corresponding
            to minimum diversity and 1 to maximum diversity.
            Defaults to 0.3.
-        udf_expression: the user defined expression for reranking results. 
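As an illustration of the reranking options above (example values only, assuming `index` is an existing `VectaraIndex`): `rerank_limit` and `rerank_cutoff` apply to a single reranker, while a chain carries those settings inside each element.

```python
# Single reranker: top-level limit/cutoff apply.
retriever = index.as_retriever(
    reranker="slingshot",
    rerank_k=50,
    rerank_limit=10,
    rerank_cutoff=0.5,
)

# Chain reranker: put "limit"/"cutoff" inside each element instead.
retriever = index.as_retriever(
    reranker="chain",
    rerank_k=50,
    rerank_chain=[
        {"type": "slingshot", "cutoff": 0.25},  # slingshot must come first if used
        {"type": "mmr", "diversity_bias": 0.2, "limit": 10},
    ],
)
```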
+ udf_expression (str): the user defined expression for reranking results. See (https://docs.vectara.com/docs/learn/user-defined-function-reranker) for more details about syntax for udf reranker expressions. - rerank_chain: a list of rerankers to be applied in a sequence and their associated parameters - for the chain reranker. Each element should specify the "type" of reranker (mmr, slingshot, udf) - and any other parameters (e.g. "limit" or "cutoff" for any type, "diversity_bias" for mmr, and "user_function" for udf). + rerank_chain (List[Dict]): a list of rerankers to be applied in a sequence and their associated parameters + for the chain reranker. Each element should specify the "type" of reranker (mmr, slingshot, userfn) + and any other parameters (e.g. "limit" or "cutoff" for any type, "diversity_bias" for mmr, and "user_function" for userfn). If using slingshot/multilingual_reranker_v1, it must be first in the list. - summary_enabled: whether to generate summaries or not. Defaults to False. - summary_response_lang: language to use for summary generation. - summary_num_results: number of results to use for summary generation. - summary_prompt_name: name of the prompt to use for summary generation. - prompt_text: the custom prompt, using appropriate prompt variables and functions. + summary_enabled (bool): whether to generate summaries or not. Defaults to False. + summary_response_lang (str): language to use for summary generation. + summary_num_results (int): number of results to use for summary generation. + summary_prompt_name (str): name of the prompt to use for summary generation. + To use Vectara's Mockingbird LLM designed specifically for RAG, set to "mockingbird-1.0-2024-07-16". + If you are indexing documents with tables, we recommend "vectara-summary-table-query-ext-dec-2024-gpt-4o". + See (https://docs.vectara.com/docs/learn/grounded-generation/select-a-summarizer) for all available prompts. + prompt_text (str): the custom prompt, using appropriate prompt variables and functions. See (https://docs.vectara.com/docs/1.0/prompts/custom-prompts-with-metadata) for more details. - citations_style: The style of the citations in the summary generation, - either "numeric", "html", "markdown", or "none". - This is a Vectara Scale only feature. Defaults to None. - citations_url_pattern: URL pattern for html and markdown citations. + max_response_chars (int): the desired maximum number of characters for the generated summary. + max_tokens (int): the maximum number of tokens to be returned by the LLM. + temperature (float): The sampling temperature; higher values lead to more randomness. + frequency_penalty (float): How much to penalize repeating tokens in the response, reducing likelihood of repeating the same line. + presence_penalty (float): How much to penalize repeating tokens in the response, increasing the diversity of topics. + citations_style (str): The style of the citations in the summary generation, + either "numeric", "html", "markdown", or "none". Defaults to None. + citations_url_pattern (str): URL pattern for html and markdown citations. If non-empty, specifies the URL pattern to use for citations; e.g. "{doc.url}". See (https://docs.vectara.com/docs/api-reference/search-apis/search - #citation-format-in-summary) for more details. - This is a Vectara Scale only feature. Defaults to None. - citations_text_pattern: The displayed text for citations. + #citation-format-in-summary) for more details. Defaults to None. 
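For example, markdown citations with a URL pattern might be configured like this (pattern and display text are illustrative, assuming `index` is an existing `VectaraIndex`):

```python
query_engine = index.as_query_engine(
    summary_enabled=True,
    citations_style="markdown",
    citations_url_pattern="{doc.url}",  # resolved from document metadata
    citations_text_pattern="(source)",  # optional; numeric text is used if omitted
)
```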
+ citations_text_pattern (str): The displayed text for citations. If not specified, numeric citations are displayed for text. + save_history (bool): Whether to save the query in history. Defaults to False. """ def __init__( self, index: VectaraIndex, similarity_top_k: int = 10, - lambda_val: float = 0.005, + offset: int = 0, + lambda_val: Union[List[float], float] = 0.005, + semantics: Union[List[str], str] = "default", + custom_dimensions: Union[List[Dict], Dict] = {}, n_sentences_before: int = 2, n_sentences_after: int = 2, - filter: str = "", + filter: Union[List[str], str] = "", reranker: VectaraReranker = VectaraReranker.NONE, rerank_k: int = 50, + rerank_limit: Optional[int] = None, + rerank_cutoff: Optional[float] = None, mmr_diversity_bias: float = 0.3, udf_expression: str = None, rerank_chain: List[Dict] = None, summary_enabled: bool = False, summary_response_lang: str = "eng", summary_num_results: int = 7, - summary_prompt_name: str = "vectara-summary-ext-24-05-sml", + summary_prompt_name: str = "vectara-summary-ext-24-05-med-omni", prompt_text: Optional[str] = None, + max_response_chars: Optional[int] = None, + max_tokens: Optional[int] = None, + temperature: Optional[float] = None, + frequency_penalty: Optional[float] = None, + presence_penalty: Optional[float] = None, citations_style: Optional[str] = None, citations_url_pattern: Optional[str] = None, citations_text_pattern: Optional[str] = None, + save_history: bool = False, callback_manager: Optional[CallbackManager] = None, x_source_str: str = "llama_index", **kwargs: Any, @@ -132,42 +148,51 @@ def __init__( """Initialize params.""" self._index = index self._similarity_top_k = similarity_top_k + self._offset = offset self._lambda_val = lambda_val + self._semantics = semantics + self._custom_dimensions = custom_dimensions self._n_sentences_before = n_sentences_before self._n_sentences_after = n_sentences_after self._filter = filter - self._prompt_text = prompt_text - self._citations_style = citations_style.upper() if citations_style else None + self._citations_style = citations_style self._citations_url_pattern = citations_url_pattern self._citations_text_pattern = citations_text_pattern + self._save_history = save_history + + self._conv_id = None self._x_source_str = x_source_str - if reranker == VectaraReranker.MMR: + if reranker in [ + VectaraReranker.MMR, + VectaraReranker.SLINGSHOT, + VectaraReranker.SLINGSHOT_ALT_NAME, + VectaraReranker.UDF, + VectaraReranker.CHAIN, + VectaraReranker.NONE, + ]: self._rerank = True + self._reranker = reranker self._rerank_k = rerank_k - self._mmr_diversity_bias = mmr_diversity_bias - self._reranker_id = MMR_RERANKER_ID - elif ( - reranker == VectaraReranker.SLINGSHOT - or reranker == VectaraReranker.SLINGSHOT_ALT_NAME - ): - self._rerank = True - self._rerank_k = rerank_k - self._reranker_id = SLINGSHOT_RERANKER_ID - elif reranker == VectaraReranker.UDF and udf_expression is not None: - self._rerank = True - self._rerank_k = rerank_k - self._udf_expression = udf_expression - self._reranker_id = UDF_RERANKER_ID - elif ( - reranker == VectaraReranker.CHAIN - and rerank_chain is not None - and len(rerank_chain) > 0 - ): - self._rerank = True - self._rerank_k = rerank_k - self._rerank_chain = rerank_chain - self._reranker_id = None + self._rerank_limit = rerank_limit + self._rerank_cutoff = rerank_cutoff + + if self._reranker == VectaraReranker.MMR: + self._mmr_diversity_bias = mmr_diversity_bias + + elif self._reranker == VectaraReranker.UDF: + self._udf_expression = udf_expression + + elif 
self._reranker == VectaraReranker.CHAIN: + self._rerank_chain = rerank_chain + for sub_reranker in self._rerank_chain: + if sub_reranker["type"] in [ + VectaraReranker.SLINGSHOT, + VectaraReranker.SLINGSHOT_ALT_NAME, + ]: + sub_reranker["type"] = "customer_reranker" + sub_reranker["reranker_name"] = "Rerank_Multilingual_v1" + else: self._rerank = False @@ -176,6 +201,13 @@ def __init__( self._summary_response_lang = summary_response_lang self._summary_num_results = summary_num_results self._summary_prompt_name = summary_prompt_name + self._prompt_text = prompt_text + self._max_response_chars = max_response_chars + self._max_tokens = max_tokens + self._temperature = temperature + self._frequency_penalty = frequency_penalty + self._presence_penalty = presence_penalty + else: self._summary_enabled = False super().__init__(callback_manager) @@ -184,7 +216,6 @@ def _get_post_headers(self) -> dict: """Returns headers that should be attached to each post request.""" return { "x-api-key": self._index._vectara_api_key, - "customer-id": self._index._vectara_customer_id, "Content-Type": "application/json", "X-Source": self._x_source_str, } @@ -208,109 +239,128 @@ def _retrieve( Retrieve top k most similar nodes. Args: - query: Query Bundle + query_bundle: Query Bundle """ return self._vectara_query(query_bundle, **kwargs)[0] # return top_nodes only def _build_vectara_query_body( self, query_str: str, - chat: bool = False, - chat_conv_id: Optional[str] = None, **kwargs: Any, ) -> Dict: - corpus_keys = [ - { - "customerId": self._index._vectara_customer_id, - "corpusId": corpus_id, - "lexicalInterpolationConfig": {"lambda": self._lambda_val}, - } - for corpus_id in self._index._vectara_corpus_id.split(",") - ] - if len(self._filter) > 0: - for k in corpus_keys: - k["metadataFilter"] = self._filter - data = { - "query": [ - { - "query": query_str, - "start": 0, - "numResults": ( - self._rerank_k if self._rerank else self._similarity_top_k - ), - "contextConfig": { - "sentencesBefore": self._n_sentences_before, - "sentencesAfter": self._n_sentences_after, - }, - "corpusKey": corpus_keys, - } - ] + "query": query_str, + "search": { + "offset": self._offset, + "limit": self._rerank_k if self._rerank else self._similarity_top_k, + "context_configuration": { + "sentences_before": self._n_sentences_before, + "sentences_after": self._n_sentences_after, + }, + }, } - if self._rerank and self._reranker_id is not None: - reranking_config = { - "rerankerId": self._reranker_id, - } - if self._reranker_id == MMR_RERANKER_ID: - reranking_config["mmrConfig"] = { - "diversityBias": self._mmr_diversity_bias - } - elif self._reranker_id == UDF_RERANKER_ID: - reranking_config["userFunction"] = self._udf_expression - - data["query"][0]["rerankingConfig"] = reranking_config - - elif self._rerank: - reranking_config = current_config = {} - - for i, rerank_info in enumerate(self._rerank_chain): - rerank_type = rerank_info.get("type", None) - if rerank_type is None: - print("Missing argument 'type' in chain reranker") - else: - current_config["reranker_name"] = CHAIN_RERANKER_NAMES[rerank_type] - - current_config.update( - { - param: value - for param, value in rerank_info.items() - if param != "type" - } - ) - if i < len(self._rerank_chain) - 1: - current_config["next_reranking_config"] = {} - current_config = current_config["next_reranking_config"] + corpora_config = [ + {"corpus_key": corpus_key} + for corpus_key in self._index._vectara_corpus_key.split(",") + ] + + for i in range(len(corpora_config)): + 
corpora_config[i]["custom_dimensions"] = ( + self._custom_dimensions[i] + if isinstance(self._custom_dimensions, list) + else self._custom_dimensions + ) + corpora_config[i]["metadata_filter"] = ( + self._filter[i] if isinstance(self._filter, list) else self._filter + ) + corpora_config[i]["lexical_interpolation"] = ( + self._lambda_val[i] + if isinstance(self._lambda_val, list) + else self._lambda_val + ) + corpora_config[i]["semantics"] = ( + self._semantics[i] + if isinstance(self._semantics, list) + else self._semantics + ) - data["query"][0]["rerankingConfig"] = reranking_config + data["search"]["corpora"] = corpora_config + + if self._rerank: + rerank_config = {} + + if self._reranker in [ + VectaraReranker.SLINGSHOT, + VectaraReranker.SLINGSHOT_ALT_NAME, + ]: + rerank_config["type"] = "customer_reranker" + rerank_config["reranker_name"] = "Rerank_Multilingual_v1" + else: + rerank_config["type"] = self._reranker + + if self._reranker == VectaraReranker.MMR: + rerank_config["diversity_bias"] = self._mmr_diversity_bias + + elif self._reranker == VectaraReranker.UDF: + rerank_config["user_function"] = self._udf_expression + + elif self._reranker == VectaraReranker.CHAIN: + rerank_config["rerankers"] = self._rerank_chain + + if self._rerank_limit: + rerank_config["limit"] = self._rerank_limit + if self._rerank_cutoff: + rerank_config["cutoff"] = self._rerank_cutoff + + data["search"]["reranker"] = rerank_config if self._summary_enabled: summary_config = { - "responseLang": self._summary_response_lang, - "maxSummarizedResults": self._summary_num_results, - "summarizerPromptName": self._summary_prompt_name, + "response_language": self._summary_response_lang, + "max_used_search_results": self._summary_num_results, + "generation_preset_name": self._summary_prompt_name, + "enable_factual_consistency_score": True, } - data["query"][0]["summary"] = [summary_config] if self._prompt_text: - data["query"][0]["summary"][0]["promptText"] = self._prompt_text - if chat: - data["query"][0]["summary"][0]["chat"] = { - "store": True, - "conversationId": chat_conv_id, - } - + summary_config["prompt_template"] = self._prompt_text + if self._max_response_chars: + summary_config["max_response_characters"] = self._max_response_chars + + model_parameters = {} + if self._max_tokens: + model_parameters["max_tokens"] = self._max_tokens + if self._temperature: + model_parameters["temperature"] = self._temperature + if self._frequency_penalty: + model_parameters["frequency_penalty"] = self._frequency_penalty + if self._presence_penalty: + model_parameters["presence_penalty"] = self._presence_penalty + + if len(model_parameters) > 0: + summary_config["model_parameters"] = model_paramters + + citations_config = {} if self._citations_style: - if self._citations_style in ["NUMERIC", "NONE"]: - data["query"][0]["summary"][0]["citationParams"] = { - "style": self._citations_style, - } - - elif self._citations_url_pattern: - data["query"][0]["summary"][0]["citationParams"] = { - "style": self._citations_style, - "urlPattern": self._citations_url_pattern, - "textPattern": self._citations_text_pattern, - } + if self._citations_style in ["numeric", "none"]: + citations_config["style"] = self._citations_style + elif ( + self._citations_style in ["html", "markdown"] + and self._citations_url_pattern + ): + citations_config["style"] = self._citations_style + citations_config["url_pattern"] = self._citations_url_pattern + citations_config["text_pattern"] = self._citations_text_pattern + else: + _logger.warning( + f"Invalid 
citations style {self._citations_style}. Must be one of 'numeric', 'html', 'markdown', or 'none'." + ) + + if len(citations_config) > 0: + summary_config["citations"] = citations_config + + data["generation"] = summary_config + data["save_history"] = self._save_history return data @@ -320,108 +370,121 @@ def _vectara_stream( chat: bool = False, conv_id: Optional[str] = None, verbose: bool = False, + callback_func: Callable[[List, Dict], None] = None, **kwargs: Any, - ) -> TokenGen: + ) -> StreamingResponse: """ Query Vectara index to get for top k most similar nodes. Args: query_bundle: Query Bundle - chat: whether to enable chat - conv_id: conversation ID, if chat enabled + chat: whether to use chat API in Vectara + conv_id: conversation ID, if adding to existing chat """ body = self._build_vectara_query_body(query_bundle.query_str) + body["stream_response"] = True if verbose: print(f"Vectara streaming query request body: {body}") - response = self._index._session.post( - headers=self._get_post_headers(), - url="https://api.vectara.io/v1/stream-query", - data=json.dumps(body), - timeout=self._index.vectara_api_timeout, - stream=True, - ) - if response.status_code != 200: - print( - "Query failed %s", - f"(code {response.status_code}, reason {response.reason}, details " - f"{response.text})", + if chat: + body["chat"] = {"store": True} + if conv_id or self._conv_id: + conv_id = conv_id or self._conv_id + response = self._index._session.post( + headers=self._get_post_headers(), + url=f"https://api.vectara.io/v2/chats/{conv_id}/turns", + data=json.dumps(body), + timeout=self._index.vectara_api_timeout, + stream=True, + ) + else: + response = self._index._session.post( + headers=self._get_post_headers(), + url="https://api.vectara.io/v2/chats", + data=json.dumps(body), + timeout=self._index.vectara_api_timeout, + stream=True, + ) + + else: + response = self._index._session.post( + headers=self._get_post_headers(), + url="https://api.vectara.io/v2/query", + data=json.dumps(body), + timeout=self._index.vectara_api_timeout, + stream=True, ) - return - responses = [] - documents = [] - stream_response = CompletionResponse( - text="", additional_kwargs={"fcs": None}, raw=None, delta=None + if response.status_code != 200: + result = response.json() + if response.status_code == 400: + _logger.error( + f"Query failed (code {response.status_code}), reason {result['field_errors']}" + ) + else: + _logger.error( + f"Query failed (code {response.status_code}), reason {result['messages'][0]}" + ) + return None + + def process_chunks(response): + source_nodes = [] + response_metadata = {} + + def text_generator() -> TokenGen: + for line in response.iter_lines(): + line = line.decode("utf-8") + if line: + key, value = line.split(":", 1) + if key == "data": + line = json.loads(value) + if line["type"] == "generation_chunk": + yield line["generation_chunk"] + + elif line["type"] == "factual_consistency_score": + response_metadata["fcs"] = line[ + "factual_consistency_score" + ] + + elif line["type"] == "search_results": + search_results = line["search_results"] + source_nodes.extend( + [ + NodeWithScore( + node=Node( + text_resource=MediaResource( + text=search_result["text"] + ), + id_=search_result["document_id"], + metadata=search_result[ + "document_metadata" + ], + ), + score=search_result["score"], + ) + for search_result in search_results[ + : self._similarity_top_k + ] + ] + ) + + elif line["type"] == "chat_info": + self._conv_id = line["chat_id"] + response_metadata["chat_id"] = line["chat_id"] + + if 
callback_func: + callback_func(source_nodes, response_metadata) + + return text_generator(), source_nodes, response_metadata + + response_chunks, response_nodes, response_metadata = process_chunks(response) + + return StreamingResponse( + response_gen=response_chunks, + source_nodes=response_nodes, + metadata=response_metadata, ) - for line in response.iter_lines(): - if line: # filter out keep-alive new lines - data = json.loads(line.decode("utf-8")) - result = data["result"] - response_set = result["responseSet"] - if response_set is None: - summary = result.get("summary", None) - if summary is None: - continue - if len(summary.get("status")) > 0: - print( - f"Summary generation failed with status {summary.get('status')[0].get('statusDetail')}" - ) - continue - - # Store conversation ID for chat, if applicable - chat = summary.get("chat", None) - if chat and chat.get("status", None): - st_code = chat["status"] - print(f"Chat query failed with code {st_code}") - if st_code == "RESOURCE_EXHAUSTED": - self.conv_id = None - print("Sorry, Vectara chat turns exceeds plan limit.") - continue - - conv_id = chat.get("conversationId", None) if chat else None - if conv_id: - self.conv_id = conv_id - - # if factual consistency score is provided, pull that from the JSON response - if summary.get("factualConsistency", None): - fcs = summary.get("factualConsistency", {}).get("score", None) - stream_response.additional_kwargs["fcs"] = fcs - continue - - # Yield the summary chunk - chunk = urllib.parse.unquote(summary["text"]) - stream_response.text += chunk - stream_response.delta = chunk - yield stream_response - else: - metadatas = [] - for x in responses: - md = {m["name"]: m["value"] for m in x["metadata"]} - doc_num = x["documentIndex"] - doc_md = { - m["name"]: m["value"] - for m in documents[doc_num]["metadata"] - } - md.update(doc_md) - metadatas.append(md) - - top_nodes = [] - for x, md in zip(responses, metadatas): - doc_inx = x["documentIndex"] - doc_id = documents[doc_inx]["id"] - node = NodeWithScore( - node=TextNode(text=x["text"], id_=doc_id, metadata=md), score=x["score"] # type: ignore - ) - top_nodes.append(node) - stream_response.additional_kwargs["top_nodes"] = top_nodes[ - : self._similarity_top_k - ] - stream_response.delta = None - yield stream_response - return - def _vectara_query( self, query_bundle: QueryBundle, @@ -435,8 +498,8 @@ def _vectara_query( Args: query: Query Bundle - chat: whether to enable chat in Vectara - conv_id: conversation ID, if chat enabled + chat: whether to use chat API in Vectara + conv_id: conversation ID, if adding to existing chat verbose: whether to print verbose output (e.g. 
for debugging) Additional keyword arguments @@ -445,79 +508,78 @@ def _vectara_query( Dict: summary str: conversation ID, if applicable """ - data = self._build_vectara_query_body(query_bundle.query_str, chat, conv_id) + data = self._build_vectara_query_body(query_bundle.query_str) if verbose: print(f"Vectara query request body: {data}") - response = self._index._session.post( - headers=self._get_post_headers(), - url="https://api.vectara.io/v1/query", - data=json.dumps(data), - timeout=self._index.vectara_api_timeout, - ) - if response.status_code != 200: - _logger.error( - "Query failed %s", - f"(code {response.status_code}, reason {response.reason}, details " - f"{response.text})", + if chat: + data["chat"] = {"store": True} + if conv_id: + response = self._index._session.post( + headers=self._get_post_headers(), + url=f"https://api.vectara.io/v2/chats/{conv_id}/turns", + data=json.dumps(data), + timeout=self._index.vectara_api_timeout, + ) + else: + response = self._index._session.post( + headers=self._get_post_headers(), + url="https://api.vectara.io/v2/chats", + data=json.dumps(data), + timeout=self._index.vectara_api_timeout, + ) + + else: + response = self._index._session.post( + headers=self._get_post_headers(), + url="https://api.vectara.io/v2/query", + data=json.dumps(data), + timeout=self._index.vectara_api_timeout, ) - return [], {"text": ""}, "" result = response.json() - if verbose: - print(f"Vectara query response: {result}") - status = result["responseSet"][0]["status"] - if len(status) > 0 and status[0]["code"] != "OK": - _logger.error( - f"Query failed (code {status[0]['code']}, msg={status[0]['statusDetail']}" - ) + if response.status_code != 200: + if response.status_code == 400: + _logger.error( + f"Query failed (code {response.status_code}), reason {result['field_errors']}" + ) + else: + _logger.error( + f"Query failed (code {response.status_code}), reason {result['messages'][0]}" + ) return [], {"text": ""}, "" - responses = result["responseSet"][0]["response"] - documents = result["responseSet"][0]["document"] + if "warnings" in result: + _logger.warning(f"Query warning(s) {(', ').join(result['warnings'])}") - if self._summary_enabled: - summaryJson = result["responseSet"][0]["summary"][0] - if len(summaryJson["status"]) > 0: - print( - f"Summary generation failed with error: '{summaryJson['status'][0]['statusDetail']}'" - ) - return [], {"text": ""}, "" + if verbose: + print(f"Vectara query response: {result}") + if self._summary_enabled: summary = { - "text": ( - urllib.parse.unquote(summaryJson["text"]) - if self._summary_enabled - else None - ), - "fcs": summaryJson["factualConsistency"]["score"], + "text": result["answer"] if chat else result["summary"], + "fcs": result.get("factual_consistency_score"), } - if summaryJson.get("chat", None): - conv_id = summaryJson["chat"]["conversationId"] - else: - conv_id = None else: summary = None - metadatas = [] - for x in responses: - md = {m["name"]: m["value"] for m in x["metadata"]} - doc_num = x["documentIndex"] - doc_md = {m["name"]: m["value"] for m in documents[doc_num]["metadata"]} - md.update(doc_md) - metadatas.append(md) - - top_nodes = [] - for x, md in zip(responses, metadatas): - doc_inx = x["documentIndex"] - doc_id = documents[doc_inx]["id"] - node = NodeWithScore( - node=TextNode(text=x["text"], id_=doc_id, metadata=md), score=x["score"] # type: ignore + search_results = result["search_results"] + top_nodes = [ + NodeWithScore( + node=Node( + text_resource=MediaResource(text=search_result["text"]), + 
id_=search_result["document_id"], + metadata=search_result["document_metadata"], + ), + score=search_result["score"], ) - top_nodes.append(node) + for search_result in search_results[: self._similarity_top_k] + ] - return top_nodes[: self._similarity_top_k], summary, conv_id + conv_id = result["chat_id"] if chat else None + + return top_nodes, summary, conv_id async def _avectara_query( self, @@ -532,15 +594,14 @@ async def _avectara_query( Args: query: Query Bundle - chat: whether to enable chat in Vectara - conv_id: conversation ID, if chat enabled + chat: whether to use chat API in Vectara + conv_id: conversation ID, if adding to existing chat verbose: whether to print verbose output (e.g. for debugging) Additional keyword arguments Returns: List[NodeWithScore]: list of nodes with scores Dict: summary - str: conversation ID, if applicable """ return await self._vectara_query(query_bundle, chat, conv_id, verbose, **kwargs) diff --git a/llama-index-integrations/indices/llama-index-indices-managed-vectara/pyproject.toml b/llama-index-integrations/indices/llama-index-indices-managed-vectara/pyproject.toml index 1378f080986c4..cb31692b77030 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-vectara/pyproject.toml +++ b/llama-index-integrations/indices/llama-index-indices-managed-vectara/pyproject.toml @@ -31,7 +31,7 @@ exclude = ["**/BUILD"] license = "MIT" name = "llama-index-indices-managed-vectara" readme = "README.md" -version = "0.3.1" +version = "0.4.0" [tool.poetry.dependencies] python = ">=3.9,<4.0" diff --git a/llama-index-integrations/indices/llama-index-indices-managed-vectara/tests/test_indices_managed_vectara.py b/llama-index-integrations/indices/llama-index-indices-managed-vectara/tests/test_indices_managed_vectara.py index 7aa9a2673a7d2..a0b644f58a925 100644 --- a/llama-index-integrations/indices/llama-index-indices-managed-vectara/tests/test_indices_managed_vectara.py +++ b/llama-index-integrations/indices/llama-index-indices-managed-vectara/tests/test_indices_managed_vectara.py @@ -1,5 +1,5 @@ from typing import List -from llama_index.core.schema import Document +from llama_index.core.schema import Document, Node, MediaResource from llama_index.core.indices.managed.base import BaseManagedIndex from llama_index.indices.managed.vectara import VectaraIndex import pytest @@ -8,16 +8,22 @@ # # For this test to run properly, please setup as follows: # 1. Create a Vectara account: sign up at https://console.vectara.com/signup -# 2. Create a corpus in your Vectara account, with the following filter attributes: +# 2. Create two corpora with corpus keys "Llamaindex-testing-1" and "llamaindex-testing-2" in your Vectara account with the following filter attributes: +# "Llamaindex-testing-1": # a. doc.test_num (text) # b. doc.test_score (integer) # c. doc.date (text) # d. doc.url (text) -# 3. Create an API_KEY for this corpus with permissions for query and indexing +# "llamaindex-testing-2": +# a. doc.author (text) +# b. doc.title (text) +# c. part.test_num (text) +# d. part.test_score (integer) +# e. part.date (text) +# 3. Create an API_KEY for these corpora with permissions for query and indexing # 4. Setup environment variables: -# VECTARA_API_KEY, VECTARA_CORPUS_ID, VECTARA_CUSTOMER_ID, and OPENAI_API_KEY -# -# Note: In order to run test_citations, you will need a Scale account. 
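A multi-corpus environment consistent with this setup might look like the following (corpus keys and the API key are placeholders):

```python
import os

from llama_index.indices.managed.vectara import VectaraIndex

# Placeholder credentials; both corpora must already exist in the account.
os.environ["VECTARA_CORPUS_KEY"] = "Llamaindex-testing-1,llamaindex-testing-2"
os.environ["VECTARA_API_KEY"] = "zqt_..."  # needs both query and index permissions

index = VectaraIndex()  # picks up both corpora from the environment
```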
+# VECTARA_API_KEY, VECTARA_CORPUS_KEY, and OPENAI_API_KEY +# For VECTARA_CORPUS_KEY, separate the corpus keys for the corpora with a ',' for example: "Llamaindex-testing-1,llamaindex-testing-2". # @@ -29,18 +35,22 @@ def test_class(): def get_docs() -> List[Document]: inputs = [ { + "id": "doc_1", "text": "This is test text for Vectara integration with LlamaIndex", "metadata": {"test_num": "1", "test_score": 10, "date": "2020-02-25"}, }, { + "id": "doc_2", "text": "And now for something completely different", "metadata": {"test_num": "2", "test_score": 2, "date": "2015-10-13"}, }, { + "id": "doc_3", "text": "when 900 years you will be, look as good you will not", "metadata": {"test_num": "3", "test_score": 20, "date": "2023-09-12"}, }, { + "id": "doc_4", "text": "when 850 years you will be, look as good you will not", "metadata": {"test_num": "4", "test_score": 50, "date": "2022-01-01"}, }, @@ -48,13 +58,43 @@ def get_docs() -> List[Document]: docs: List[Document] = [] for inp in inputs: doc = Document( - text=str(inp["text"]), - metadata=inp["metadata"], # type: ignore + id_=inp["id"], + text_resource=MediaResource(text=inp["text"]), + metadata=inp["metadata"], ) docs.append(doc) return docs +def get_nodes() -> List[Node]: + inputs = [ + { + "text": "This is test text for Vectara integration with LlamaIndex", + "metadata": {"test_num": "1", "test_score": 10, "date": "2020-02-25"}, + }, + { + "text": "And now for something completely different", + "metadata": {"test_num": "2", "test_score": 2, "date": "2015-10-13"}, + }, + { + "text": "when 900 years you will be, look as good you will not", + "metadata": {"test_num": "3", "test_score": 20, "date": "2023-09-12"}, + }, + { + "text": "when 850 years you will be, look as good you will not", + "metadata": {"test_num": "4", "test_score": 50, "date": "2022-01-01"}, + }, + ] + + nodes: List[Node] = [] + for inp in inputs: + node = Node( + text_resource=MediaResource(text=inp["text"]), metadata=inp["metadata"] + ) + nodes.append(node) + return nodes + + @pytest.fixture() def vectara1(): docs = get_docs() @@ -67,7 +107,7 @@ def vectara1(): # Tear down code for id in vectara1.doc_ids: - vectara1._delete_doc(id) + vectara1.delete_ref_doc(id) def test_simple_retrieval(vectara1) -> None: @@ -76,6 +116,7 @@ def test_simple_retrieval(vectara1) -> None: res = qe.retrieve("Find me something different") assert len(res) == 1 assert res[0].node.get_content() == docs[1].text + assert res[0].node.node_id == docs[1].doc_id def test_mmr_retrieval(vectara1) -> None: @@ -87,7 +128,7 @@ def test_mmr_retrieval(vectara1) -> None: n_sentences_before=0, n_sentences_after=0, reranker="mmr", - mmr_k=10, + rerank_k=10, mmr_diversity_bias=0.0, ) res = qe.retrieve("how will I look?") @@ -101,7 +142,7 @@ def test_mmr_retrieval(vectara1) -> None: n_sentences_before=0, n_sentences_after=0, reranker="mmr", - mmr_k=10, + rerank_k=10, mmr_diversity_bias=1.0, ) res = qe.retrieve("how will I look?") @@ -114,7 +155,7 @@ def test_retrieval_with_filter(vectara1) -> None: docs = get_docs() assert isinstance(vectara1, VectaraIndex) - qe = vectara1.as_retriever(similarity_top_k=1, filter="doc.test_num = '1'") + qe = vectara1.as_retriever(similarity_top_k=1, filter=["doc.test_num = '1'", ""]) res = qe.retrieve("What does this test?") assert len(res) == 1 assert res[0].node.get_content() == docs[0].text @@ -128,7 +169,7 @@ def test_udf_retrieval(vectara1) -> None: similarity_top_k=2, n_sentences_before=0, n_sentences_after=0, - reranker="udf", + reranker="userfn", 
udf_expression="get('$.score') + get('$.document_metadata.test_score')", ) @@ -142,7 +183,7 @@ def test_udf_retrieval(vectara1) -> None: similarity_top_k=2, n_sentences_before=0, n_sentences_after=0, - reranker="udf", + reranker="userfn", udf_expression="max(0, 5 * get('$.score') - (to_unix_timestamp(now()) - to_unix_timestamp(datetime_parse(get('$.document_metadata.date'), 'yyyy-MM-dd'))) / 31536000)", ) @@ -179,7 +220,7 @@ def test_chain_rerank_retrieval(vectara1) -> None: {"type": "slingshot"}, {"type": "mmr"}, { - "type": "udf", + "type": "userfn", "user_function": "5 * get('$.score') + get('$.document_metadata.test_score') / 2", "limit": 2, }, @@ -209,7 +250,8 @@ def test_chain_rerank_retrieval(vectara1) -> None: # Second query with same retriever to ensure rerank chain configuration remains the same res = qe.retrieve("How will I look when I'm older?") - assert qe._rerank_chain[0].get("type") == "slingshot" + assert qe._rerank_chain[0].get("type") == "customer_reranker" + assert qe._rerank_chain[0].get("reranker_name") == "Rerank_Multilingual_v1" assert qe._rerank_chain[1].get("type") == "mmr" assert res[0].node.get_content() == docs[2].text @@ -231,7 +273,22 @@ def test_custom_prompt(vectara1) -> None: assert "integration" in str(res).lower() assert "llamaindex" in str(res).lower() assert "vectara" in str(res).lower() - assert "first" in str(res).lower() + assert "result" in str(res).lower() + + +def test_update_doc(vectara1) -> None: + docs = get_docs() + + vectara1.update_ref_doc( + document=docs[1], corpus_key="Llamaindex-testing-1", metadata={"test_score": 14} + ) + + qe = vectara1.as_retriever(similarity_top_k=1) + + res = qe.retrieve("Find me something completely different.") + assert len(res) == 1 + assert res[0].node.get_content() == docs[1].text + assert res[0].node.metadata["test_score"] == 14 @pytest.fixture() @@ -243,13 +300,15 @@ def vectara2(): file_path = "docs/docs/examples/data/paul_graham/paul_graham_essay.txt" id = vectara2.insert_file( - file_path, metadata={"url": "https://www.paulgraham.com/worked.html"} + file_path, + metadata={"url": "https://www.paulgraham.com/worked.html"}, + corpus_key="llamaindex-testing-2", ) yield vectara2 # Tear down code - vectara2._delete_doc(id) + vectara2.delete_ref_doc(id, corpus_key="llamaindex-testing-2") def test_file_upload(vectara2) -> None: @@ -263,33 +322,44 @@ def test_file_upload(vectara2) -> None: # test query with Vectara summarization (streaming) query_engine = vectara2.as_query_engine(similarity_top_k=3, streaming=True) res = query_engine.query("What software did Paul Graham write?") - summary = "" - for chunk in res.response_gen: - if chunk.delta: - summary += chunk.delta - if ( - chunk.additional_kwargs - and "fcs" in chunk.additional_kwargs - and chunk.additional_kwargs["fcs"] is not None - ): - assert chunk.additional_kwargs["fcs"] >= 0 + summary = str(res) + assert "paul graham" in summary.lower() and "software" in summary.lower() + assert res.metadata["fcs"] >= 0 + assert len(res.source_nodes) > 0 # test query with VectorStoreQuery (using OpenAI for summarization) - query_engine = vectara2.as_query_engine( - similarity_top_k=3, summary_enabled=False, verbose=True - ) + query_engine = vectara2.as_query_engine(similarity_top_k=3, summary_enabled=False) res = query_engine.query("What software did Paul Graham write?") assert "paul graham" in str(res).lower() and "software" in str(res).lower() # test query with Vectara summarization (default) - query_engine = vectara2.as_query_engine(similarity_top_k=3, verbose=True) 
+ query_engine = vectara2.as_query_engine(similarity_top_k=3) res = query_engine.query("How is Paul related to Reddit?") summary = res.response assert "paul graham" in summary.lower() and "reddit" in summary.lower() assert "https://www.paulgraham.com/worked.html" in str(res.source_nodes) +def test_knee_reranker(vectara2) -> None: + query_engine = vectara2.as_query_engine( + rerank_k=50, + similarity_top_k=50, + reranker="chain", + rerank_chain=[ + {"type": "slingshot"}, + {"type": "userfn", "user_function": "knee()"}, + ], + ) + + # test query with knee reranker (should return less results than rerank_k) + res = query_engine.query("How is Paul related to Reddit?") + summary = res.response + assert "paul" in summary.lower() and "reddit" in summary.lower() + assert "https://www.paulgraham.com/worked.html" in str(res.source_nodes) + assert len(res.source_nodes) > 0 and len(res.source_nodes) < 20 + + def test_citations(vectara2) -> None: # test markdown citations query_engine = vectara2.as_query_engine( @@ -328,3 +398,105 @@ def test_citations(vectara2) -> None: summary = res.response assert "https://www.paulgraham.com/worked.html" in summary assert re.search(r"\[\d+\]", summary) + + +def test_chat(vectara2) -> None: + # Test chat initialization + chat_engine = vectara2.as_chat_engine( + reranker="chain", + rerank_k=30, + rerank_chain=[{"type": "slingshot"}, {"type": "mmr", "diversity_bias": 0.2}], + ) + res = chat_engine.chat("What grad schools did Paul apply to?") + summary = res.response + + assert all(s in summary.lower() for s in ["mit", "yale", "harvard"]) + assert res.metadata["fcs"] > 0 + chat_id = chat_engine.conv_id + assert chat_id is not None + + # Test chat follow up + res = chat_engine.chat("What did he learn at the graduate school he selected?") + summary = res.response + + assert "learn" in summary.lower() + assert "harvard" in summary.lower() + assert res.metadata["fcs"] > 0 + assert chat_engine.conv_id == chat_id + + # Test chat follow up with streaming + res = chat_engine.stream_chat( + "How did attending graduate school help him in his career?" 
+ ) + summary = str(res) + + assert len(res.source_nodes) > 0 + assert chat_engine.conv_id == chat_id + + # Test chat initialization with streaming + chat_engine = vectara2.as_chat_engine( + reranker="chain", + rerank_k=30, + rerank_chain=[ + {"type": "slingshot", "cutoff": 0.25}, + {"type": "mmr", "diversity_bias": 0.2}, + ], + ) + res = chat_engine.stream_chat("How did Paul feel when Yahoo bought his company?") + summary = str(res) + + assert "yahoo" in summary.lower() + assert "felt" in summary.lower() + assert chat_engine._retriever._conv_id is not None + assert chat_engine._retriever._conv_id != chat_id + assert len(res.source_nodes) > 0 + + +@pytest.fixture() +def vectara3(): + nodes = get_nodes() + try: + vectara3 = VectaraIndex() + vectara3.add_nodes( + nodes, + document_id="doc_1", + document_metadata={"author": "Vectara", "title": "LlamaIndex Integration"}, + corpus_key="llamaindex-testing-2", + ) + except ValueError: + pytest.skip("Missing Vectara credentials, skipping test") + + yield vectara3 + + # Tear down code + for id in vectara3.doc_ids: + vectara3.delete_ref_doc(id, corpus_key="llamaindex-testing-2") + + +def test_simple_retrieval_with_nodes(vectara3) -> None: + nodes = get_nodes() + qe = vectara3.as_retriever( + similarity_top_k=1, n_sentences_before=0, n_sentences_after=0 + ) + res = qe.retrieve("Find me something different") + assert len(res) == 1 + assert res[0].node.metadata["author"] == "Vectara" + assert res[0].node.metadata["title"] == "LlamaIndex Integration" + assert res[0].node.get_content() == nodes[1].text_resource.text + + +def test_filter_with_nodes(vectara3) -> None: + nodes = get_nodes() + qe = vectara3.as_retriever( + similarity_top_k=2, + n_sentences_before=0, + n_sentences_after=0, + lambda_val=[0.2, 0.01], + filter=["", "doc.author = 'Vectara' AND part.test_score > 10"], + ) + + res = qe.retrieve("How will I look when I'm older?") + assert len(res) == 2 + assert "look as good you will not" in res[0].node.get_content() + assert "look as good you will not" in res[1].node.get_content() + assert res[0].node.get_content() != res[1].node.get_content() diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/README.md b/llama-index-integrations/tools/llama-index-tools-vectara-query/README.md index 3b48504bd8a2d..d06d0cbc46764 100644 --- a/llama-index-integrations/tools/llama-index-tools-vectara-query/README.md +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/README.md @@ -4,12 +4,13 @@ This tool connects to a Vectara corpus and allows agents to make semantic search ## Usage +Please note that this usage example relies on version >=0.3.0. + This tool has a more extensive example usage documented in a Jupyter notebok [here](https://github.com/run-llama/llama_index/blob/main/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb) -To use this tool, you'll need the following information in your environment: +To use this tool, you'll need a Vectara account (If you don't have an account, you can create one [here](https://vectara.com/integrations/llamaindex)) and the following information in your environment: -- `VECTARA_CUSTOMER_ID`: The customer id for your Vectara account. If you don't have an account, you can create one [here](https://vectara.com/integrations/llamaindex). -- `VECTARA_CORPUS_ID`: The corpus id for the Vectara corpus that you want your tool to search for information. 
If you need help creating a corpus with your data, follow this [Quick Start](https://docs.vectara.com/docs/quickstart) guide. +- `VECTARA_CORPUS_KEY`: The corpus key for the Vectara corpus that you want your tool to search for information. If you need help creating a corpus with your data, follow this [Quick Start](https://docs.vectara.com/docs/quickstart) guide. - `VECTARA_API_KEY`: An API key that can perform queries on this corpus. Here's an example usage of the VectaraQueryToolSpec. diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb b/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb index 028979f10f8a7..d12e71e8bcab6 100644 --- a/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/examples/vectara_query.ipynb @@ -21,13 +21,13 @@ "id": "4452df2a-976b-4e1c-9f9b-2fa983569948", "metadata": {}, "source": [ + "Please note that this example notebook is only for Vectara Query tool versions >=0.3.0\n", + "\n", "To get started with Vectara, [sign up](https://vectara.com/integrations/llamaindex) (if you haven't already) and follow our [quickstart](https://docs.vectara.com/docs/quickstart) guide to create a corpus and an API key.\n", "\n", "Once you have done this, add the following variables to your environment:\n", "\n", - "`VECTARA_CUSTOMER_ID`: The customer id for your Vectara account.\n", - "\n", - "`VECTARA_CORPUS_ID`: The corpus id for the Vectara corpus that you want your tool to search for information.\n", + "`VECTARA_CORPUS_KEY`: The corpus key for the Vectara corpus that you want your tool to search for information.\n", "\n", "`VECTARA_API_KEY`: An API key that can perform queries on this corpus.\n", "\n", diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/base.py b/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/base.py index dff6bf75ff4d9..75cbb5622f89b 100644 --- a/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/base.py +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/llama_index/tools/vectara_query/base.py @@ -1,4 +1,4 @@ -from typing import Any, List, Dict, Optional +from typing import Any, Union, List, Dict, Optional from llama_index.core.tools.tool_spec.base import BaseToolSpec from llama_index.core.schema import QueryBundle from llama_index.core.callbacks.base import CallbackManager @@ -15,80 +15,114 @@ class VectaraQueryToolSpec(BaseToolSpec): def __init__( self, - vectara_customer_id: Optional[str] = None, - vectara_corpus_id: Optional[str] = None, + vectara_corpus_key: Optional[str] = None, vectara_api_key: Optional[str] = None, num_results: int = 5, - lambda_val: float = 0.005, + offset: int = 0, + lambda_val: Union[List[float], float] = 0.005, + semantics: Union[List[str], str] = "default", + custom_dimensions: Union[List[Dict], Dict] = {}, n_sentences_before: int = 2, n_sentences_after: int = 2, - metadata_filter: str = "", + metadata_filter: Union[List[str], str] = "", reranker: str = "mmr", rerank_k: int = 50, + rerank_limit: Optional[int] = None, + rerank_cutoff: Optional[float] = None, mmr_diversity_bias: float = 0.2, udf_expression: str = None, rerank_chain: List[Dict] = None, summarizer_prompt_name: str = "vectara-summary-ext-24-05-sml", summary_num_results: int = 5, summary_response_lang: str = 
"eng", + prompt_text: Optional[str] = None, + max_response_chars: Optional[int] = None, + max_tokens: Optional[int] = None, + temperature: Optional[float] = None, + frequency_penalty: Optional[float] = None, + presence_penalty: Optional[float] = None, citations_style: Optional[str] = None, citations_url_pattern: Optional[str] = None, citations_text_pattern: Optional[str] = None, + save_history: bool = False, callback_manager: Optional[CallbackManager] = None, **kwargs: Any, ) -> None: """Initializes the Vectara API and query parameters. Parameters: - - vectara_customer_id (str): Your Vectara customer ID. - If not specified, reads for environment variable "VECTARA_CUSTOMER_ID". - - vectara_corpus_id (str): The corpus ID for the corpus you want to search for information. - If not specified, reads for environment variable "VECTARA_CORPUS_ID". + - vectara_corpus_key (str): The corpus key for the corpus you want to search for information. + If not specified, reads for environment variable "VECTARA_CORPUS_KEY". - vectara_api_key (str): An API key that has query permissions for the given corpus. If not specified, reads for environment variable "VECTARA_API_KEY". - num_results (int): Number of search results to return with response. - - lambda_val (float): Lambda value for the Vectara query. + - offset (int): Number of results to skip. + - lambda_val (Union[List[float], float]): Lambda value for the Vectara query. + Provide single value for one corpus or a list of values for each corpus. + - semantics (Union[List[str], str]): Indicates whether the query is intended as a query or response. + Provide single value for one corpus or a list of values for each corpus. + - custom_dimensions (Dict): Custom dimensions for the query. + See (https://docs.vectara.com/docs/learn/semantic-search/add-custom-dimensions) + for more details about usage. + Provide single dict for one corpus or a list of dicts for each corpus. - n_sentences_before (int): Number of sentences before the summary. - n_sentences_after (int): Number of sentences after the summary. - - metadata_filter (str): A string with expressions to filter the search documents. - - reranker (str): The reranker mode, either "mmr", "slingshot", "multilingual_reranker_v1", "udf", or "none". + - metadata_filter (Union[List[str], str]): A string with expressions to filter the search documents for each corpus. + Provide single string for one corpus or a list of strings for each corpus (if multiple corpora). + - reranker (str): The reranker to use, either mmr, slingshot (i.e. multilingual_reranker_v1), userfn, or chain. - rerank_k (int): Number of top-k documents for reranking. + - rerank_limit (int): maximum number of results to return after reranking, defaults to 50. + Don't specify this for chain reranking. Instead, put the "limit" parameter in the dict for each individual reranker. + - rerank_cutoff (float): minimum score threshold for results to include after reranking, defaults to 0. + Don't specify this for chain reranking. Instead, put the "chain" parameter in the dict for each individual reranker. - mmr_diversity_bias (float): MMR diversity bias. - udf_expression (str): the user defined expression for reranking results. See (https://docs.vectara.com/docs/learn/user-defined-function-reranker) for more details about syntax for udf reranker expressions. - rerank_chain: a list of rerankers to be applied in a sequence and their associated parameters - for the chain reranker. 
Each element should specify the "type" of reranker (mmr, slingshot, udf) + for the chain reranker. Each element should specify the "type" of reranker (mmr, slingshot, userfn) and any other parameters (e.g. "limit" or "cutoff" for any type, "diversity_bias" for mmr, and "user_function" for udf). If using slingshot/multilingual_reranker_v1, it must be first in the list. - summarizer_prompt_name (str): If enable_summarizer is True, the Vectara summarizer to use. - summary_num_results (int): If enable_summarizer is True, the number of summary results. - summary_response_lang (str): If enable_summarizer is True, the response language for the summary. + - prompt_text (str): the custom prompt, using appropriate prompt variables and functions. + See (https://docs.vectara.com/docs/1.0/prompts/custom-prompts-with-metadata) + for more details. + - max_response_chars (int): the desired maximum number of characters for the generated summary. + - max_tokens (int): the maximum number of tokens to be returned by the LLM. + - temperature (float): The sampling temperature; higher values lead to more randomness. + - frequency_penalty (float): How much to penalize repeating tokens in the response, reducing likelihood of repeating the same line. + - presence_penalty (float): How much to penalize repeating tokens in the response, increasing the diversity of topics. - citations_style (str): The style of the citations in the summary generation, - either "numeric", "html", "markdown", or "none". - This is a Vectara Scale only feature. Defaults to None. + either "numeric", "html", "markdown", or "none". Defaults to None. - citations_url_pattern (str): URL pattern for html and markdown citations. If non-empty, specifies the URL pattern to use for citations; e.g. "{doc.url}". See (https://docs.vectara.com/docs/api-reference/search-apis/search#citation-format-in-summary) for more details. - This is a Vectara Scale only feature. Defaults to None. + Defaults to None. - citations_text_pattern (str): The displayed text for citations. If not specified, numeric citations are displayed. + - save_history (bool): Whether to save the query in history. Defaults to False. 
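Putting these parameters together, a typical construction and call might look like this (all values illustrative):

```python
from llama_index.tools.vectara_query import VectaraQueryToolSpec

tool_spec = VectaraQueryToolSpec(
    num_results=5,
    reranker="mmr",
    mmr_diversity_bias=0.2,
    summary_response_lang="eng",
)

hits = tool_spec.semantic_search("What is Vectara?")  # [{"text", "citation_metadata"}, ...]
answer = tool_spec.rag_query("What is Vectara?")  # {"summary", "factual_consistency_score", ...}
```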
""" self.index = VectaraIndex( - vectara_customer_id=vectara_customer_id, - vectara_corpus_id=vectara_corpus_id, + vectara_corpus_key=vectara_corpus_key, vectara_api_key=vectara_api_key, ) self.retriever = VectaraRetriever( index=self.index, similarity_top_k=num_results, + offset=offset, lambda_val=lambda_val, + semantics=semantics, + custom_dimensions=custom_dimensions, n_sentences_before=n_sentences_before, n_sentences_after=n_sentences_after, filter=metadata_filter, reranker=reranker, rerank_k=rerank_k, + rerank_limit=rerank_limit, + rerank_cutoff=rerank_cutoff, mmr_diversity_bias=mmr_diversity_bias, udf_expression=udf_expression, rerank_chain=rerank_chain, @@ -100,12 +134,17 @@ def __init__( query_engine_retriever = VectaraRetriever( index=self.index, similarity_top_k=num_results, + offset=offset, lambda_val=lambda_val, + semantics=semantics, + custom_dimensions=custom_dimensions, n_sentences_before=n_sentences_before, n_sentences_after=n_sentences_after, filter=metadata_filter, reranker=reranker, rerank_k=rerank_k, + rerank_limit=rerank_limit, + rerank_cutoff=rerank_cutoff, mmr_diversity_bias=mmr_diversity_bias, udf_expression=udf_expression, rerank_chain=rerank_chain, @@ -113,6 +152,11 @@ def __init__( summary_response_lang=summary_response_lang, summary_num_results=summary_num_results, summary_prompt_name=summarizer_prompt_name, + prompt_text=prompt_text, + max_response_chars=max_response_chars, + max_tokens=max_tokens, + temperature=temperature, + frequency_penalty=frequency_penalty, citations_style=citations_style, citations_url_pattern=citations_url_pattern, citations_text_pattern=citations_text_pattern, @@ -142,7 +186,7 @@ def semantic_search( return [ { - "text": doc.node.text, + "text": doc.node.text_resource.text, "citation_metadata": doc.node.metadata, } for doc in response diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/pyproject.toml b/llama-index-integrations/tools/llama-index-tools-vectara-query/pyproject.toml index 0da57cfa54d2d..83b1de3a8aadc 100644 --- a/llama-index-integrations/tools/llama-index-tools-vectara-query/pyproject.toml +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/pyproject.toml @@ -28,12 +28,12 @@ license = "MIT" name = "llama-index-tools-vectara-query" packages = [{include = "llama_index/"}] readme = "README.md" -version = "0.2.0" +version = "0.3.0" [tool.poetry.dependencies] python = ">=3.9,<4.0" llama-index-core = "^0.12.0" -llama-index-indices-managed-vectara = "^0.3.0" +llama-index-indices-managed-vectara = "^0.4.0" [tool.poetry.group.dev.dependencies] black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} diff --git a/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/test_tools_vectara_query.py b/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/test_tools_vectara_query.py index 46310e6f011eb..74fcdfbb9f899 100644 --- a/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/test_tools_vectara_query.py +++ b/llama-index-integrations/tools/llama-index-tools-vectara-query/tests/test_tools_vectara_query.py @@ -1,5 +1,5 @@ from typing import List -from llama_index.core.schema import Document +from llama_index.core.schema import Document, MediaResource from llama_index.indices.managed.vectara import VectaraIndex from llama_index.core.tools.tool_spec.base import BaseToolSpec from llama_index.tools.vectara_query import VectaraQueryToolSpec @@ -11,16 +11,15 @@ # # For this test to run properly, please setup as follows: # 1. 
Create a Vectara account: sign up at https://console.vectara.com/signup -# 2. Create a corpus in your Vectara account, with the following filter attributes: +# 2. Create two corpora in your Vectara account with the following filter attributes in the first corpus: # a. doc.test_num (text) # b. doc.test_score (integer) # c. doc.date (text) # d. doc.url (text) -# 3. Create an API_KEY for this corpus with permissions for query and indexing +# 3. Create an API_KEY for these corpora with permissions for query and indexing # 4. Setup environment variables: -# VECTARA_API_KEY, VECTARA_CORPUS_ID, VECTARA_CUSTOMER_ID, and OPENAI_API_KEY -# -# Note: In order to run test_citations, you will need a Scale account. +# VECTARA_API_KEY, VECTARA_CORPUS_KEY, and OPENAI_API_KEY +# For VECTARA_CORPUS_KEY, separate the corpus keys for the corpora with a ',' for example: "llamaindex-testing-1,llamaindex-testing-2". # @@ -32,18 +31,22 @@ def test_class(): def get_docs() -> List[Document]: inputs = [ { + "id": "doc_1", "text": "This is test text for Vectara integration with LlamaIndex", "metadata": {"test_num": "1", "test_score": 10, "date": "2020-02-25"}, }, { + "id": "doc_2", "text": "And now for something completely different", "metadata": {"test_num": "2", "test_score": 2, "date": "2015-10-13"}, }, { + "id": "doc_3", "text": "when 900 years you will be, look as good you will not", "metadata": {"test_num": "3", "test_score": 20, "date": "2023-09-12"}, }, { + "id": "doc_4", "text": "when 850 years you will be, look as good you will not", "metadata": {"test_num": "4", "test_score": 50, "date": "2022-01-01"}, }, @@ -51,7 +54,8 @@ def get_docs() -> List[Document]: docs: List[Document] = [] for inp in inputs: doc = Document( - text=str(inp["text"]), + id_=inp["id"], + text_resource=MediaResource(text=inp["text"]), metadata=inp["metadata"], ) docs.append(doc) @@ -70,7 +74,7 @@ def vectara1(): # Tear down code for id in vectara1.doc_ids: - vectara1._delete_doc(id) + vectara1.delete_ref_doc(id) def test_simple_retrieval(vectara1) -> None: @@ -78,7 +82,7 @@ def test_simple_retrieval(vectara1) -> None: tool_spec = VectaraQueryToolSpec(num_results=1) res = tool_spec.semantic_search("Find me something different.") assert len(res) == 1 - assert res[0]["text"] == docs[1].text + assert res[0]["text"] == docs[1].text_resource.text def test_mmr_retrieval(vectara1) -> None: @@ -95,8 +99,8 @@ def test_mmr_retrieval(vectara1) -> None: ) res = tool_spec.semantic_search("How will I look?") assert len(res) == 2 - assert res[0]["text"] == docs[2].text - assert res[1]["text"] == docs[3].text + assert res[0]["text"] == docs[2].text_resource.text + assert res[1]["text"] == docs[3].text_resource.text # test with diversity bias = 1 tool_spec = VectaraQueryToolSpec( @@ -109,18 +113,18 @@ def test_mmr_retrieval(vectara1) -> None: ) res = tool_spec.semantic_search("How will I look?") assert len(res) == 2 - assert res[0]["text"] == docs[2].text + assert res[0]["text"] == docs[2].text_resource.text def test_retrieval_with_filter(vectara1) -> None: docs = get_docs() tool_spec = VectaraQueryToolSpec( - num_results=1, metadata_filter="doc.test_num = '1'" + num_results=1, metadata_filter=["doc.test_num = '1'", ""] ) res = tool_spec.semantic_search("What does this test?") assert len(res) == 1 - assert res[0]["text"] == docs[0].text + assert res[0]["text"] == docs[0].text_resource.text def test_udf_retrieval(vectara1) -> None: @@ -131,28 +135,28 @@ def test_udf_retrieval(vectara1) -> None: num_results=2, n_sentences_before=0, n_sentences_after=0, - 
reranker="udf", + reranker="userfn", udf_expression="get('$.score') + get('$.document_metadata.test_score')", ) res = tool_spec.semantic_search("What will the future look like?") assert len(res) == 2 - assert res[0]["text"] == docs[3].text - assert res[1]["text"] == docs[2].text + assert res[0]["text"] == docs[3].text_resource.text + assert res[1]["text"] == docs[2].text_resource.text # test with dates: Weight of score subtracted by number of years from current date tool_spec = VectaraQueryToolSpec( num_results=2, n_sentences_before=0, n_sentences_after=0, - reranker="udf", + reranker="userfn", udf_expression="max(0, 5 * get('$.score') - (to_unix_timestamp(now()) - to_unix_timestamp(datetime_parse(get('$.document_metadata.date'), 'yyyy-MM-dd'))) / 31536000)", ) res = tool_spec.semantic_search("What will the future look like?") assert len(res) == 2 - assert res[0]["text"] == docs[2].text - assert res[1]["text"] == docs[3].text + assert res[0]["text"] == docs[2].text_resource.text + assert res[1]["text"] == docs[3].text_resource.text def test_chain_rerank_retrieval(vectara1) -> None: @@ -169,7 +173,7 @@ def test_chain_rerank_retrieval(vectara1) -> None: res = tool_spec.semantic_search("What's this all about?") assert len(res) == 2 - assert res[0]["text"] == docs[0].text + assert res[0]["text"] == docs[0].text_resource.text # Test chain with UDF and limit tool_spec = VectaraQueryToolSpec( @@ -181,7 +185,7 @@ def test_chain_rerank_retrieval(vectara1) -> None: {"type": "slingshot"}, {"type": "mmr"}, { - "type": "udf", + "type": "userfn", "user_function": "5 * get('$.score') + get('$.document_metadata.test_score') / 2", "limit": 2, }, @@ -190,8 +194,8 @@ def test_chain_rerank_retrieval(vectara1) -> None: res = tool_spec.semantic_search("What's this all about?") assert len(res) == 2 - assert res[0]["text"] == docs[3].text - assert res[1]["text"] == docs[2].text + assert res[0]["text"] == docs[3].text_resource.text + assert res[1]["text"] == docs[2].text_resource.text # Test chain with cutoff tool_spec = VectaraQueryToolSpec( @@ -207,7 +211,37 @@ def test_chain_rerank_retrieval(vectara1) -> None: res = tool_spec.semantic_search("What's this all about?") assert len(res) == 1 - assert res[0]["text"] == docs[0].text + assert res[0]["text"] == docs[0].text_resource.text + + # Second query with same retriever to ensure rerank chain configuration remains the same + res = tool_spec.semantic_search("How will I look when I'm older?") + assert tool_spec.retriever._rerank_chain[0].get("type") == "customer_reranker" + assert ( + tool_spec.retriever._rerank_chain[0].get("reranker_name") + == "Rerank_Multilingual_v1" + ) + assert tool_spec.retriever._rerank_chain[1].get("type") == "mmr" + assert res[0]["text"] == docs[2].text_resource.text + + +def test_custom_prompt(vectara1) -> None: + docs = get_docs() + + tool_spec = VectaraQueryToolSpec( + num_results=3, + n_sentences_before=0, + n_sentences_after=0, + reranker="mmr", + mmr_diversity_bias=0.2, + prompt_text='[\n {"role": "system", "content": "You are an expert in summarizing the future of Vectara\'s inegration with LlamaIndex. 
Your summaries are insightful, concise, and highlight key innovations and changes."},\n #foreach ($result in $vectaraQueryResults)\n {"role": "user", "content": "What are the key points in result number $vectaraIdxWord[$foreach.index] about Vectara\'s LlamaIndex integration?"},\n {"role": "assistant", "content": "In result number $vectaraIdxWord[$foreach.index], the key points are: ${result.getText()}"},\n #end\n {"role": "user", "content": "Can you generate a comprehensive summary on \'Vectara\'s LlamaIndex Integration\' incorporating all the key points discussed?"}\n]\n', + ) + + res = tool_spec.rag_query("How will Vectara's integration look in the future?") + assert "integration" in res["summary"].lower() + assert "llamaindex" in res["summary"].lower() + assert "vectara" in res["summary"].lower() + assert "result" in res["summary"].lower() + assert res["factual_consistency_score"] > 0 @pytest.fixture() @@ -225,7 +259,7 @@ def vectara2(): yield vectara2 # Tear down code - vectara2._delete_doc(id) + vectara2.delete_ref_doc(id) def test_basic_rag_query(vectara2) -> None: @@ -287,7 +321,7 @@ def test_agent_basic(vectara2) -> None: tool_spec = VectaraQueryToolSpec(num_results=10, reranker="mmr") agent = OpenAIAgent.from_tools(tool_spec.to_tool_list()) - res = agent.chat("Please summarize Paul Graham's work").response + res = agent.chat("Please summarize Paul's thoughts about paintings?").response agent_tasks = agent.get_completed_tasks() tool_called = ( agent_tasks[0] @@ -296,12 +330,12 @@ def test_agent_basic(vectara2) -> None: .function.name ) assert tool_called == "rag_query" - assert "bel" in res.lower() and "lisp" in res.lower() + assert "paint" in res.lower() and "paul" in res.lower() def test_agent_filter(vectara1) -> None: tool_spec = VectaraQueryToolSpec( - num_results=1, metadata_filter="doc.date > '2022-02-01'" + num_results=1, metadata_filter=["doc.date > '2022-02-01'", ""] ) agent = OpenAIAgent.from_tools(tool_spec.to_tool_list()) diff --git a/llama-index-packs/llama-index-packs-vectara-rag/README.md b/llama-index-packs/llama-index-packs-vectara-rag/README.md index c7a44fe6156fa..dab3a6a4bfd1b 100644 --- a/llama-index-packs/llama-index-packs-vectara-rag/README.md +++ b/llama-index-packs/llama-index-packs-vectara-rag/README.md @@ -1,6 +1,7 @@ # Vectara RAG Pack This LlamaPack provides an end-to-end Retrieval Augmented Generation flow using Vectara. +Please note that this guide is only relevant for versions >= 0.4.0 To use the Vectara RAG Pack, you will need a Vectara account. If you don't have one already, you can [sign up](https://vectara.com/integrations/llamaindex) and follow our [Quick Start](https://docs.vectara.com/docs/quickstart) guide to create a corpus and an API key (make sure it has both indexing and query permissions). @@ -8,8 +9,7 @@ and follow our [Quick Start](https://docs.vectara.com/docs/quickstart) guide to You can then configure your environment or provide the following arguments directly when initializing your VectaraIndex: ``` -VECTARA_CUSTOMER_ID=your_customer_id -VECTARA_CORPUS_ID=your_corpus_id +VECTARA_CORPUS_KEY=your_corpus_key VECTARA_API_KEY=your-vectara-api-key ``` @@ -55,7 +55,7 @@ Additional optional arguments to VectaraRAG: determines the balance between pure neural search (0) and keyword matching (1). - `n_sentences_before` and `n_sentences_after`: determine the number of sentences before/after the matching fact to use with the summarization LLM. defaults to 2. 
-- `reranker`: 'none', 'mmr', 'multilingual_reranker_v1', 'udf', or 'chain' +- `reranker`: 'none', 'mmr', 'multilingual_reranker_v1', 'userfn', or 'chain' The reranker name 'slingshot' is the same as 'multilingual_reranker_v1' (backwards compatible) - `rerank_k`: the number of results to use for reranking, defaults to 50. - `mmr_diversity_bias`: when using the mmr reranker, determines the degree @@ -69,8 +69,7 @@ Additional optional arguments to VectaraRAG: - `summary_response_lang`: language to use (ISO 639-2 code) for summary generation. defaults to "eng". - `summary_num_results`: number of results to use for summary generation. Defaults to 7. - `summary_prompt_name`: name of the prompt to use for summary generation. - Defaults to 'vectara-summary-ext-v1.2.0'. - Scale customers can use 'vectara-summary-ext-v1.3.0 + Defaults to 'vectara-summary-ext-24-05-sml'. For example to use maximal diversity with MMR: diff --git a/llama-index-packs/llama-index-packs-vectara-rag/pyproject.toml b/llama-index-packs/llama-index-packs-vectara-rag/pyproject.toml index 330a68bb55d7c..26d7fc39b09da 100644 --- a/llama-index-packs/llama-index-packs-vectara-rag/pyproject.toml +++ b/llama-index-packs/llama-index-packs-vectara-rag/pyproject.toml @@ -29,11 +29,11 @@ license = "MIT" maintainers = ["ofermend"] name = "llama-index-packs-vectara-rag" readme = "README.md" -version = "0.3.0" +version = "0.4.0" [tool.poetry.dependencies] python = ">=3.9,<4.0" -llama-index-indices-managed-vectara = "^0.3.0" +llama-index-indices-managed-vectara = "^0.4.0" llama-index-core = "^0.12.0" [tool.poetry.group.dev.dependencies]
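
---

Illustrative usage (editor's sketch, not part of the patch): taken together, the hunks above replace the customer-ID/corpus-ID pair with a single `VECTARA_CORPUS_KEY`, rename the `udf` reranker to `userfn`, and read document text from `node.text_resource.text`. The sketch below shows the migrated call pattern under those assumptions; it presumes `VECTARA_CORPUS_KEY`, `VECTARA_API_KEY`, and `OPENAI_API_KEY` are exported as in the updated tests, and the query strings and UDF expression are placeholders.

```python
# Editor's sketch of the post-migration API, based on the tests in this patch.
# Assumes VECTARA_CORPUS_KEY and VECTARA_API_KEY are set in the environment;
# the tool spec constructs its own VectaraIndex from them (see base.py above).
from llama_index.tools.vectara_query import VectaraQueryToolSpec

tool_spec = VectaraQueryToolSpec(
    num_results=5,
    n_sentences_before=1,
    n_sentences_after=1,
    reranker="userfn",  # renamed from "udf" in this release
    udf_expression="get('$.score')",  # placeholder user-defined scoring function
)

# semantic_search() returns dicts whose "text" now comes from
# node.text_resource.text rather than node.text.
for hit in tool_spec.semantic_search("What does the 0.4.0 migration change?"):
    print(hit["text"], hit["citation_metadata"])

# rag_query() returns a generated summary plus a factual consistency score,
# as asserted in test_custom_prompt above.
res = tool_spec.rag_query("Summarize the indexed documents.")
print(res["summary"], res["factual_consistency_score"])
```

As in the updated tests, `VectaraQueryToolSpec` builds its own `VectaraIndex` from the environment, so no index object needs to be passed explicitly.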