diff --git a/instill/helpers/const.py b/instill/helpers/const.py
index eccbeae..8df566a 100644
--- a/instill/helpers/const.py
+++ b/instill/helpers/const.py
@@ -1,6 +1,6 @@
 import os
 from enum import Enum
-from typing import Any, Dict, Union
+from typing import Any, Dict, List, Union
 
 import numpy as np
 
@@ -23,11 +23,14 @@ class DataType(Enum):
 
 class TextGenerationInput:
     prompt = ""
+    prompt_images: Union[List[np.ndarray], None] = None
+    chat_history: Union[List[str], None] = None
+    system_message: Union[str, None] = None
     max_new_tokens = 100
-    top_k = 1
     temperature = 0.8
+    top_k = 1
     random_seed = 0
-    stop_words: Any = ""
+    stop_words: Any = ""  # Optional
     extra_params: Dict[str, str] = {}
 
 
@@ -53,21 +56,28 @@ class ImageToImageInput:
 
 
 class TextGenerationChatInput:
-    conversation = ""
+    prompt = ""
+    prompt_images: Union[List[np.ndarray], None] = None
+    chat_history: Union[List[str], None] = None
+    system_message: Union[str, None] = None
     max_new_tokens = 100
-    top_k = 1
     temperature = 0.8
+    top_k = 1
     random_seed = 0
+    stop_words: Any = ""  # Optional
     extra_params: Dict[str, str] = {}
 
 
 class VisualQuestionAnsweringInput:
-    prompt_image: Union[np.ndarray, None] = None
     prompt = ""
+    prompt_images: Union[List[np.ndarray], None] = None
+    chat_history: Union[List[str], None] = None
+    system_message: Union[str, None] = None
     max_new_tokens = 100
-    top_k = 1
     temperature = 0.8
+    top_k = 1
     random_seed = 0
+    stop_words: Any = ""  # Optional
     extra_params: Dict[str, str] = {}
 
 
diff --git a/instill/helpers/ray_io.py b/instill/helpers/ray_io.py
index a666bb8..a5ecf73 100644
--- a/instill/helpers/ray_io.py
+++ b/instill/helpers/ray_io.py
@@ -109,20 +109,67 @@ def parse_task_text_generation_input(request) -> TextGenerationInput:
                         ({type(text_generation_input.prompt)}): {text_generation_input.prompt}"
                 )
 
+            if input_name == "prompt_images":
+                input_tensors = deserialize_bytes_tensor(b_input_tensor)
+                images = []
+                for enc in input_tensors:
+                    pil_img = Image.open(io.BytesIO(enc.astype(bytes)))  # RGB
+                    image = np.array(pil_img)
+                    if len(image.shape) == 2:  # gray image
+                        raise ValueError(
+                            f"The image shape with {image.shape} is "
+                            f"not in acceptable"
+                        )
+                    images.append(image)
+                # TODO: check whether there are issues in the batch size dimension
+                text_generation_input.prompt_images = images
+                print(
+                    "[DEBUG] input `prompt_images` type"
+                    f"({type(text_generation_input.prompt_images)}): "
+                    f"{text_generation_input.prompt_images}"
+                )
+
+            if input_name == "chat_history":
+                input_tensor = deserialize_bytes_tensor(b_input_tensor)
+                chat_history_str = str(input_tensor[0].decode("utf-8"))
+                print(
+                    "[DEBUG] input `chat_history_str` type"
+                    f"({type(chat_history_str)}): "
+                    f"{chat_history_str}"
+                )
+                try:
+                    text_generation_input.chat_history = json.loads(chat_history_str)
+                except JSONDecodeError:
+                    print("[DEBUG] WARNING `extra_params` parsing faield!")
+                    continue
+
+            if input_name == "system_message":
+                input_tensor = deserialize_bytes_tensor(b_input_tensor)
+                text_generation_input.system_message = str(
+                    input_tensor[0].decode("utf-8")
+                )
+                print(
+                    "[DEBUG] input `system_message` type"
+                    f"({type(text_generation_input.system_message)}): "
+                    f"{text_generation_input.system_message}"
+                )
+
             if input_name == "max_new_tokens":
                 text_generation_input.max_new_tokens = int.from_bytes(
                     b_input_tensor, "little"
                 )
                 print(
-                    f"[DEBUG] input `max_new_tokens` type\
-                        ({type(text_generation_input.max_new_tokens)}): {text_generation_input.max_new_tokens}"
+                    "[DEBUG] input `max_new_tokens` type"
+                    f"({type(text_generation_input.max_new_tokens)}): "
+                    f"{text_generation_input.max_new_tokens}"
                 )
 
             if input_name == "top_k":
                 text_generation_input.top_k = int.from_bytes(b_input_tensor, "little")
                 print(
-                    f"[DEBUG] input `top_k` type\
-                        ({type(text_generation_input.top_k)}): {text_generation_input.top_k}"
+                    "[DEBUG] input `top_k` type"
+                    f"({type(text_generation_input.top_k)}): "
+                    f"{text_generation_input.top_k}"
                 )
 
             if input_name == "temperature":
@@ -130,8 +177,9 @@ def parse_task_text_generation_input(request) -> TextGenerationInput:
                     0
                 ]
                 print(
-                    f"[DEBUG] input `temperature` type\
-                        ({type(text_generation_input.temperature)}): {text_generation_input.temperature}"
+                    "[DEBUG] input `temperature` type"
+                    f"({type(text_generation_input.temperature)}): "
+                    f"{text_generation_input.temperature}"
                 )
                 text_generation_input.temperature = round(
                     text_generation_input.temperature, 2
@@ -142,44 +190,23 @@ def parse_task_text_generation_input(request) -> TextGenerationInput:
                     b_input_tensor, "little"
                 )
                 print(
-                    f"[DEBUG] input `random_seed` type\
-                        ({type(text_generation_input.random_seed)}): {text_generation_input.random_seed}"
-                )
-
-            if input_name == "stop_words":
-                input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_generation_input.stop_words = input_tensor[0]
-                print(
-                    f"[DEBUG] input `stop_words` type\
-                        ({type(text_generation_input.stop_words)}): {text_generation_input.stop_words}"
-                )
-                if len(text_generation_input.stop_words) == 0:
-                    text_generation_input.stop_words = None
-                elif text_generation_input.stop_words.shape[0] > 1:
-                    # TODO: Check wether shoule we decode this words
-                    text_generation_input.stop_words = list(
-                        text_generation_input.stop_words
-                    )
-                else:
-                    text_generation_input.stop_words = [
-                        str(text_generation_input.stop_words[0])
-                    ]
-                print(
-                    f"[DEBUG] parsed input `stop_words` type\
-                        ({type(text_generation_input.stop_words)}): {text_generation_input.stop_words}"
+                    "[DEBUG] input `random_seed` type"
+                    f"({type(text_generation_input.random_seed)}): "
+                    f"{text_generation_input.random_seed}"
                 )
 
             if input_name == "extra_params":
                 input_tensor = deserialize_bytes_tensor(b_input_tensor)
                 extra_params_str = str(input_tensor[0].decode("utf-8"))
                 print(
-                    f"[DEBUG] input `extra_params` type\
-                        ({type(extra_params_str)}): {extra_params_str}"
+                    "[DEBUG] input `extra_params` type"
+                    f"({type(extra_params_str)}): "
+                    f"{extra_params_str}"
                 )
 
                 try:
                     text_generation_input.extra_params = json.loads(extra_params_str)
-                except json.decoder.JSONDecodeError:
+                except JSONDecodeError:
                     print("[DEBUG] WARNING `extra_params` parsing faield!")
                     continue
 
@@ -259,7 +286,7 @@ def parse_task_text_to_image_input(request) -> TextToImageInput:
 
                 try:
                     text_to_image_input.extra_params = json.loads(extra_params_str)
-                except json.decoder.JSONDecodeError:
+                except JSONDecodeError:
                     print("[DEBUG] WARNING `extra_params` parsing faield!")
                     continue
 
@@ -345,7 +372,7 @@ def parse_task_image_to_image_input(request) -> ImageToImageInput:
 
                 try:
                     image_to_image_input.extra_params = json.loads(extra_params_str)
-                except json.decoder.JSONDecodeError:
+                except JSONDecodeError:
                     print("[DEBUG] WARNING `extra_params` parsing faield!")
                     continue
 
@@ -362,19 +389,59 @@ def parse_task_text_generation_chat_input(request) -> TextGenerationChatInput:
         for i, b_input_tensor in zip(request.inputs, request.raw_input_contents):
             input_name = i.name
 
-            if input_name == "conversation":
+            if input_name == "prompt":
+                input_tensor = deserialize_bytes_tensor(b_input_tensor)
+                text_generation_chat_input.prompt = str(input_tensor[0].decode("utf-8"))
+                print(
+                    f"[DEBUG] input `prompt` type\
+                        ({type(text_generation_chat_input.prompt)}): {text_generation_chat_input.prompt}"
+                )
+
+            if input_name == "prompt_images":
+                input_tensors = deserialize_bytes_tensor(b_input_tensor)
+                images = []
+                for enc in input_tensors:
+                    pil_img = Image.open(io.BytesIO(enc.astype(bytes)))  # RGB
+                    image = np.array(pil_img)
+                    if len(image.shape) == 2:  # gray image
+                        raise ValueError(
+                            f"The image shape with {image.shape} is "
+                            f"not in acceptable"
+                        )
+                    images.append(image)
+                # TODO: check whether there are issues in the batch size dimension
+                text_generation_chat_input.prompt_images = images
+                print(
+                    "[DEBUG] input `prompt_images` type"
+                    f"({type(text_generation_chat_input.prompt_images)}): "
+                    f"{text_generation_chat_input.prompt_images}"
+                )
+
+            if input_name == "chat_history":
                 input_tensor = deserialize_bytes_tensor(b_input_tensor)
+                chat_history_str = str(input_tensor[0].decode("utf-8"))
+                print(
+                    "[DEBUG] input `chat_history_str` type"
+                    f"({type(chat_history_str)}): "
+                    f"{chat_history_str}"
+                )
                 try:
-                    text_generation_chat_input.conversation = json.loads(
-                        str(input_tensor[0].decode("utf-8"))
+                    text_generation_chat_input.chat_history = json.loads(
+                        chat_history_str
                     )
-                except Exception as e:
-                    raise JSONDecodeError(
-                        "can't parse conversation json string", "", 0
-                    ) from e
+                except JSONDecodeError:
+                    print("[DEBUG] WARNING `extra_params` parsing faield!")
+                    continue
+
+            if input_name == "system_message":
+                input_tensor = deserialize_bytes_tensor(b_input_tensor)
+                text_generation_chat_input.system_message = str(
+                    input_tensor[0].decode("utf-8")
+                )
                 print(
-                    f"[DEBUG] input `conversation` type\
-                        ({type(text_generation_chat_input.conversation)}): {text_generation_chat_input.conversation}"
+                    "[DEBUG] input `system_message` type"
+                    f"({type(text_generation_chat_input.system_message)}): "
+                    f"{text_generation_chat_input.system_message}"
                 )
 
             if input_name == "max_new_tokens":
@@ -382,9 +449,9 @@ def parse_task_text_generation_chat_input(request) -> TextGenerationChatInput:
                     b_input_tensor, "little"
                 )
                 print(
-                    f"[DEBUG] input `max_new_tokens` type\
-                        ({type(text_generation_chat_input.max_new_tokens)}):\
-                            {text_generation_chat_input.max_new_tokens}"
+                    "[DEBUG] input `max_new_tokens` type"
+                    f"({type(text_generation_chat_input.max_new_tokens)}): "
+                    f"{text_generation_chat_input.max_new_tokens}"
                 )
 
             if input_name == "top_k":
@@ -392,9 +459,9 @@ def parse_task_text_generation_chat_input(request) -> TextGenerationChatInput:
                     b_input_tensor, "little"
                 )
                 print(
-                    f"[DEBUG] input `top_k` type\
-                        ({type(text_generation_chat_input.top_k)}):\
-                            {text_generation_chat_input.top_k}"
+                    "[DEBUG] input `top_k` type"
+                    f"({type(text_generation_chat_input.top_k)}): "
+                    f"{text_generation_chat_input.top_k}"
                 )
 
             if input_name == "temperature":
@@ -402,9 +469,9 @@ def parse_task_text_generation_chat_input(request) -> TextGenerationChatInput:
                     "f", b_input_tensor
                 )[0]
                 print(
-                    f"[DEBUG] input `temperature` type\
-                        ({type(text_generation_chat_input.temperature)}):\
-                            {text_generation_chat_input.temperature}"
+                    "[DEBUG] input `temperature` type"
+                    f"({type(text_generation_chat_input.temperature)}): "
+                    f"{text_generation_chat_input.temperature}"
                 )
                 text_generation_chat_input.temperature = round(
                     text_generation_chat_input.temperature, 2
@@ -415,24 +482,25 @@ def parse_task_text_generation_chat_input(request) -> TextGenerationChatInput:
                     b_input_tensor, "little"
                 )
                 print(
-                    f"[DEBUG] input `random_seed` type\
-                        ({type(text_generation_chat_input.random_seed)}):\
-                            {text_generation_chat_input.random_seed}"
+                    "[DEBUG] input `random_seed` type"
+                    f"({type(text_generation_chat_input.random_seed)}): "
+                    f"{text_generation_chat_input.random_seed}"
                 )
 
             if input_name == "extra_params":
                 input_tensor = deserialize_bytes_tensor(b_input_tensor)
                 extra_params_str = str(input_tensor[0].decode("utf-8"))
                 print(
-                    f"[DEBUG] input `extra_params` type\
-                        ({type(extra_params_str)}): {extra_params_str}"
+                    "[DEBUG] input `extra_params` type"
+                    f"({type(extra_params_str)}): "
+                    f"{extra_params_str}"
                 )
 
                 try:
                     text_generation_chat_input.extra_params = json.loads(
                         extra_params_str
                     )
-                except json.decoder.JSONDecodeError:
+                except JSONDecodeError:
                     print("[DEBUG] WARNING `extra_params` parsing faield!")
                     continue
 
@@ -453,7 +521,18 @@ def parse_task_visual_question_answering_input(
         for i, b_input_tensor in zip(request.inputs, request.raw_input_contents):
             input_name = i.name
 
-            if input_name == "prompt_image":
+            if input_name == "prompt":
+                input_tensor = deserialize_bytes_tensor(b_input_tensor)
+                text_visual_question_answering_input.prompt = str(
+                    input_tensor[0].decode("utf-8")
+                )
+                print(
+                    "[DEBUG] input `prompt` type"
+                    f"({type(text_visual_question_answering_input.prompt)}): "
+                    f"{text_visual_question_answering_input.prompt}"
+                )
+
+            if input_name == "prompt_images":
                 input_tensors = deserialize_bytes_tensor(b_input_tensor)
                 images = []
                 for enc in input_tensors:
@@ -465,22 +544,39 @@ def parse_task_visual_question_answering_input(
                             f"not in acceptable"
                         )
                     images.append(image)
-                text_visual_question_answering_input.prompt_image = images[0]
+                # TODO: check whether there are issues in the batch size dimension
+                text_visual_question_answering_input.prompt_images = images
                 print(
-                    f"[DEBUG] input `prompt_image` type\
-                        ({type(text_visual_question_answering_input.prompt_image)}): \
-                            {text_visual_question_answering_input.prompt_image}"
+                    "[DEBUG] input `prompt_images` type"
+                    f"({type(text_visual_question_answering_input.prompt_images)}): "
+                    f"{text_visual_question_answering_input.prompt_images}"
                 )
 
-            if input_name == "prompt":
+            if input_name == "chat_history":
                 input_tensor = deserialize_bytes_tensor(b_input_tensor)
-                text_visual_question_answering_input.prompt = str(
+                chat_history_str = str(input_tensor[0].decode("utf-8"))
+                print(
+                    "[DEBUG] input `chat_history_str` type"
+                    f"({type(chat_history_str)}): "
+                    f"{chat_history_str}"
+                )
+                try:
+                    text_visual_question_answering_input.chat_history = json.loads(
+                        chat_history_str
+                    )
+                except JSONDecodeError:
+                    print("[DEBUG] WARNING `extra_params` parsing faield!")
+                    continue
+
+            if input_name == "system_message":
+                input_tensor = deserialize_bytes_tensor(b_input_tensor)
+                text_visual_question_answering_input.system_message = str(
                     input_tensor[0].decode("utf-8")
                 )
                 print(
-                    f"[DEBUG] input `prompt` type\
-                        ({type(text_visual_question_answering_input.prompt)}):\
-                            {text_visual_question_answering_input.prompt}"
+                    "[DEBUG] input `system_message` type"
+                    f"({type(text_visual_question_answering_input.system_message)}): "
+                    f"{text_visual_question_answering_input.system_message}"
                 )
 
             if input_name == "max_new_tokens":
@@ -488,9 +584,9 @@ def parse_task_visual_question_answering_input(
                     b_input_tensor, "little"
                 )
                 print(
-                    f"[DEBUG] input `max_new_tokens` type\
-                        ({type(text_visual_question_answering_input.max_new_tokens)}):\
-                            {text_visual_question_answering_input.max_new_tokens}"
+                    "[DEBUG] input `max_new_tokens` type"
+                    f"({type(text_visual_question_answering_input.max_new_tokens)}): "
+                    f"{text_visual_question_answering_input.max_new_tokens}"
                 )
 
             if input_name == "top_k":
@@ -498,9 +594,9 @@ def parse_task_visual_question_answering_input(
                     b_input_tensor, "little"
                 )
                 print(
-                    f"[DEBUG] input `top_k` type\
-                        ({type(text_visual_question_answering_input.top_k)}):\
-                            {text_visual_question_answering_input.top_k}"
+                    "[DEBUG] input `top_k` type"
+                    f"({type(text_visual_question_answering_input.top_k)}): "
+                    f"{text_visual_question_answering_input.top_k}"
                 )
 
             if input_name == "temperature":
@@ -508,9 +604,9 @@ def parse_task_visual_question_answering_input(
                     "f", b_input_tensor
                 )[0]
                 print(
-                    f"[DEBUG] input `temperature` type\
-                        ({type(text_visual_question_answering_input.temperature)}):\
-                            {text_visual_question_answering_input.temperature}"
+                    "[DEBUG] input `temperature` type"
+                    f"({type(text_visual_question_answering_input.temperature)}): "
+                    f"{text_visual_question_answering_input.temperature}"
                 )
                 text_visual_question_answering_input.temperature = round(
                     text_visual_question_answering_input.temperature, 2
@@ -521,24 +617,25 @@ def parse_task_visual_question_answering_input(
                     b_input_tensor, "little"
                 )
                 print(
-                    f"[DEBUG] input `random_seed` type\
-                        ({type(text_visual_question_answering_input.random_seed)}):\
-                            {text_visual_question_answering_input.random_seed}"
+                    "[DEBUG] input `random_seed` type"
+                    f"({type(text_visual_question_answering_input.random_seed)}): "
+                    f"{text_visual_question_answering_input.random_seed}"
                 )
 
             if input_name == "extra_params":
                 input_tensor = deserialize_bytes_tensor(b_input_tensor)
                 extra_params_str = str(input_tensor[0].decode("utf-8"))
                 print(
-                    f"[DEBUG] input `extra_params` type\
-                        ({type(extra_params_str)}): {extra_params_str}"
+                    "[DEBUG] input `extra_params` type"
+                    f"({type(extra_params_str)}): "
+                    f"{extra_params_str}"
                 )
 
                 try:
                     text_visual_question_answering_input.extra_params = json.loads(
                         extra_params_str
                     )
-                except json.decoder.JSONDecodeError:
+                except JSONDecodeError:
                     print("[DEBUG] WARNING `extra_params` parsing faield!")
                     continue
 
diff --git a/instill/protogen b/instill/protogen
index e528ce4..1d0323a 160000
--- a/instill/protogen
+++ b/instill/protogen
@@ -1 +1 @@
-Subproject commit e528ce44652f51d1270c064f611a38ebdee0b34b
+Subproject commit 1d0323a7b4b19ee2716744abef172e551bcc85e0