Skip to content

Commit

Permalink
Fix llama recorder multi image recorder (#677)
Browse files Browse the repository at this point in the history
This PR fixes a problem with recording multiple images at once using
the llama recorder.
  • Loading branch information
haixuanTao authored Oct 7, 2024
2 parents 7d93fe6 + 5917b9e commit 4b7be45
Show file tree
Hide file tree
Showing 2 changed files with 27 additions and 17 deletions.
9 changes: 7 additions & 2 deletions node-hub/dora-qwenvl/dora_qwenvl/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
"DEFAULT_QUESTION",
"Describe this image",
)
ADAPTER_PATH = os.getenv("ADAPTER_PATH", "")

# Check if flash_attn is installed
try:
Expand All @@ -23,16 +24,20 @@
device_map="auto",
attn_implementation="flash_attention_2",
)
except ImportError:
except (ImportError, ModuleNotFoundError):
model = Qwen2VLForConditionalGeneration.from_pretrained(
CUSTOM_MODEL_PATH,
torch_dtype="auto",
device_map="auto",
)


if ADAPTER_PATH != "":
model.load_adapter(ADAPTER_PATH, "dora")


# default processor
processor = AutoProcessor.from_pretrained(DEFAULT_PATH)
processor = AutoProcessor.from_pretrained(CUSTOM_MODEL_PATH)


def generate(frames: dict, question):
Expand Down
35 changes: 20 additions & 15 deletions node-hub/llama-factory-recorder/llama_factory_recorder/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def write_dict_to_json(file_path, key: str, new_data):


def save_image_and_add_to_json(
image_array, root_path, llama_root_path, jsonl_file, messages
frame_dict: dict, root_path, llama_root_path, jsonl_file, messages
):
"""
Saves an image from a NumPy array and adds a new JSON object as a line to a JSONL file.
Expand All @@ -69,17 +69,19 @@ def save_image_and_add_to_json(
if os.path.isfile(os.path.join(llama_root_path / root_path, name))
]
)
image_paths = []
for event_id, data in frame_dict.items():
# Define the image filename
image_filename = f"{event_id}-{image_id}.png"
image_path = os.path.join(root_path, image_filename)

# Define the image filename
image_filename = f"{image_id}.png"
image_path = os.path.join(root_path, image_filename)

# Save the image
image = Image.fromarray(image_array)
image.save(llama_root_path / image_path)
# Save the image
image = Image.fromarray(data)
image.save(llama_root_path / image_path)
image_paths.append(image_path)

# Create the JSON entry with 'messages' and 'images'
new_entry = {"messages": messages, "images": [image_path]}
new_entry = {"messages": messages, "images": image_paths}

# Add the entry to the JSONL file with UTF-8 encoding
with open(jsonl_file, "a", encoding="utf-8") as f:
Expand Down Expand Up @@ -123,15 +125,15 @@ def main():
)

question = DEFAULT_QUESTION
frame = None
frames = {}

for event in node:
event_type = event["type"]

if event_type == "INPUT":
event_id = event["id"]

if event_id == "image":
if "image" in event_id:
storage = event["value"]
metadata = event["metadata"]
encoding = metadata["encoding"]
Expand All @@ -153,7 +155,7 @@ def main():
.reshape((height, width, channels))
)
if encoding == "bgr8":
frame = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
frames[event_id] = frame[:, :, ::-1] # OpenCV image (BGR to RGB)
elif encoding == "rgb8":
pass
else:
Expand All @@ -164,20 +166,23 @@ def main():
if text != "":
question = text
elif event_id == "ground_truth":
if frame is None:
if len(frames.keys()) == 0:
continue
ground_truth = event["value"][0].as_py()

messages = [
{"content": "<image>" + question, "role": "user"},
{
"content": "<image>" * len(frames.keys()) + question,
"role": "user",
},
{
"content": ground_truth,
"role": "assistant",
},
]

save_image_and_add_to_json(
image_array=frame,
frame_dict=frames,
root_path=entry_name,
llama_root_path=llama_factory_root_path,
jsonl_file=default_record_json_path,
Expand Down

0 comments on commit 4b7be45

Please sign in to comment.