forked from gitgutgit/AnalysisAiBot
-
Notifications
You must be signed in to change notification settings - Fork 0
/
MultipleimgsAi_V0.4.py
110 lines (91 loc) · 3.52 KB
/
MultipleimgsAi_V0.4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import base64
import requests
import os
import json
import re
import toml
# Per-image completion budget; Korean responses need ~300 extra tokens vs. English.
max_tokens_img = 1100 # per each image (kor +300)
# OpenAI API Key — loaded from api_secrets.toml at import time (fails fast if the
# file or the [openai] api_key entry is missing).
api_key = ""
with open("api_secrets.toml", "r") as f:
    secrets = toml.load(f)
    api_key = secrets["openai"]["api_key"]
# Function to encode the image
def encode_image(image_path):
    """Read the file at *image_path* and return its contents as a base64 string."""
    with open(image_path, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode('utf-8')
# Path to your image directory and the JSON tag templates (English / Korean)
# that describe the visual elements the model should report on.
image_dir = "documents/image_sample/Images1"
json_template_path = "documents/json/visual_tag_en.json"
json_template_kor_path = "documents/json/visual_tag_ko.json"
# Read the English tag template (loaded but currently unused below; the prompt
# only embeds the Korean template — kept for parity / future use).
with open(json_template_path, 'r') as file:
    json_template_eng = json.load(file)
# Read the Korean tag template, embedded verbatim into the analysis prompt.
with open(json_template_kor_path, 'r') as file:
    json_template_kor= json.load(file)
# For sorting
def natural_sort_key(s):
    """Sort key that orders embedded digit runs numerically (img2 before img10)."""
    def _coerce(token):
        # Numeric runs compare as ints; everything else compares case-insensitively.
        return int(token) if token.isdigit() else token.lower()
    return [_coerce(token) for token in re.split('([0-9]+)', s)]
# Collect every regular file in image_dir (skipping macOS .DS_Store metadata)
# as full paths ready for base64 encoding.
image_files = [os.path.join(image_dir, f) for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f)) and f != '.DS_Store']
# Sort naturally so frame numbering is chronological (img2 before img10).
image_files.sort(key=natural_sort_key)
print("List of image_files: ", image_files)
print("=====================================")
print("=====================================")
# Function to analyze an image
def analyze_image(image_path):
base64_image = encode_image(image_path)
headers = {
"Content-Type": "application/json",
"Authorization": f"Bearer {api_key}"
}
prompt = f"""You are an expert in analyzing commercial videos, capable of thoroughly explaining camera techniques, the number of people present in the current shot, and other such elements. If the image contains only text or simple things, provide anaylsis simply. Here is the list of elements (explain in Korean) [if applicable]: {json.dumps(json_template_kor)}"""
content = [
{
"type": "text",
"text": prompt
},
{
"type": "image_url",
"image_url": {
"url": f"data:image/jpeg;base64,{base64_image}"
}
}
]
payload = {
"model": "gpt-4o",
"messages": [
{
"role": "user",
"content": content
}
],
"max_tokens": max_tokens_img # Adjust max tokens if necessary
}
response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
response_json = response.json()
return response_json['choices'][0]['message']['content']
# Analyze every image in order, collecting one record per image.
responses = []
for number, path in enumerate(image_files, start=1):
    analysis_text = analyze_image(path)
    record = {
        "image_number": number,
        "image_path": path,
        "analysis": analysis_text,
    }
    responses.append(record)
    print(f"Analyzed {path}")
# Saving responses to a text file.
# The analyses are Korean text, so an explicit UTF-8 encoding is required —
# the platform default (e.g. cp1252 on Windows) would raise UnicodeEncodeError.
with open("image_analysis.txt", "w", encoding="utf-8") as txt_file:
    for response in responses:
        txt_file.write(f"Image {response['image_number']}: {response['image_path']}\n")
        txt_file.write(f"Analysis:\n{response['analysis']}\n")
        txt_file.write("\n" + "="*40 + "\n\n")

# Saving responses to a JSON file.
# ensure_ascii=False keeps the Korean text readable instead of \uXXXX escapes.
with open("image_analysis.json", "w", encoding="utf-8") as json_file:
    json.dump(responses, json_file, indent=4, ensure_ascii=False)

# Printing all responses to stdout for a quick visual check.
for response in responses:
    print(f"Image {response['image_number']}: {response['image_path']}\nAnalysis: {response['analysis']}\n")