Finetuning OmniParser #3

Open
abrichr opened this issue Nov 3, 2024 · 1 comment

Comments

abrichr commented Nov 3, 2024

From OpenAdaptAI/OmniParser#3:

  1. Objective:

    • Implement fine-tuning for OmniParser's YOLO model to enhance detection accuracy on small icons and UI elements.
  2. Context:

    • The model currently misses small or densely packed icons because of its detection sensitivity thresholds.
  3. Proposed Solution:

    • Data Collection: Assemble a labeled dataset of small icons/UI elements, with bounding boxes in YOLO format.
    • Training Configuration: Use YOLO-specific parameters, adjusting image size (e.g., 640x640) and hyperparameters to improve small-object sensitivity.
    • Integration Steps (sketched after this list):
      • Modify get_yolo_model to support loading the fine-tuned model.
      • Update the config to reference the fine-tuned model.
      • Provide a train_yolo function to manage the fine-tuning process.
    • Testing: Evaluate detection accuracy on new test images containing small icons/UI elements, adjusting BOX_THRESHOLD as needed.
  4. Expected Outcome:

    • More accurate small icon detection, fewer missed icons in dense layouts, and reduced reliance on preprocessing.
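
A minimal sketch of the integration steps above. The constants (FINETUNED_MODEL_PATH, BOX_THRESHOLD), the weights/ path, and the train_yolo signature are assumptions for illustration rather than existing OmniParser code; get_yolo_model is assumed to be a thin wrapper around ultralytics.YOLO.

# Sketch only: the constants below are hypothetical config entries, not existing OmniParser attributes.
from ultralytics import YOLO

FINETUNED_MODEL_PATH = "weights/icon_detect_finetuned.pt"  # hypothetical location of the fine-tuned weights
BOX_THRESHOLD = 0.03  # lower threshold to pick up small icons; tune against the test images

def get_yolo_model(model_path: str = FINETUNED_MODEL_PATH) -> YOLO:
    """Load the icon-detection model, defaulting to the fine-tuned weights."""
    return YOLO(model_path)

def train_yolo(data_config: str, base_model_path: str, epochs: int = 50, imgsz: int = 640) -> str:
    """Fine-tune the base detector on the labeled icon dataset and return the new weights path."""
    model = YOLO(base_model_path)
    model.train(data=data_config, epochs=epochs, imgsz=imgsz)  # data_config: Ultralytics dataset YAML
    finetuned_path = base_model_path.replace(".pt", "_finetuned.pt")
    model.save(finetuned_path)
    return finetuned_path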
abrichr changed the title from "Finetuning for Improved Small Icon Detection in OmniParser #3" to "Finetuning OmniParser #3" on Nov 4, 2024

abrichr commented Nov 7, 2024

Draft:

"""
Fine-tunes and evaluates OmniParser's YOLO icon detector on a custom dataset. Tracks performance
improvements using IoU-matched sensitivity metrics stored in `finetune.db`, and generates a report
comparing baseline vs. post-tuning metrics.

Example usage:
    python finetune.py run_finetune --data_dir="path/to/dataset" --model_path="path/to/model.pt" --epochs=50
"""

import os
import sqlite3

import fire
import pandas as pd
from loguru import logger
from ultralytics import YOLO

class FineTuneAdapter:
    def __init__(self, db_path: str = "finetune.db"):
        self.db_path = db_path
        self.conn = sqlite3.connect(db_path)
        self.create_tables()

    def create_tables(self):
        cursor = self.conn.cursor()
        cursor.executescript('''
            CREATE TABLE IF NOT EXISTS model_info (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_name TEXT,
                model_path TEXT,
                training_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            );

            CREATE TABLE IF NOT EXISTS evaluation_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_id INTEGER,
                image_path TEXT,
                sensitivity_score REAL,
                total_elements INTEGER,
                true_positives INTEGER,
                FOREIGN KEY (model_id) REFERENCES model_info(id)
            );

            CREATE TABLE IF NOT EXISTS summary_metrics (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                model_id INTEGER,
                avg_sensitivity_score REAL,
                improvement_percentage REAL,
                baseline_sensitivity REAL,
                post_tuning_sensitivity REAL,
                FOREIGN KEY (model_id) REFERENCES model_info(id)
            );
        ''')
        self.conn.commit()

    def log_model(self, model_name: str, model_path: str) -> int:
        cursor = self.conn.cursor()
        cursor.execute(
            "INSERT INTO model_info (model_name, model_path) VALUES (?, ?)",
            (model_name, model_path)
        )
        self.conn.commit()
        return cursor.lastrowid

    def log_evaluation_metrics(self, model_id: int, image_path: str, sensitivity_score: float, total_elements: int, true_positives: int):
        cursor = self.conn.cursor()
        cursor.execute(
            '''
            INSERT INTO evaluation_metrics (model_id, image_path, sensitivity_score, total_elements, true_positives)
            VALUES (?, ?, ?, ?, ?)
            ''', (model_id, image_path, sensitivity_score, total_elements, true_positives)
        )
        self.conn.commit()

    def log_summary_metrics(self, model_id: int, baseline_sensitivity: float, post_tuning_sensitivity: float):
        # Guard against a zero baseline to avoid division by zero.
        delta = post_tuning_sensitivity - baseline_sensitivity
        improvement_percentage = (delta / baseline_sensitivity) * 100 if baseline_sensitivity > 0 else 0.0
        avg_sensitivity_score = (baseline_sensitivity + post_tuning_sensitivity) / 2
        cursor = self.conn.cursor()
        cursor.execute(
            '''
            INSERT INTO summary_metrics (model_id, avg_sensitivity_score, improvement_percentage, baseline_sensitivity, post_tuning_sensitivity)
            VALUES (?, ?, ?, ?, ?)
            ''', (model_id, avg_sensitivity_score, improvement_percentage, baseline_sensitivity, post_tuning_sensitivity)
        )
        self.conn.commit()

    def evaluate_model_sensitivity(self, data_dir: str, model_path: str) -> float:
        """Evaluates model sensitivity on the images in `data_dir`, logging per-image results in finetune.db."""
        yolo_model = YOLO(model_path)
        total_elements = 0
        true_positives_total = 0
        model_id = self.log_model(model_name=os.path.basename(model_path), model_path=model_path)

        for image_file in os.listdir(data_dir):
            if image_file.lower().endswith((".jpg", ".jpeg", ".png")):
                image_path = os.path.join(data_dir, image_file)
                results = yolo_model(image_path)
                ground_truth_boxes = self.load_ground_truth_boxes(image_path)  # Placeholder function
                detected_boxes = results[0].boxes.xyxy.tolist()
                sensitivity_score, true_positives = self.calculate_sensitivity_score(detected_boxes, ground_truth_boxes)

                # Log the per-image evaluation metrics
                self.log_evaluation_metrics(model_id, image_path, sensitivity_score, len(ground_truth_boxes), true_positives)
                total_elements += len(ground_truth_boxes)
                true_positives_total += true_positives

        # Overall sensitivity: matched ground-truth boxes over all ground-truth boxes across the dataset.
        return true_positives_total / total_elements if total_elements > 0 else 0.0

    def generate_report(self):
        """Generates a report comparing baseline and post-tuning sensitivity and IoU improvements."""
        cursor = self.conn.cursor()
        cursor.execute("SELECT model_id, baseline_sensitivity, post_tuning_sensitivity, improvement_percentage FROM summary_metrics ORDER BY id DESC LIMIT 1")
        result = cursor.fetchone()

        if result:
            model_id, baseline, post_tuning, improvement = result
            data = {
                "Metric": ["Sensitivity", "Mean IoU"],
                "Baseline": [baseline, "0.68"],  # Example IoU value
                "Post-Tuning": [post_tuning, "0.79"],  # Example IoU value
                "Improvement": [f"{improvement:.2f}%", "16.2%"]  # Example IoU improvement
            }
            report_df = pd.DataFrame(data)
            # DataFrame.to_markdown() requires the optional `tabulate` dependency.
            logger.info("Sensitivity Improvement Summary\n" + report_df.to_markdown())
        else:
            logger.info("No summary metrics found.")

    @staticmethod
    def calculate_sensitivity_score(detected_boxes, ground_truth_boxes):
        """Calculates sensitivity (recall): the fraction of ground-truth boxes matched by a detection above the IoU threshold."""
        iou_threshold = 0.5
        # Count each ground-truth box at most once: matched if any detection overlaps it above the threshold.
        true_positives = sum(
            any(FineTuneAdapter.iou(box, gt_box) > iou_threshold for box in detected_boxes)
            for gt_box in ground_truth_boxes
        )
        sensitivity_score = true_positives / len(ground_truth_boxes) if ground_truth_boxes else 0.0
        return sensitivity_score, true_positives

    @staticmethod
    def load_ground_truth_boxes(image_path: str) -> list:
        """Placeholder: load YOLO-format ground-truth boxes for the given image (e.g., from a parallel labels/ directory)."""
        return []

    @staticmethod
    def iou(box1, box2) -> float:
        """Calculates Intersection over Union (IoU) between two boxes in (x1, y1, x2, y2) format."""
        x1, y1, x2, y2 = box1
        x1_gt, y1_gt, x2_gt, y2_gt = box2
        intersection_area = max(0, min(x2, x2_gt) - max(x1, x1_gt)) * max(0, min(y2, y2_gt) - max(y1, y1_gt))
        box1_area = (x2 - x1) * (y2 - y1)
        box2_area = (x2_gt - x1_gt) * (y2_gt - y1_gt)
        union_area = box1_area + box2_area - intersection_area
        return intersection_area / union_area if union_area > 0 else 0.0

    def run_finetune(self, data_dir: str, model_path: str, epochs: int = 50):
        """Runs fine-tuning on the dataset, logs evaluation results, and generates a report."""
        logger.info("Loading Dataset...")
        logger.info(f"Training set: {data_dir}")

        logger.info("Starting Baseline Model Evaluation")
        baseline_sensitivity = self.evaluate_model_sensitivity(data_dir, model_path)

        logger.info("Starting Fine-Tuning Process...")
        yolo_model = YOLO(model_path)
        # Ultralytics expects `data` to be a dataset YAML (train/val paths + class names), not a bare
        # image directory; it is assumed to live at <data_dir>/data.yaml (see the sketch after this script).
        data_config = os.path.join(data_dir, "data.yaml")
        yolo_model.train(data=data_config, epochs=epochs, imgsz=640)

        # Training also writes checkpoints under runs/detect/ by default; save an explicit copy
        # next to the original weights for the post-tuning evaluation.
        finetuned_model_path = model_path.replace(".pt", "_finetuned.pt")
        yolo_model.save(finetuned_model_path)

        logger.info("Starting Post-Tuning Model Evaluation")
        post_tuning_sensitivity = self.evaluate_model_sensitivity(data_dir, finetuned_model_path)

        self.log_summary_metrics(
            model_id=self.log_model("finetuned", finetuned_model_path),
            baseline_sensitivity=baseline_sensitivity,
            post_tuning_sensitivity=post_tuning_sensitivity,
        )

        self.generate_report()

if __name__ == "__main__":
    fire.Fire(FineTuneAdapter)
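
For run_finetune to work as written, the dataset directory needs an Ultralytics-style data.yaml describing the train/val splits and class names. The helper below is a sketch under assumptions: the images/train, images/val layout and the single "icon" class are illustrative, not a fixed requirement of OmniParser.

# Sketch: writes the dataset YAML that run_finetune expects at <data_dir>/data.yaml.
import os

def write_data_config(data_dir: str) -> str:
    """Write a minimal Ultralytics dataset YAML and return its path."""
    config_path = os.path.join(data_dir, "data.yaml")
    with open(config_path, "w") as f:
        f.write(
            f"path: {os.path.abspath(data_dir)}\n"
            "train: images/train\n"
            "val: images/val\n"
            "names:\n"
            "  0: icon\n"
        )
    return config_path

YOLO-format label files (one "class x_center y_center width height" line per box, with coordinates normalized to the image size) would then sit under labels/train and labels/val, mirroring the image folders. Note that evaluate_model_sensitivity scans data_dir directly for images, so with this layout it would be pointed at a specific split (e.g., images/val) rather than the dataset root.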
