//
//  KeypointsProcessor.swift
//  PickleBallUI
//
//  Created by Michael Zhang on 1/20/25.
//

import CoreML
import SwiftUI
import Vision

class KeypointsProcessor: ObservableObject {
    @Published var confidenceThreshold: Float = 0.3
    @Published var iouThreshold: Float = 0.6
    @Published var maskThreshold: Float = 0.5

    @MainActor @Published var predictions: [Prediction] = []
    @Published var maskPredictions: [MaskPrediction] = []
    @Published var combinedMaskImage: UIImage?

    private var isProcessing = false

    @Published var currentMask: CGImage?

    // TODO: Add logic to display the mask on a preview layer of the camera.
    func processFrame(_ pixelBuffer: CVPixelBuffer, height: Int, width: Int) async {
        guard !isProcessing else { return }
        isProcessing = true
        defer { isProcessing = false }

        print("Processing frame with dimensions: \(width) x \(height)")

        let mask = await runCoreMLInference(pixelBuffer: pixelBuffer, width: width, height: height)

        if let mask = mask {
            print("Got mask with dimensions: \(mask.maskSize)")
            if let cgImage = mask.toCGImage() {
                print("Successfully created CGImage with dimensions: \(cgImage.width) x \(cgImage.height)")
                // let uiImage = UIImage(cgImage: cgImage)
                // UIImageWriteToSavedPhotosAlbum(uiImage, nil, nil, nil)
                // print("SAVED")
                await MainActor.run {
                    self.currentMask = cgImage
                    print("Updated currentMask on main thread")
                }
            } else {
                print("Failed to create CGImage from mask")
            }
        } else {
            print("No mask generated")
        }
    }

    private func runCoreMLInference(pixelBuffer: CVPixelBuffer, width: Int, height: Int) async -> MaskPrediction? {
        let config = MLModelConfiguration()
        guard let model = try? KeypointDetectorModel(configuration: config) else {
            NSLog("Failed to init model")
            return nil
        }

        do {
            let outputs = try model.prediction(input_image: pixelBuffer)
            let boxesOutput = outputs.var_1052
            let masksOutput = outputs.var_742

            let numSegmentationMasks = 32
            let numClasses = Int(truncating: boxesOutput.shape[1]) - 4 - numSegmentationMasks

            var predictions = getPredictionsFromOutput(
                output: boxesOutput,
                rows: Int(truncating: boxesOutput.shape[1]),
                columns: Int(truncating: boxesOutput.shape[2]),
                numberOfClasses: numClasses,
                inputImgSize: CGSize(width: CVPixelBufferGetWidth(pixelBuffer),
                                     height: CVPixelBufferGetHeight(pixelBuffer)))

            // Drop low-confidence boxes before running NMS.
            predictions.removeAll { $0.score < confidenceThreshold }

            guard !predictions.isEmpty else { return nil }

            // Run non-maximum suppression per class.
            let groupedPredictions = Dictionary(grouping: predictions) { prediction in
                prediction.classIndex
            }

            var nmsPredictions: [Prediction] = []
            for (_, predictions) in groupedPredictions {
                nmsPredictions.append(
                    contentsOf: nonMaximumSuppression(predictions: predictions,
                                                      iouThreshold: iouThreshold,
                                                      limit: 100))
            }

            NSLog("\(nmsPredictions.count) boxes left after performing NMS with IoU threshold of \(iouThreshold)")

            guard !nmsPredictions.isEmpty else { return nil }

            await MainActor.run { [weak self, nmsPredictions] in
                self?.predictions = nmsPredictions
            }

            let maskProtos = getMaskProtosFromOutput(
                output: masksOutput,
                rows: Int(truncating: masksOutput.shape[3]),
                columns: Int(truncating: masksOutput.shape[2]),
                tubes: Int(truncating: masksOutput.shape[1]))

            // TODO: pixelBuffer is not the right source for the width here; pass the
            // original pixel buffer dimensions instead.
            let maskPredictions = masksFromProtos(
                boxPredictions: nmsPredictions,
                maskProtos: maskProtos,
                maskSize: (width: Int(truncating: masksOutput.shape[3]),
                           height: Int(truncating: masksOutput.shape[2])),
                originalImgSize: CGSize(width: width, height: height))

            await MainActor.run { [weak self, maskPredictions] in
                self?.maskPredictions = maskPredictions
            }

            return maskPredictions.first
        } catch {
            NSLog("Error in CoreML inference: \(error)")
            return nil
        }
    }
}
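
// `MaskPrediction.toCGImage()` used in `processFrame` above is defined elsewhere
// in the project. The extension below is a minimal sketch of such a conversion,
// assuming the mask is a flattened single-channel UInt8 buffer of
// `maskSize.width * maskSize.height` bytes. The name `toGrayscaleCGImage` is
// hypothetical; this is illustrative, not the project's actual implementation.
extension MaskPrediction {
    func toGrayscaleCGImage() -> CGImage? {
        let width = maskSize.width
        let height = maskSize.height
        guard width > 0, height > 0, mask.count == width * height else { return nil }

        // Wrap the mask bytes in a data provider and describe them as an
        // 8-bit, single-component grayscale bitmap with no alpha channel.
        guard let provider = CGDataProvider(data: Data(mask) as CFData) else { return nil }
        return CGImage(
            width: width,
            height: height,
            bitsPerComponent: 8,
            bitsPerPixel: 8,
            bytesPerRow: width,
            space: CGColorSpaceCreateDeviceGray(),
            bitmapInfo: CGBitmapInfo(rawValue: CGImageAlphaInfo.none.rawValue),
            provider: provider,
            decode: nil,
            shouldInterpolate: false,
            intent: .defaultIntent)
    }
}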
extension KeypointsProcessor {
    func getPredictionsFromOutput(
        output: MLMultiArray,
        rows: Int,
        columns: Int,
        numberOfClasses: Int,
        inputImgSize: CGSize
    ) -> [Prediction] {
        guard output.count != 0 else { return [] }

        var predictions = [Prediction]()
        for i in 0..<columns {
            // The first 4 rows of the output hold the box parameters: cx, cy, w, h.
            let centerX = Float(truncating: output[0*columns+i])
            let centerY = Float(truncating: output[1*columns+i])
            let width = Float(truncating: output[2*columns+i])
            let height = Float(truncating: output[3*columns+i])

            // The next `numberOfClasses` rows hold the class scores; keep the best one.
            let (classIndex, score) = {
                var highestScore: Float = 0
                var classIndex: Int = 0
                for j in 0..<numberOfClasses {
                    let score = Float(truncating: output[(4+j)*columns+i])
                    if score > highestScore {
                        highestScore = score
                        classIndex = j
                    }
                }
                return (classIndex, highestScore)
            }()

            // The last 32 rows hold the mask coefficients for this box.
            let maskCoefficients = {
                var coefficients: [Float] = []
                for k in 0..<32 {
                    coefficients.append(Float(truncating: output[(4+numberOfClasses+k)*columns+i]))
                }
                return coefficients
            }()

            // Convert box from xywh to xyxy
            let left = centerX - width/2
            let top = centerY - height/2
            let right = centerX + width/2
            let bottom = centerY + height/2

            let prediction = Prediction(
                classIndex: classIndex,
                score: score,
                xyxy: (left, top, right, bottom),
                maskCoefficients: maskCoefficients,
                inputImgSize: inputImgSize
            )
            predictions.append(prediction)
        }

        return predictions
    }

    func nonMaximumSuppression(
        predictions: [Prediction],
        iouThreshold: Float,
        limit: Int
    ) -> [Prediction] {
        guard !predictions.isEmpty else { return [] }

        let sortedIndices = predictions.indices.sorted {
            predictions[$0].score > predictions[$1].score
        }

        var selected: [Prediction] = []
        var active = [Bool](repeating: true, count: predictions.count)
        var numActive = active.count

        // Starting from the highest-scoring box, keep it and suppress every
        // remaining box that overlaps it by more than the IoU threshold.
        outer: for i in 0..<predictions.count {
            if active[i] {
                let boxA = predictions[sortedIndices[i]]
                selected.append(boxA)

                if selected.count >= limit { break }

                for j in i+1..<predictions.count {
                    if active[j] {
                        let boxB = predictions[sortedIndices[j]]
                        if IOU(a: boxA.xyxy, b: boxB.xyxy) > iouThreshold {
                            active[j] = false
                            numActive -= 1
                            if numActive <= 0 { break outer }
                        }
                    }
                }
            }
        }

        return selected
    }

    private func IOU(a: XYXY, b: XYXY) -> Float {
        // Calculate the intersection coordinates
        let x1 = max(a.x1, b.x1)
        let y1 = max(a.y1, b.y1)
        let x2 = min(a.x2, b.x2)
        let y2 = min(a.y2, b.y2)

        // Calculate the intersection area (zero when the boxes do not overlap)
        let intersection = max(x2 - x1, 0) * max(y2 - y1, 0)

        // Calculate the union area
        let area1 = (a.x2 - a.x1) * (a.y2 - a.y1)
        let area2 = (b.x2 - b.x1) * (b.y2 - b.y1)
        let union = area1 + area2 - intersection

        // Calculate the IoU score
        let iou = intersection / union
        return iou
    }

    func getMaskProtosFromOutput(
        output: MLMultiArray,
        rows: Int,
        columns: Int,
        tubes: Int
    ) -> [[UInt8]] {
        var masks: [[UInt8]] = []
        // Flatten each prototype channel (tube) of the [1, tubes, rows, columns]
        // output into its own byte mask.
        for tube in 0..<tubes {
            var mask: [UInt8] = []
            for i in 0..<(rows*columns) {
                let index = tube*(rows*columns) + i
                mask.append(UInt8(truncating: output[index]))
            }
            masks.append(mask)
        }
        return masks
    }
}
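
// `Prediction`, `MaskPrediction`, and `XYXY` are declared elsewhere in the
// project. The declarations below are a sketch of the shapes this file assumes,
// with field names inferred from usage; they are illustrative only and not the
// project's actual definitions.
typealias XYXY = (x1: Float, y1: Float, x2: Float, y2: Float)

struct Prediction {
    let classIndex: Int
    let score: Float
    let xyxy: XYXY                  // box corners in model-input pixel coordinates
    let maskCoefficients: [Float]   // 32 weights over the segmentation mask prototypes
    let inputImgSize: CGSize
}

struct MaskPrediction {
    let classIndex: Int
    let mask: [UInt8]               // flattened single-channel mask, maskSize.width * maskSize.height bytes
    let maskSize: (width: Int, height: Int)
}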
extension KeypointsProcessor {
    func masksFromProtos(
        boxPredictions: [Prediction],
        maskProtos: [[UInt8]],
        maskSize: (width: Int, height: Int),
        originalImgSize: CGSize
    ) -> [MaskPrediction] {
        NSLog("Generate masks from prototypes")
        var maskPredictions: [MaskPrediction] = []
        for prediction in boxPredictions {
            let maskCoefficients = prediction.maskCoefficients

            // Weighted sum of the prototype masks using this box's coefficients.
            var finalMask: [Float] = []
            for (index, maskProto) in maskProtos.enumerated() {
                let weight = maskCoefficients[index]
                finalMask = finalMask.add(maskProto.map { Float($0) * weight })
            }

            NSLog("Apply sigmoid")
            finalMask = finalMask.map { sigmoid(value: $0) }

            NSLog("Crop mask to bounding box")
            let croppedMask = crop(
                mask: finalMask,
                maskSize: maskSize,
                box: prediction.xyxy)

            let scale = min(
                max(
                    Int(originalImgSize.width) / maskSize.width,
                    Int(originalImgSize.height) / maskSize.height),
                6)
            let targetSize = (
                width: maskSize.width * scale,
                height: maskSize.height * scale)

            NSLog("Upsample mask with size \(maskSize) to \(targetSize)")
            let upsampledMask = croppedMask
                .map { Float(($0 > maskThreshold ? 1 : 0)) }
                .upsample(initialSize: maskSize, scale: scale)
                .map { UInt8(($0 > maskThreshold ? 1 : 0) * 255) }

            maskPredictions.append(
                MaskPrediction(
                    classIndex: prediction.classIndex,
                    mask: upsampledMask,
                    maskSize: targetSize))
        }

        return maskPredictions
    }

    func sigmoid(value: Float) -> Float {
        return 1.0 / (1.0 + exp(-value))
    }

    private func crop(
        mask: [Float],
        maskSize: (width: Int, height: Int),
        box: XYXY
    ) -> [Float] {
        let rows = maskSize.height
        let columns = maskSize.width

        // The box is in model-input coordinates, while the prototype masks are at
        // a quarter of that resolution, hence the division by 4.
        let x1 = Int(box.x1 / 4)
        let y1 = Int(box.y1 / 4)
        let x2 = Int(box.x2 / 4)
        let y2 = Int(box.y2 / 4)

        // Zero out everything outside the bounding box.
        var croppedArr: [Float] = []
        for row in 0..<rows {
            for column in 0..<columns {
                if column >= x1 && column <= x2 && row >= y1 && row <= y2 {
                    croppedArr.append(mask[row*columns+column])
                } else {
                    croppedArr.append(0)
                }
            }
        }
        return croppedArr
    }
}
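
// The `add(_:)` and `upsample(initialSize:scale:)` helpers used in
// `masksFromProtos` live in an Array extension elsewhere in the project. The
// extension below is a minimal sketch of the behavior this file relies on:
// element-wise addition that tolerates an initially empty accumulator, and
// nearest-neighbor upsampling of a flattened row-major mask. The names are
// hypothetical and the code is illustrative, not the project's actual implementation.
extension Array where Element == Float {
    /// Element-wise sum; if the receiver is empty (first iteration of the
    /// accumulation loop in `masksFromProtos`), the other array is returned unchanged.
    func elementWiseAdd(_ other: [Float]) -> [Float] {
        guard !isEmpty else { return other }
        guard count == other.count else { return self }
        return zip(self, other).map { $0 + $1 }
    }

    /// Nearest-neighbor upsampling of a flattened `initialSize` mask by an
    /// integer `scale` factor, returning a flattened mask of
    /// `(width * scale) x (height * scale)`.
    func nearestNeighborUpsample(initialSize: (width: Int, height: Int), scale: Int) -> [Float] {
        let (width, height) = initialSize
        guard count == width * height, scale > 0 else { return self }

        var result = [Float]()
        result.reserveCapacity(count * scale * scale)
        for row in 0..<height {
            // Repeat every pixel `scale` times horizontally...
            var expandedRow = [Float]()
            expandedRow.reserveCapacity(width * scale)
            for column in 0..<width {
                let value = self[row * width + column]
                expandedRow.append(contentsOf: repeatElement(value, count: scale))
            }
            // ...and every expanded row `scale` times vertically.
            for _ in 0..<scale {
                result.append(contentsOf: expandedRow)
            }
        }
        return result
    }
}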