diff --git a/example/src/App.tsx b/example/src/App.tsx
index 2d76f7b..e8f68f7 100644
--- a/example/src/App.tsx
+++ b/example/src/App.tsx
@@ -27,14 +27,20 @@ import * as InatVision from 'vision-camera-plugin-inatvision';
const modelFilenameAndroid = 'small_inception_tf1.tflite';
const taxonomyFilenameAndroid = 'small_export_tax.csv';
+const geoModelFilenameAndroid = 'not_implemented';
const modelFilenameIOS = 'small_inception_tf1.mlmodelc';
const taxonomyFilenameIOS = 'small_export_tax.json';
+const geoModelFilenameIOS = 'small_geomodel.mlmodelc';
const modelVersion = '1.0';
const modelPath =
Platform.OS === 'ios'
? `${RNFS.DocumentDirectoryPath}/${modelFilenameIOS}`
: `${RNFS.DocumentDirectoryPath}/${modelFilenameAndroid}`;
+const geoModelPath =
+ Platform.OS === 'ios'
+ ? `${RNFS.DocumentDirectoryPath}/${geoModelFilenameIOS}`
+ : `${RNFS.DocumentDirectoryPath}/${geoModelFilenameAndroid}`;
const taxonomyPath =
Platform.OS === 'ios'
? `${RNFS.DocumentDirectoryPath}/${taxonomyFilenameIOS}`
@@ -49,6 +55,7 @@ export default function App(): React.JSX.Element {
undefined
);
const [negativeFilter, setNegativeFilter] = useState(false);
+ const [useGeoModel, setUseGeoModel] = useState(false);
enum VIEW_STATUS {
NONE,
@@ -66,6 +73,10 @@ export default function App(): React.JSX.Element {
setNegativeFilter(!negativeFilter);
};
+ const toggleUseGeoModel = () => {
+ setUseGeoModel(!useGeoModel);
+ };
+
const changeFilterByTaxonId = () => {
if (!filterByTaxonId) {
setFilterByTaxonId('47126');
@@ -110,6 +121,16 @@ export default function App(): React.JSX.Element {
.catch((error) => {
console.log(`error moving model file`, error);
});
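+      // Copy the bundled geo model alongside the CV model so the plugin can load it from the documents directory.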
+ RNFS.copyFile(
+ `${RNFS.MainBundlePath}/${geoModelFilenameIOS}`,
+ `${RNFS.DocumentDirectoryPath}/${geoModelFilenameIOS}`
+ )
+ .then((result) => {
+ console.log(`moved geo model file from`, result);
+ })
+ .catch((error) => {
+ console.log(`error moving geo model file`, error);
+ });
RNFS.copyFile(
`${RNFS.MainBundlePath}/${taxonomyFilenameIOS}`,
`${RNFS.DocumentDirectoryPath}/${taxonomyFilenameIOS}`
@@ -147,6 +168,11 @@ export default function App(): React.JSX.Element {
'worklet';
try {
const timeBefore = new Date().getTime();
+
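+      // Hard-coded example coordinates for exercising the geo model; a real app would pass the device's current location.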
+ const latitude = 37.28889;
+ const longitude = -121.94415;
+ const elevation = 15.0;
+
const cvResult: InatVision.Result = InatVision.inatVision(frame, {
version: modelVersion,
modelPath,
@@ -156,6 +182,11 @@ export default function App(): React.JSX.Element {
negativeFilter,
numStoredResults: 4,
cropRatio: 0.9,
+ latitude,
+ longitude,
+ elevation,
+ geoModelPath,
+ useGeoModel,
patchedOrientationAndroid: 'portrait',
});
const timeAfter = new Date().getTime();
@@ -167,7 +198,13 @@ export default function App(): React.JSX.Element {
}
});
},
- [confidenceThreshold, filterByTaxonId, negativeFilter, handleResults]
+ [
+ confidenceThreshold,
+ filterByTaxonId,
+ negativeFilter,
+ handleResults,
+ useGeoModel,
+ ]
);
function selectImage() {
@@ -316,6 +353,10 @@ export default function App(): React.JSX.Element {
onPress={() => setViewStatus(VIEW_STATUS.NONE)}
title="Close"
/>
+
) : (
diff --git a/ios/Classifier/VCPGeoModel.h b/ios/Classifier/VCPGeoModel.h
new file mode 100644
index 0000000..edaab55
--- /dev/null
+++ b/ios/Classifier/VCPGeoModel.h
@@ -0,0 +1,24 @@
+//
+// VCPGeoModel.h
+// VisionCameraPluginInatVision
+//
+// Created by Alex Shepard on 10/18/24.
+// Copyright © 2024 iNaturalist. All rights reserved.
+//
+
+#import <Foundation/Foundation.h>
+@import CoreML;
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface VCPGeoModel : NSObject
+
+- (instancetype _Nullable)initWithModelPath:(NSString *)modelPath;
+- (MLMultiArray *)predictionsForLat:(float)latitude lng:(float)longitude elevation:(float)elevation;
+
+@property MLModel *geoModel;
+@property float locationChangeThreshold;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/ios/Classifier/VCPGeoModel.m b/ios/Classifier/VCPGeoModel.m
new file mode 100644
index 0000000..3df30de
--- /dev/null
+++ b/ios/Classifier/VCPGeoModel.m
@@ -0,0 +1,115 @@
+//
+// VCPGeoModel.m
+// VisionCameraPluginInatVision
+//
+// Created by Alex Shepard on 10/18/24.
+// Copyright © 2024 iNaturalist. All rights reserved.
+//
+
+#import "VCPGeoModel.h"
+
+@interface VCPGeoModel ()
+
+@property (nonatomic, strong) MLMultiArray *cachedGeoResult;
+@property (nonatomic) float cachedLatitude;
+@property (nonatomic) float cachedLongitude;
+@property (nonatomic) float cachedElevation;
+
+@end
+
+@implementation VCPGeoModel
+
+- (instancetype _Nullable)initWithModelPath:(NSString *)modelPath {
+ if (self = [super init]) {
+ NSURL *geoModelUrl = [NSURL fileURLWithPath:modelPath];
+ if (!geoModelUrl) {
+ NSLog(@"no file for geo model");
+ return nil;
+ }
+
+ NSError *loadError = nil;
+ self.geoModel = [MLModel modelWithContentsOfURL:geoModelUrl error:&loadError];
+ if (loadError) {
+ NSString *errString = [NSString stringWithFormat:@"error loading geo model: %@",
+ loadError.localizedDescription];
+ NSLog(@"%@", errString);
+ return nil;
+ }
+ if (!self.geoModel) {
+ NSLog(@"unable to make geo model");
+ return nil;
+ }
+
+        // default location change threshold, in degrees; moves smaller than this
+        // reuse the cached geo model result instead of re-running the model
+        self.locationChangeThreshold = 0.001;
+
+ }
+
+ return self;
+}
+
+/*
+ * iNat geo model input normalization documented here:
+ * https://github.com/inaturalist/inatGeoModelTraining/tree/main#input-normalization
+ */
+- (NSArray *)normAndEncodeLat:(float)latitude lng:(float)longitude elevation:(float)elevation {
+ float normLat = latitude / 90.0;
+ float normLng = longitude / 180.0;
+ float normElev = 0.0;
+ if (elevation > 0) {
+ normElev = elevation / 5705.63;
+ } else {
+ normElev = elevation / 32768.0;
+ }
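+    // encode lat/lng as sin/cos pairs so the coordinates wrap continuously around the globe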
+ float a = sin(M_PI * normLng);
+ float b = sin(M_PI * normLat);
+ float c = cos(M_PI * normLng);
+ float d = cos(M_PI * normLat);
+
+ return @[ @(a), @(b), @(c), @(d), @(normElev) ];
+}
+
+- (MLMultiArray *)predictionsForLat:(float)latitude lng:(float)longitude elevation:(float)elevation {
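+    // only re-run the geo model when the location has moved by more than locationChangeThreshold; otherwise serve the cached scores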
+ if (!self.cachedGeoResult ||
+ fabs(latitude - self.cachedLatitude) > self.locationChangeThreshold ||
+ fabs(longitude - self.cachedLongitude) > self.locationChangeThreshold ||
+ fabs(elevation - self.cachedElevation) > self.locationChangeThreshold)
+ {
+ self.cachedGeoResult = [self geoModelPredictionsForLat:latitude lng:longitude elevation:elevation];
+ self.cachedLatitude = latitude;
+ self.cachedLongitude = longitude;
+ self.cachedElevation = elevation;
+ }
+
+ return self.cachedGeoResult;
+}
+
+- (MLMultiArray *)geoModelPredictionsForLat:(float)latitude lng:(float)longitude elevation:(float)elevation {
+ NSArray *geoModelInputs = [self normAndEncodeLat:latitude
+ lng:longitude
+ elevation:elevation];
+
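+    // the geo model expects a 1x5 tensor named "input_1" and returns its scores under the "Identity" output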
+ NSError *err = nil;
+ MLMultiArray *mlInputs = [[MLMultiArray alloc] initWithShape:@[@1, @5]
+ dataType:MLMultiArrayDataTypeDouble
+ error:&err];
+ for (int i = 0; i < 5; i++) {
+ mlInputs[i] = geoModelInputs[i];
+ }
+ MLFeatureValue *fv = [MLFeatureValue featureValueWithMultiArray:mlInputs];
+
+ NSError *fpError = nil;
+ NSDictionary *fpDict = @{ @"input_1": fv };
+ MLDictionaryFeatureProvider *fp = [[MLDictionaryFeatureProvider alloc] initWithDictionary:fpDict
+ error:&fpError];
+
+ NSError *predError = nil;
+ id results = [self.geoModel predictionFromFeatures:fp error:&predError];
+ MLFeatureValue *result = [results featureValueForName:@"Identity"];
+ MLMultiArray *geoModelScores = result.multiArrayValue;
+
+ return geoModelScores;
+}
+
+@end
diff --git a/ios/Classifier/VCPTaxonomy.m b/ios/Classifier/VCPTaxonomy.m
index 2cbd9d0..2cf6174 100644
--- a/ios/Classifier/VCPTaxonomy.m
+++ b/ios/Classifier/VCPTaxonomy.m
@@ -16,6 +16,7 @@ @interface VCPTaxonomy ()
// this is a convenience array for testing
@property NSArray *leaves;
@property VCPNode *life;
+@property float taxonomyRollupCutoff;
@end
@implementation VCPTaxonomy
@@ -83,6 +84,8 @@ - (instancetype)initWithTaxonomyFile:(NSString *)taxaFile {
[self.life addChild:node];
}
}
+
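+        // leaf scores below this cutoff are dropped when rolling scores up the taxonomy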
+ self.taxonomyRollupCutoff = 0.01;
}
return self;
@@ -130,35 +133,34 @@ - (VCPPrediction *)inflateTopPredictionFromClassification:(MLMultiArray *)classi
return nil;
}
-// following
-// https://github.com/inaturalist/inatVisionAPI/blob/multiclass/inferrers/multi_class_inferrer.py#L136
- (NSDictionary *)aggregateScores:(MLMultiArray *)classification currentNode:(VCPNode *)node {
- if (node.children.count > 0) {
- // we'll populate this and return it
- NSMutableDictionary *allScores = [NSMutableDictionary dictionary];
-
- for (VCPNode *child in node.children) {
- NSDictionary *childScores = [self aggregateScores:classification currentNode:child];
- [allScores addEntriesFromDictionary:childScores];
- }
+ NSMutableDictionary *allScores = [NSMutableDictionary dictionary];
+ if (node.children.count > 0) {
float thisScore = 0.0f;
for (VCPNode *child in node.children) {
- thisScore += [allScores[child.taxonId] floatValue];
+ NSDictionary *childScores = [self aggregateScores:classification currentNode:child];
+ NSNumber *childScore = childScores[child.taxonId];
+
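+            // only children that clear the rollup cutoff contribute to this node's aggregate score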
+ if ([childScore floatValue] > self.taxonomyRollupCutoff) {
+ [allScores addEntriesFromDictionary:childScores];
+ thisScore += [childScore floatValue];
+ }
}
-
allScores[node.taxonId] = @(thisScore);
- return [NSDictionary dictionaryWithDictionary:allScores];
} else {
// base case, no children
NSAssert(node.leafId, @"node with taxonId %@ has no children but also has no leafId", node.taxonId);
NSNumber *leafScore = [classification objectAtIndexedSubscript:node.leafId.integerValue];
NSAssert(leafScore, @"node with leafId %@ has no score", node.leafId);
- return @{
- node.taxonId: leafScore
- };
+
+ if ([leafScore floatValue] > self.taxonomyRollupCutoff) {
+ allScores[node.taxonId] = leafScore;
+ }
}
+
+ return [allScores copy];
}
- (NSDictionary *)aggregateScores:(MLMultiArray *)classification {
diff --git a/ios/Classifier/VCPVisionModel.h b/ios/Classifier/VCPVisionModel.h
new file mode 100644
index 0000000..e0ebd52
--- /dev/null
+++ b/ios/Classifier/VCPVisionModel.h
@@ -0,0 +1,29 @@
+//
+// VCPVisionModel.h
+// VisionCameraPluginInatVision
+//
+// Created by Alex Shepard on 10/18/24.
+// Copyright © 2024 iNaturalist. All rights reserved.
+//
+
+@import CoreML;
+@import Vision;
+
+#import <UIKit/UIKit.h>
+
+NS_ASSUME_NONNULL_BEGIN
+
+@interface VCPVisionModel : NSObject
+
+- (instancetype _Nullable)initWithModelPath:(NSString *)modelPath;
+- (MLMultiArray * _Nullable)visionPredictionsFor:(CVPixelBufferRef)pixBuf orientation:(UIImageOrientation)orient;
+
+@property MLModel *cvModel;
+@property VNCoreMLModel *visionModel;
+
+@property VNCoreMLRequest *classification;
+@property NSArray *requests;
+
+@end
+
+NS_ASSUME_NONNULL_END
diff --git a/ios/Classifier/VCPVisionModel.m b/ios/Classifier/VCPVisionModel.m
new file mode 100644
index 0000000..dc317b3
--- /dev/null
+++ b/ios/Classifier/VCPVisionModel.m
@@ -0,0 +1,77 @@
+//
+// VCPVisionModel.m
+// VisionCameraPluginInatVision
+//
+// Created by Alex Shepard on 10/18/24.
+// Copyright © 2024 iNaturalist. All rights reserved.
+//
+
+#import "VCPVisionModel.h"
+
+@implementation VCPVisionModel
+
+- (instancetype _Nullable)initWithModelPath:(NSString *)modelPath {
+ if (self = [super init]) {
+ NSURL *visionModelUrl = [NSURL fileURLWithPath:modelPath];
+ if (!visionModelUrl) {
+ NSLog(@"no file for vision model");
+ return nil;
+ }
+
+ NSError *loadError = nil;
+ self.cvModel = [MLModel modelWithContentsOfURL:visionModelUrl error:&loadError];
+ if (loadError) {
+ NSString *errString = [NSString stringWithFormat:@"error loading cv model: %@",
+ loadError.localizedDescription];
+ NSLog(@"%@", errString);
+ return nil;
+ }
+ if (!self.cvModel) {
+ NSLog(@"unable to make cv model");
+ return nil;
+ }
+
+ NSError *modelError = nil;
+ self.visionModel = [VNCoreMLModel modelForMLModel:self.cvModel
+ error:&modelError];
+
+ self.classification = [[VNCoreMLRequest alloc] initWithModel:self.visionModel];
+ self.classification.imageCropAndScaleOption = VNImageCropAndScaleOptionCenterCrop;
+ self.requests = @[ self.classification ];
+ }
+
+ return self;
+}
+
+- (MLMultiArray * _Nullable)visionPredictionsFor:(CVPixelBufferRef)pixBuf orientation:(UIImageOrientation)orient {
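+    // run the Vision request synchronously on this frame and return the raw model scores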
+ CGImagePropertyOrientation cgOrient = [self cgOrientationFor:orient];
+ VNImageRequestHandler *handler = [[VNImageRequestHandler alloc] initWithCVPixelBuffer:pixBuf
+ orientation:cgOrient
+ options:@{}];
+
+    NSError *requestError = nil;
+    [handler performRequests:self.requests
+                       error:&requestError];
+    if (requestError) {
+        NSLog(@"vision request error: %@", requestError.localizedDescription);
+        return nil;
+    }
+
+ VNCoreMLRequest *request = self.requests.firstObject;
+ VNCoreMLFeatureValueObservation *firstResult = request.results.firstObject;
+ MLFeatureValue *firstFV = firstResult.featureValue;
+
+ return firstFV.multiArrayValue;
+}
+
+- (CGImagePropertyOrientation)cgOrientationFor:(UIImageOrientation)uiOrientation {
+ switch (uiOrientation) {
+ case UIImageOrientationUp: return kCGImagePropertyOrientationUp;
+ case UIImageOrientationDown: return kCGImagePropertyOrientationDown;
+ case UIImageOrientationLeft: return kCGImagePropertyOrientationLeft;
+ case UIImageOrientationRight: return kCGImagePropertyOrientationRight;
+ case UIImageOrientationUpMirrored: return kCGImagePropertyOrientationUpMirrored;
+ case UIImageOrientationDownMirrored: return kCGImagePropertyOrientationDownMirrored;
+ case UIImageOrientationLeftMirrored: return kCGImagePropertyOrientationLeftMirrored;
+ case UIImageOrientationRightMirrored: return kCGImagePropertyOrientationRightMirrored;
+ }
+}
+
+@end
+
diff --git a/ios/VisionCameraPluginInatVision.xcodeproj/project.pbxproj b/ios/VisionCameraPluginInatVision.xcodeproj/project.pbxproj
index 29b649e..5fc7c68 100644
--- a/ios/VisionCameraPluginInatVision.xcodeproj/project.pbxproj
+++ b/ios/VisionCameraPluginInatVision.xcodeproj/project.pbxproj
@@ -13,6 +13,8 @@
8F3C41832A4AFF5B008FBC67 /* VCPClassifier.m in Sources */ = {isa = PBXBuildFile; fileRef = 8F3C417C2A4AFF5B008FBC67 /* VCPClassifier.m */; };
8F3C41842A4AFF5B008FBC67 /* VCPTaxonomy.m in Sources */ = {isa = PBXBuildFile; fileRef = 8F3C41802A4AFF5B008FBC67 /* VCPTaxonomy.m */; };
8F3C41852A4AFF5B008FBC67 /* VCPPrediction.m in Sources */ = {isa = PBXBuildFile; fileRef = 8F3C41812A4AFF5B008FBC67 /* VCPPrediction.m */; };
+ FA5FF9612CC317E400BA8E22 /* VCPGeoModel.m in Sources */ = {isa = PBXBuildFile; fileRef = FA5FF9602CC317E400BA8E22 /* VCPGeoModel.m */; };
+ FA5FF9642CC3182D00BA8E22 /* VCPVisionModel.m in Sources */ = {isa = PBXBuildFile; fileRef = FA5FF9632CC3182D00BA8E22 /* VCPVisionModel.m */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
@@ -39,6 +41,10 @@
8F3C417F2A4AFF5B008FBC67 /* VCPTaxonomy.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = VCPTaxonomy.h; sourceTree = "<group>"; };
8F3C41802A4AFF5B008FBC67 /* VCPTaxonomy.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = VCPTaxonomy.m; sourceTree = "<group>"; };
8F3C41812A4AFF5B008FBC67 /* VCPPrediction.m */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.objc; path = VCPPrediction.m; sourceTree = "<group>"; };
+ FA5FF95F2CC317E400BA8E22 /* VCPGeoModel.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = VCPGeoModel.h; sourceTree = "<group>"; };
+ FA5FF9602CC317E400BA8E22 /* VCPGeoModel.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = VCPGeoModel.m; sourceTree = "<group>"; };
+ FA5FF9622CC3182D00BA8E22 /* VCPVisionModel.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; path = VCPVisionModel.h; sourceTree = "<group>"; };
+ FA5FF9632CC3182D00BA8E22 /* VCPVisionModel.m */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.objc; path = VCPVisionModel.m; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@@ -89,6 +95,10 @@
8F3C41812A4AFF5B008FBC67 /* VCPPrediction.m */,
8F3C417F2A4AFF5B008FBC67 /* VCPTaxonomy.h */,
8F3C41802A4AFF5B008FBC67 /* VCPTaxonomy.m */,
+ FA5FF95F2CC317E400BA8E22 /* VCPGeoModel.h */,
+ FA5FF9602CC317E400BA8E22 /* VCPGeoModel.m */,
+ FA5FF9622CC3182D00BA8E22 /* VCPVisionModel.h */,
+ FA5FF9632CC3182D00BA8E22 /* VCPVisionModel.m */,
);
path = Classifier;
sourceTree = "<group>";
@@ -153,7 +163,9 @@
8F3C41852A4AFF5B008FBC67 /* VCPPrediction.m in Sources */,
8F3C41832A4AFF5B008FBC67 /* VCPClassifier.m in Sources */,
1DB58401D995067FF278746C /* VisionCameraPluginInatVision.m in Sources */,
+ FA5FF9612CC317E400BA8E22 /* VCPGeoModel.m in Sources */,
8F3C41842A4AFF5B008FBC67 /* VCPTaxonomy.m in Sources */,
+ FA5FF9642CC3182D00BA8E22 /* VCPVisionModel.m in Sources */,
8F3C41822A4AFF5B008FBC67 /* VCPNode.m in Sources */,
8F2D62CB2B1B9DD500412573 /* VisionCameraPluginInatVisionModule.m in Sources */,
);
diff --git a/ios/VisionCameraPluginInatVision/VisionCameraPluginInatVision.m b/ios/VisionCameraPluginInatVision/VisionCameraPluginInatVision.m
index 158432f..eda02d2 100644
--- a/ios/VisionCameraPluginInatVision/VisionCameraPluginInatVision.m
+++ b/ios/VisionCameraPluginInatVision/VisionCameraPluginInatVision.m
@@ -6,157 +6,198 @@
@import UIKit;
@import Vision;
@import CoreML;
+@import Accelerate;
+@import CoreGraphics;
#import "VCPTaxonomy.h"
#import "VCPPrediction.h"
+#import "VCPGeoModel.h"
+#import "VCPVisionModel.h"
@interface VisionCameraPluginInatVisionPlugin : FrameProcessorPlugin
-+ (VCPTaxonomy*) taxonomyWithTaxonomyFile:(NSString*)taxonomyPath;
-+ (VNCoreMLModel*) visionModelWithModelFile:(NSString*)modelPath;
++ (VCPTaxonomy *) taxonomyWithTaxonomyFile:(NSString *)taxonomyPath;
++ (VCPGeoModel *)geoModelWithModelFile:(NSString *)geoModelPath;
++ (VCPVisionModel *)visionModelWithModelFile:(NSString *)modelPath;
@end
@implementation VisionCameraPluginInatVisionPlugin
-+ (VCPTaxonomy*) taxonomyWithTaxonomyFile:(NSString*)taxonomyPath {
- static VCPTaxonomy* taxonomy = nil;
- if (taxonomy == nil) {
- taxonomy = [[VCPTaxonomy alloc] initWithTaxonomyFile:taxonomyPath];
- }
- return taxonomy;
++ (VCPTaxonomy *)taxonomyWithTaxonomyFile:(NSString *)taxonomyPath {
+ static VCPTaxonomy *taxonomy = nil;
+ if (taxonomy == nil) {
+ taxonomy = [[VCPTaxonomy alloc] initWithTaxonomyFile:taxonomyPath];
+ }
+ return taxonomy;
}
-+ (VNCoreMLModel*) visionModelWithModelFile:(NSString*)modelPath {
- static VNCoreMLModel* visionModel = nil;
- if (visionModel == nil) {
- // Setup vision
- NSURL *modelUrl = [NSURL fileURLWithPath:modelPath];
- if (!modelUrl) {
- // TODO: handle this error
- // [self.delegate classifierError:@"no file for optimized model"];
- NSLog(@"no file for optimized model");
- return nil;
++ (VCPGeoModel *)geoModelWithModelFile:(NSString *)modelPath {
+ static VCPGeoModel *geoModel = nil;
+
+ if (geoModel == nil) {
+ geoModel = [[VCPGeoModel alloc] initWithModelPath:modelPath];
}
+
+ return geoModel;
+}
- NSError *loadError = nil;
- MLModel *model = [MLModel modelWithContentsOfURL:modelUrl
- error:&loadError];
- if (loadError) {
- NSString *errString = [NSString stringWithFormat:@"error loading model: %@",
- loadError.localizedDescription];
- NSLog(@"%@", errString);
- // TODO: handle this error
- // [self.delegate classifierError:errString];
- return nil;
- }
- if (!model) {
- // TODO: handle this error
- // [self.delegate classifierError:@"unable to make model"];
- NSLog(@"unable to make model");
- return nil;
++ (VCPVisionModel *)visionModelWithModelFile:(NSString *)modelPath {
+ static VCPVisionModel *cvModel = nil;
+
+ if (cvModel == nil) {
+ cvModel = [[VCPVisionModel alloc] initWithModelPath:modelPath];
}
+
+ return cvModel;
+}
+
+- (instancetype)initWithProxy:(VisionCameraProxyHolder*)proxy
+ withOptions:(NSDictionary* _Nullable)options {
+ self = [super initWithProxy:proxy withOptions:options];
+ return self;
+}
- NSError *modelError = nil;
- visionModel = [VNCoreMLModel modelForMLModel:model
- error:&modelError];
- if (modelError) {
- NSString *errString = [NSString stringWithFormat:@"error making vision model: %@",
- modelError.localizedDescription];
- // [self.delegate classifierError:errString];
- NSLog(@"%@", errString);
+- (MLMultiArray * _Nullable)combineVisionScores:(MLMultiArray *)visionScores with:(MLMultiArray *)geoScores error:(NSError **)error {
+ // Ensure both arrays have the same shape
+ if (![visionScores.shape isEqualToArray:geoScores.shape]) {
+ NSDictionary *userInfo = @{
+ NSLocalizedDescriptionKey: @"Arrays must have the same shape",
+ };
+ *error = [NSError errorWithDomain:@"MLMultiArrayErrorDomain"
+ code:1
+ userInfo:userInfo];
return nil;
}
- if (!visionModel) {
- // [self.delegate classifierError:@"unable to make vision model"];
- NSLog(@"unable to make vision model");
+
+ // Create a result MLMultiArray with the same shape as the input arrays
+ MLMultiArray *combinedArray = [[MLMultiArray alloc] initWithShape:visionScores.shape
+ dataType:MLMultiArrayDataTypeFloat32
+ error:error];
+ if (!combinedArray) {
+ NSDictionary *userInfo = @{
+ NSLocalizedDescriptionKey: @"Failed to make combined array",
+ };
+ *error = [NSError errorWithDomain:@"MLMultiArrayErrorDomain"
+ code:2
+ userInfo:userInfo];
return nil;
}
- }
- return visionModel;
+
+ // Get the data pointers
+ float *visionData = (float *)visionScores.dataPointer;
+ float *geoData = (float *)geoScores.dataPointer;
+ float *combinedData = (float *)combinedArray.dataPointer;
+
+ // Get the number of elements
+ NSInteger count = visionScores.count;
+
+ // Perform element-wise multiplication using vDSP_vmul
+ vDSP_vmul(visionData, 1, geoData, 1, combinedData, 1, count);
+
+ return combinedArray;
}
-- (instancetype)initWithProxy:(VisionCameraProxyHolder*)proxy
- withOptions:(NSDictionary* _Nullable)options {
- self = [super initWithProxy:proxy withOptions:options];
- return self;
+- (MLMultiArray *)normalizeMultiArray:(MLMultiArray *)mlArray error:(NSError **)error {
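+  // normalize the scores in place so they sum to 1; returns nil (with an error) if the sum is zero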
+ NSInteger count = mlArray.count;
+ float *mlData = (float *)mlArray.dataPointer;
+
+ float sum = 0.0;
+ vDSP_sve(mlData, 1, &sum, count);
+
+ if (sum != 0) {
+ vDSP_vsdiv(mlData, 1, &sum, mlData, 1, count);
+ } else {
+ NSDictionary *userInfo = @{
+ NSLocalizedDescriptionKey: @"Sum of elements is zero, normalization not possible."
+ };
+ *error = [NSError errorWithDomain:@"MLMultiArrayErrorDomain"
+ code:3
+ userInfo:userInfo];
+ return nil;
+ }
+
+ return mlArray;
}
- (id)callback:(Frame*)frame withArguments:(NSDictionary*)arguments {
- // Start timestamp
- NSDate *startDate = [NSDate date];
-
- // Log arguments
- NSLog(@"inatVision arguments: %@", arguments);
- // Destructure version out of options
- NSString* version = arguments[@"version"];
- // Destructure model path out of options
- NSString* modelPath = arguments[@"modelPath"];
- // Destructure taxonomy path out of options
- NSString* taxonomyPath = arguments[@"taxonomyPath"];
-
- CMSampleBufferRef buffer = frame.buffer;
- UIImageOrientation orientation = frame.orientation;
-
- CVImageBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(buffer);
- if (!pixelBuffer) {
- NSLog(@"unable to get pixel buffer");
- return nil;
- }
-
- // Setup taxonomy
- VCPTaxonomy *taxonomy = [VisionCameraPluginInatVisionPlugin taxonomyWithTaxonomyFile:taxonomyPath];
-
- // Setup vision model
- VNCoreMLModel *visionModel = [VisionCameraPluginInatVisionPlugin visionModelWithModelFile:modelPath];
-
- // Setup top branches
- NSMutableArray *topBranches = [NSMutableArray array];
- VNRequestCompletionHandler recognitionHandler = ^(VNRequest * _Nonnull request, NSError * _Nullable error) {
- VNCoreMLFeatureValueObservation *firstResult = request.results.firstObject;
- MLFeatureValue *firstFV = firstResult.featureValue;
- MLMultiArray *mm = firstFV.multiArrayValue;
-
- NSArray *bestBranch = [taxonomy inflateTopBranchFromClassification:mm];
+ // Start timestamp
+ NSDate *startDate = [NSDate date];
+
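+  // If the caller enabled the geo model and supplied a location and geo model path, compute the geo scores up front so they can be combined with the vision scores below.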
+ MLMultiArray *geoModelPreds = nil;
+ if ([arguments objectForKey:@"useGeoModel"] &&
+ [[arguments objectForKey:@"useGeoModel"] boolValue] &&
+ [arguments objectForKey:@"latitude"] &&
+ [arguments objectForKey:@"longitude"] &&
+ [arguments objectForKey:@"elevation"] &&
+ [arguments objectForKey:@"geoModelPath"])
+ {
+ VCPGeoModel *geoModel = [VisionCameraPluginInatVisionPlugin geoModelWithModelFile:arguments[@"geoModelPath"]];
+ geoModelPreds = [geoModel predictionsForLat:[[arguments objectForKey:@"latitude"] floatValue]
+ lng:[[arguments objectForKey:@"longitude"] floatValue]
+ elevation:[[arguments objectForKey:@"elevation"] floatValue]];
+ } else {
+ NSLog(@"not doing anything geo related.");
+ }
+
+ // Log arguments
+ NSLog(@"inatVision arguments: %@", arguments);
+ // Destructure version out of options
+ NSString* version = arguments[@"version"];
+ // Destructure model path out of options
+ NSString* modelPath = arguments[@"modelPath"];
+ // Destructure taxonomy path out of options
+ NSString* taxonomyPath = arguments[@"taxonomyPath"];
+
+  CMSampleBufferRef buffer = frame.buffer;
+  CVImageBufferRef pixelBuffer = CMSampleBufferGetImageBuffer(buffer);
+  if (!pixelBuffer) {
+    NSLog(@"unable to get pixel buffer");
+    return nil;
+  }
+  UIImageOrientation orientation = frame.orientation;
+
+ VCPVisionModel *cvModel = [VisionCameraPluginInatVisionPlugin visionModelWithModelFile:modelPath];
+ MLMultiArray *visionScores = [cvModel visionPredictionsFor:pixelBuffer orientation:orientation];
+
+  MLMultiArray *results = nil;
+
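+  // Weight the vision scores by the geo scores (element-wise product), then re-normalize so the combined scores sum to 1.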
+  if (geoModelPreds != nil) {
+    NSError *err = nil;
+    results = [self combineVisionScores:visionScores with:geoModelPreds error:&err];
+    results = [self normalizeMultiArray:results error:&err];
+    if (err != nil || results == nil) {
+      NSLog(@"error combining vision and geo scores: %@", err.localizedDescription);
+      results = visionScores;
+    }
+  } else {
+    results = visionScores;
+  }
+
+ // Setup taxonomy
+ VCPTaxonomy *taxonomy = [VisionCameraPluginInatVisionPlugin taxonomyWithTaxonomyFile:taxonomyPath];
+
+ NSMutableArray *topBranches = [NSMutableArray array];
+ NSArray *bestBranch = [taxonomy inflateTopBranchFromClassification:results];
// add this to the end of the recent top branches array
[topBranches addObject:bestBranch];
- };
-
- VNCoreMLRequest *objectRecognition = [[VNCoreMLRequest alloc] initWithModel:visionModel
- completionHandler:recognitionHandler];
- objectRecognition.imageCropAndScaleOption = VNImageCropAndScaleOptionCenterCrop;
- NSArray *requests = @[objectRecognition];
-
- VNImageRequestHandler *handler = [[VNImageRequestHandler alloc] initWithCVPixelBuffer:pixelBuffer
- orientation:orientation
- options:@{}];
- NSError *requestError = nil;
- [handler performRequests:requests
- error:&requestError];
- if (requestError) {
- NSString *errString = [NSString stringWithFormat:@"got a request error: %@",
- requestError.localizedDescription];
- NSLog(@"%@", errString);
- return nil;
- }
-
- // convert the VCPPredictions in the bestRecentBranch into dicts
- NSMutableArray *bestBranchAsDict = [NSMutableArray array];
- for (VCPPrediction *prediction in topBranches.firstObject) {
- [bestBranchAsDict addObject:[prediction asDict]];
- }
-
- // Create a new dictionary with the bestBranchAsDict under the key "predictions"
- NSDictionary *response = [NSDictionary dictionary];
- response = @{@"predictions": bestBranchAsDict};
-
- // End timestamp
- NSTimeInterval timeElapsed = [[NSDate date] timeIntervalSinceDate:startDate];
- NSLog(@"inatVision took %f seconds", timeElapsed);
-
- return response;
+
+ // convert the VCPPredictions in the bestRecentBranch into dicts
+ NSMutableArray *bestBranchAsDict = [NSMutableArray array];
+ for (VCPPrediction *prediction in topBranches.firstObject) {
+ [bestBranchAsDict addObject:[prediction asDict]];
+ }
+
+ NSTimeInterval timeElapsed = [[NSDate date] timeIntervalSinceDate:startDate];
+ NSLog(@"inatVision took %f seconds", timeElapsed);
+
+  // Return the top branch and the elapsed time to the caller
+  NSDictionary *response = @{
+    @"predictions": bestBranchAsDict,
+    @"timeElapsed": @(timeElapsed),
+  };
+
+  return response;
}
VISION_EXPORT_FRAME_PROCESSOR(VisionCameraPluginInatVisionPlugin, inatVision)
@end
+
diff --git a/src/index.tsx b/src/index.tsx
index 9ce5599..7961344 100644
--- a/src/index.tsx
+++ b/src/index.tsx
@@ -299,7 +299,7 @@ interface Options {
*/
version: string;
/**
- * The path to the model file.
+ * The path to the computer vision model file.
*/
modelPath: string;
/**
@@ -339,6 +339,31 @@ interface Options {
* As a fraction of 1. E.g. 0.8 will crop the center 80% of the frame before sending it to the cv model.
*/
cropRatio?: number;
+  /**
+   * Whether to use the geo model to weight the computer vision predictions.
+   */
+  useGeoModel?: boolean;
+  /**
+   * The latitude of the location, in decimal degrees.
+   */
+  latitude?: number;
+  /**
+   * The longitude of the location, in decimal degrees.
+   */
+  longitude?: number;
+  /**
+   * The elevation of the location, in meters.
+   */
+  elevation?: number;
+  /**
+   * The path to the geo model file.
+   */
+  geoModelPath?: string;
// Patches
/**
* Currently, using react-native-vision-camera v3.9.1, Android does not support orientation changes.