Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

vision camera improvements for iOS #34

Merged
merged 18 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion example/src/App.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,20 @@ import * as InatVision from 'vision-camera-plugin-inatvision';

// File names of the vision model, taxonomy and geo model for each platform.
const modelFilenameAndroid = 'small_inception_tf1.tflite';
const taxonomyFilenameAndroid = 'small_export_tax.csv';
// NOTE(review): placeholder value; presumably there is no Android geo model yet — confirm.
const geoModelFilenameAndroid = 'not_implemented';
const modelFilenameIOS = 'small_inception_tf1.mlmodelc';
const taxonomyFilenameIOS = 'small_export_tax.json';
const geoModelFilenameIOS = 'small_geomodel.mlmodelc';
// Version string passed to the plugin together with the model path.
const modelVersion = '1.0';

// Location of the vision model inside the app's document directory.
const modelPath = `${RNFS.DocumentDirectoryPath}/${
  Platform.OS === 'ios' ? modelFilenameIOS : modelFilenameAndroid
}`;
// Location of the geo model inside the app's document directory.
const geoModelPath = `${RNFS.DocumentDirectoryPath}/${
  Platform.OS === 'ios' ? geoModelFilenameIOS : geoModelFilenameAndroid
}`;
const taxonomyPath =
Platform.OS === 'ios'
? `${RNFS.DocumentDirectoryPath}/${taxonomyFilenameIOS}`
Expand All @@ -49,6 +55,7 @@ export default function App(): React.JSX.Element {
undefined
);
const [negativeFilter, setNegativeFilter] = useState(false);
const [useGeoModel, setUseGeoModel] = useState(false);

enum VIEW_STATUS {
NONE,
Expand All @@ -66,6 +73,10 @@ export default function App(): React.JSX.Element {
setNegativeFilter(!negativeFilter);
};

// Flips the useGeoModel flag. The functional updater derives the next value
// from the latest state instead of the value captured by this render's
// closure, so back-to-back toggles cannot be lost to a stale read.
const toggleUseGeoModel = () => {
  setUseGeoModel((enabled) => !enabled);
};

const changeFilterByTaxonId = () => {
if (!filterByTaxonId) {
setFilterByTaxonId('47126');
Expand Down Expand Up @@ -110,6 +121,16 @@ export default function App(): React.JSX.Element {
.catch((error) => {
console.log(`error moving model file`, error);
});
// Copy the geo model from the app bundle into the document directory, which
// is where geoModelPath points. Success and failure are only logged.
RNFS.copyFile(
  RNFS.MainBundlePath + '/' + geoModelFilenameIOS,
  RNFS.DocumentDirectoryPath + '/' + geoModelFilenameIOS
)
  .then((copyResult) => {
    console.log('moved geo model file from', copyResult);
  })
  .catch((copyError) => {
    console.log('error moving geo model file', copyError);
  });
RNFS.copyFile(
`${RNFS.MainBundlePath}/${taxonomyFilenameIOS}`,
`${RNFS.DocumentDirectoryPath}/${taxonomyFilenameIOS}`
Expand Down Expand Up @@ -147,6 +168,11 @@ export default function App(): React.JSX.Element {
'worklet';
try {
const timeBefore = new Date().getTime();

const latitude = 37.28889;
alexshepard marked this conversation as resolved.
Show resolved Hide resolved
const longitude = -121.94415;
const elevation = 15.0;

const cvResult: InatVision.Result = InatVision.inatVision(frame, {
version: modelVersion,
modelPath,
Expand All @@ -156,6 +182,11 @@ export default function App(): React.JSX.Element {
negativeFilter,
numStoredResults: 4,
cropRatio: 0.9,
latitude,
longitude,
elevation,
geoModelPath,
useGeoModel,
patchedOrientationAndroid: 'portrait',
});
const timeAfter = new Date().getTime();
Expand All @@ -167,7 +198,13 @@ export default function App(): React.JSX.Element {
}
});
},
[confidenceThreshold, filterByTaxonId, negativeFilter, handleResults]
[
confidenceThreshold,
filterByTaxonId,
negativeFilter,
handleResults,
useGeoModel,
]
);

function selectImage() {
Expand Down Expand Up @@ -316,6 +353,10 @@ export default function App(): React.JSX.Element {
onPress={() => setViewStatus(VIEW_STATUS.NONE)}
title="Close"
/>
<Button
onPress={toggleUseGeoModel}
title={useGeoModel ? 'Disable Geo Model' : 'Enable Geo Model'}
/>
</View>
</View>
) : (
Expand Down
24 changes: 24 additions & 0 deletions ios/Classifier/VCPGeoModel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//
// VCPGeoModel.h
// VisionCameraPluginInatVision
//
// Created by Alex Shepard on 10/18/24.
// Copyright © 2024 iNaturalist. All rights reserved.
//

#import <Foundation/Foundation.h>
@import CoreML;

NS_ASSUME_NONNULL_BEGIN

/**
 * Wraps a Core ML geo model and produces its output for a latitude,
 * longitude and elevation.
 */
@interface VCPGeoModel : NSObject

/// The Core ML model loaded by -initWithModelPath:.
@property (atomic, strong, readwrite) MLModel *geoModel;

/// Threshold that the absolute change in latitude, longitude or elevation is
/// compared against when deciding whether to run the model again. The
/// initializer assigns a default value.
@property (atomic, assign, readwrite) float locationChangeThreshold;

/// Loads the geo model found at modelPath.
/// @param modelPath File system path of the model to load.
/// @return The initialized instance, or nil when the model cannot be loaded.
- (nullable instancetype)initWithModelPath:(NSString *)modelPath;

/// Returns the geo model output for the given location.
/// @param latitude  Latitude; the implementation divides it by 90 before encoding.
/// @param longitude Longitude; the implementation divides it by 180 before encoding.
/// @param elevation Elevation; normalized by the implementation before use.
- (MLMultiArray *)predictionsForLat:(float)latitude
                                lng:(float)longitude
                          elevation:(float)elevation;

@end

NS_ASSUME_NONNULL_END
115 changes: 115 additions & 0 deletions ios/Classifier/VCPGeoModel.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
//
// VCPGeoModel.m
// VisionCameraPluginInatVision
//
// Created by Alex Shepard on 10/18/24.
// Copyright © 2024 iNaturalist. All rights reserved.
//

#import "VCPGeoModel.h"

// Private state: the most recent geo model output together with the location
// it was computed for, used by -predictionsForLat:lng:elevation:.
@interface VCPGeoModel ()

@property (strong, nonatomic) MLMultiArray *cachedGeoResult;
@property (assign, nonatomic) float cachedLatitude;
@property (assign, nonatomic) float cachedLongitude;
@property (assign, nonatomic) float cachedElevation;

@end

@implementation VCPGeoModel

#pragma mark - Lifecycle

/**
 * Loads the Core ML geo model stored at modelPath.
 *
 * @param modelPath File system path of the model to load.
 * @return The initialized instance, or nil when the path is empty or the
 *         model cannot be loaded (the reason is logged).
 */
- (instancetype _Nullable)initWithModelPath:(NSString *)modelPath {
    self = [super init];
    if (!self) {
        return nil;
    }

    // +fileURLWithPath: does not return nil for a missing file and raises on a
    // nil path, so the path itself is validated before building the URL.
    if (modelPath.length == 0) {
        NSLog(@"no file for geo model");
        return nil;
    }
    NSURL *geoModelUrl = [NSURL fileURLWithPath:modelPath];

    // Success is decided by the returned object, not by the error pointer,
    // which is only meaningful when the call fails.
    NSError *loadError = nil;
    MLModel *model = [MLModel modelWithContentsOfURL:geoModelUrl error:&loadError];
    if (!model) {
        if (loadError) {
            NSLog(@"error loading geo model: %@", loadError.localizedDescription);
        } else {
            NSLog(@"unable to make geo model");
        }
        return nil;
    }
    _geoModel = model;

    // Default location change threshold. It has to be non-negative: it is
    // compared against absolute differences, so a negative value would make
    // every call re-run the model and the cached result would never be used.
    _locationChangeThreshold = 0.001f;

    return self;
}

#pragma mark - Public

/**
 * Returns the geo model output for a location, re-running the model only when
 * there is no cached output or when latitude, longitude or elevation differs
 * from the cached location by more than locationChangeThreshold.
 *
 * @return The (possibly cached) model output; nil when the prediction failed.
 */
- (MLMultiArray *)predictionsForLat:(float)latitude lng:(float)longitude elevation:(float)elevation {
    float threshold = self.locationChangeThreshold;
    BOOL locationChanged = fabsf(latitude - self.cachedLatitude) > threshold ||
                           fabsf(longitude - self.cachedLongitude) > threshold ||
                           fabsf(elevation - self.cachedElevation) > threshold;

    if (!self.cachedGeoResult || locationChanged) {
        self.cachedGeoResult = [self geoModelPredictionsForLat:latitude
                                                           lng:longitude
                                                     elevation:elevation];
        self.cachedLatitude = latitude;
        self.cachedLongitude = longitude;
        self.cachedElevation = elevation;
    }

    return self.cachedGeoResult;
}

#pragma mark - Private

/*
 * iNat geo model input normalization documented here:
 * https://github.com/inaturalist/inatGeoModelTraining/tree/main#input-normalization
 *
 * Returns five NSNumbers in this order: sin(pi * lng / 180), sin(pi * lat / 90),
 * cos(pi * lng / 180), cos(pi * lat / 90), normalized elevation.
 */
- (NSArray *)normAndEncodeLat:(float)latitude lng:(float)longitude elevation:(float)elevation {
    float normLat = latitude / 90.0;
    float normLng = longitude / 180.0;

    // Positive and non-positive elevations are scaled by different constants.
    float normElev = 0.0;
    if (elevation > 0) {
        normElev = elevation / 5705.63;
    } else {
        normElev = elevation / 32768.0;
    }

    float a = sin(M_PI * normLng);
    float b = sin(M_PI * normLat);
    float c = cos(M_PI * normLng);
    float d = cos(M_PI * normLat);

    return @[ @(a), @(b), @(c), @(d), @(normElev) ];
}

/**
 * Runs the geo model once for the given location, without consulting the cache.
 *
 * The encoded inputs are written into a 1x5 double array and passed as the
 * "input_1" feature; the scores are read from the "Identity" output.
 *
 * @return The model's output array, or nil when building the input or running
 *         the prediction fails (the failure is logged).
 */
- (MLMultiArray *)geoModelPredictionsForLat:(float)latitude lng:(float)longitude elevation:(float)elevation {
    NSArray<NSNumber *> *geoModelInputs = [self normAndEncodeLat:latitude
                                                             lng:longitude
                                                       elevation:elevation];

    NSError *inputError = nil;
    MLMultiArray *mlInputs = [[MLMultiArray alloc] initWithShape:@[@1, @5]
                                                        dataType:MLMultiArrayDataTypeDouble
                                                           error:&inputError];
    if (!mlInputs) {
        NSLog(@"error creating geo model input: %@", inputError.localizedDescription);
        return nil;
    }
    for (NSInteger i = 0; i < (NSInteger)geoModelInputs.count; i++) {
        mlInputs[i] = geoModelInputs[i];
    }
    MLFeatureValue *inputValue = [MLFeatureValue featureValueWithMultiArray:mlInputs];

    NSError *providerError = nil;
    MLDictionaryFeatureProvider *provider =
        [[MLDictionaryFeatureProvider alloc] initWithDictionary:@{ @"input_1": inputValue }
                                                          error:&providerError];
    if (!provider) {
        NSLog(@"error creating geo model feature provider: %@", providerError.localizedDescription);
        return nil;
    }

    NSError *predictionError = nil;
    id <MLFeatureProvider> results = [self.geoModel predictionFromFeatures:provider
                                                                     error:&predictionError];
    if (!results) {
        NSLog(@"error running geo model: %@", predictionError.localizedDescription);
        return nil;
    }

    return [results featureValueForName:@"Identity"].multiArrayValue;
}

@end
34 changes: 18 additions & 16 deletions ios/Classifier/VCPTaxonomy.m
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ @interface VCPTaxonomy ()
// this is a convenience array for testing
@property NSArray *leaves;
@property VCPNode *life;
@property float taxonomyRollupCutoff;
@end

@implementation VCPTaxonomy
Expand Down Expand Up @@ -83,6 +84,8 @@ - (instancetype)initWithTaxonomyFile:(NSString *)taxaFile {
[self.life addChild:node];
}
}

self.taxonomyRollupCutoff = 0.01;
}

return self;
Expand Down Expand Up @@ -130,35 +133,34 @@ - (VCPPrediction *)inflateTopPredictionFromClassification:(MLMultiArray *)classi
return nil;
}

// following
// https://github.com/inaturalist/inatVisionAPI/blob/multiclass/inferrers/multi_class_inferrer.py#L136
/**
 * Recursively collects scores for node and its descendants, keyed by taxonId.
 *
 * A leaf contributes its score from classification (looked up by leafId) only
 * when that score is above taxonomyRollupCutoff. A node with children gets the
 * sum of the scores of those children whose own score is above the cutoff;
 * the entries of children at or below the cutoff are left out of the result.
 *
 * @param classification Model output indexed by leafId.
 * @param node           Root of the subtree to aggregate.
 * @return An immutable dictionary mapping taxonId to score.
 */
- (NSDictionary *)aggregateScores:(MLMultiArray *)classification currentNode:(VCPNode *)node {
    NSMutableDictionary *allScores = [NSMutableDictionary dictionary];

    if (node.children.count > 0) {
        float thisScore = 0.0f;
        for (VCPNode *child in node.children) {
            NSDictionary *childScores = [self aggregateScores:classification currentNode:child];
            // nil when the child was dropped by the cutoff; floatValue is then 0
            NSNumber *childScore = childScores[child.taxonId];

            if ([childScore floatValue] > self.taxonomyRollupCutoff) {
                [allScores addEntriesFromDictionary:childScores];
                thisScore += [childScore floatValue];
            }
        }

        allScores[node.taxonId] = @(thisScore);
    } else {
        // base case, no children
        NSAssert(node.leafId, @"node with taxonId %@ has no children but also has no leafId", node.taxonId);
        NSNumber *leafScore = [classification objectAtIndexedSubscript:node.leafId.integerValue];
        NSAssert(leafScore, @"node with leafId %@ has no score", node.leafId);

        if ([leafScore floatValue] > self.taxonomyRollupCutoff) {
            allScores[node.taxonId] = leafScore;
        }
    }

    return [allScores copy];
}

- (NSDictionary *)aggregateScores:(MLMultiArray *)classification {
Expand Down
29 changes: 29 additions & 0 deletions ios/Classifier/VCPVisionModel.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//
// VCPVisionModel.h
// VisionCameraPluginInatVision
//
// Created by Alex Shepard on 10/18/24.
// Copyright © 2024 iNaturalist. All rights reserved.
//

@import CoreML;
@import Vision;

#import <Foundation/Foundation.h>

NS_ASSUME_NONNULL_BEGIN

/**
 * Vision model wrapper exposing Core ML / Vision objects and a prediction
 * entry point for pixel buffers.
 * NOTE(review): the implementation is not shown alongside this header; the
 * property descriptions below are inferred from the declared types — confirm
 * against VCPVisionModel.m.
 */
@interface VCPVisionModel : NSObject

/// Core ML model; presumably loaded from the path given to the initializer.
@property (atomic, strong, readwrite) MLModel *cvModel;

/// Vision framework model; presumably created from cvModel.
@property (atomic, strong, readwrite) VNCoreMLModel *visionModel;

/// Core ML request object; presumably the classification request for visionModel.
@property (atomic, strong, readwrite) VNCoreMLRequest *classification;

/// Array of requests; presumably the requests performed for each prediction.
@property (atomic, strong, readwrite) NSArray *requests;

/// Creates the wrapper for the model at modelPath.
/// @return The initialized instance, or nil.
- (nullable instancetype)initWithModelPath:(NSString *)modelPath;

/// Produces predictions for a pixel buffer with the given image orientation.
/// @return The predictions as a multi array, or nil.
- (nullable MLMultiArray *)visionPredictionsFor:(CVPixelBufferRef)pixBuf
                                    orientation:(UIImageOrientation)orient;

@end

NS_ASSUME_NONNULL_END
Loading
Loading