Skip to content

Commit

Permalink
Improve documentation
Browse files Browse the repository at this point in the history
  • Loading branch information
Kalior committed Aug 24, 2018
1 parent 948fe59 commit ba5f1d1
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 15 deletions.
6 changes: 3 additions & 3 deletions action_recognition/analysis/labelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,7 @@ def pseudo_automatic_labelling(self, timestamps, frames_per_chunk, video, tracks
frames : numpy.array of the frames for the labelled chunks
labels : numpy.array of the labels for each chunk
indicies : numpy.array of the index of the track for every chunk.
needed for reproducability.
needed for reproducibility.
"""
keypoints = tracks[0][0].keypoints
chunk_shape = (frames_per_chunk, *keypoints.shape)
Expand Down Expand Up @@ -201,7 +201,7 @@ def _pseudo_automatic_labelling(self, timestamp, track, track_index, frames_per_

self.visualiser.draw_video_with_tracks([track], video, end_frame, start_frame)

ok = input("Labelling as {}, ok? (y/n/s)".format(timestamp['label']))
ok = input("Labelling as {}, ok? (yes/(n)o/(s)kip forward)".format(timestamp['label']))
if ok == 'y' or ok == '':
chunk, chunk_frames = track.chunk_from_frame(start_frame, frames_per_chunk)

Expand Down Expand Up @@ -321,7 +321,7 @@ def manual_labelling(self, video, processor, target_frames_per_chunk, overlap_pe
frames : numpy.array of the frames for the labelled chunks
labels : numpy.array of the labels for each chunk
indicies : numpy.array of the index of the track for every chunk.
needed for reproducability.
needed for reproducibility.
"""
capture = cv2.VideoCapture(video)
fps = capture.get(cv2.CAP_PROP_FPS)
Expand Down
14 changes: 12 additions & 2 deletions action_recognition/classifiers/ensemble_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,17 @@ class EnsembleClassifier(BaseEstimator, ClassifierMixin):
*Note*: Can only use one thread, since some parts of sklearn_tda vectorisations
are not picklable.
Parameters
----------
use_tda_vectorisations : boolean, optional, default=False
Specifies if the vectorisations from sklearn_tda should be
part of the feature_engineering pipeline.
"""

def __init__(self, use_tda_vectorisations=False):
self.use_tda_vectorisations = use_tda_vectorisations

def fit(self, X, y, **fit_params):
"""Fit the model.
Expand All @@ -38,9 +47,10 @@ def fit(self, X, y, **fit_params):
"""
sliced_wasserstein_classifier = TDAClassifier(cross_validate=False)

feature_union_classifier = FeatureEngineeringClassifier()
feature_union_classifier = FeatureEngineeringClassifier(
use_tda_vectorisations=self.use_tda_vectorisations)

# Can't use multiple jobs because lambda in persistence image isn't pickable
# Can't use multiple jobs since the lambdas in some parts of sklearn_tda aren't picklable
classifier = VotingClassifier(estimators=[
("Union", feature_union_classifier),
("SWKernel", sliced_wasserstein_classifier)
Expand Down
16 changes: 8 additions & 8 deletions create_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def main(args):

chunks, frames, labels = process_tracks(
tracks_file, video, frames_per_chunk, overlap_percentage,
seconds_per_chunk, args.filter_moving)
seconds_per_chunk)

videos = np.array([video] * len(chunks))

Expand Down Expand Up @@ -98,7 +98,7 @@ def split_data(chunks, frames, labels, videos):
(test_chunks, test_frames, test_labels, test_videos)


def process_tracks(tracks_file, video, target_frames_per_chunk, overlap_percentage, seconds_per_chunk, automatic_moving_filter):
def process_tracks(tracks_file, video, target_frames_per_chunk, overlap_percentage, seconds_per_chunk):
tracks_npz = np.load(tracks_file)
np_tracks = tracks_npz['tracks']
np_frames = tracks_npz['frames']
Expand Down Expand Up @@ -139,17 +139,17 @@ def process_tracks(tracks_file, video, target_frames_per_chunk, overlap_percenta


if __name__ == '__main__':
parser = argparse.ArgumentParser(description='Dataset creation for analysis of tracks.')
parser = argparse.ArgumentParser(
description='Dataset creation/labelling for action recognition.')
parser.add_argument('--videos', type=str, nargs='+',
help='The video from which the paths were generated.')
help='The videos/folders from which the paths were generated.')
parser.add_argument('--tracks-files', type=str, nargs='+',
help='The file with the saved tracks.')
help='The files/folders with the saved tracks.')
parser.add_argument('--out-file', type=str, default='dataset/dataset.npz',
help='The path to the file where the data will be saved')
parser.add_argument('--append', action='store_true',
help='Specify if the data should be added to the out-file (if it exists) or overwritten.')
parser.add_argument('--filter-moving', action='store_true',
help='Specify if you want to automatically filter chunks with large movement.')
help=('Specify if the data should be added to the out-file '
'(if it exists) or overwritten.'))

logging.basicConfig(level=logging.DEBUG)
args = parser.parse_args()
Expand Down
11 changes: 9 additions & 2 deletions train_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,11 @@ def main(args):
classifier = TDAClassifier(cross_validate=args.cross_validate)
train_classifier(train, test, args.title, classifier)
if args.ensemble:
classifier = EnsembleClassifier()
classifier = EnsembleClassifier(use_tda_vectorisations=args.use_tda_vectorisations)
train_classifier(train, test, args.title, classifier)
if args.feature_engineering:
classifier = FeatureEngineeringClassifier()
classifier = FeatureEngineeringClassifier(
use_tda_vectorisations=args.use_tda_vectorisations)
train_classifier(train, test, args.title, classifier)


Expand Down Expand Up @@ -93,6 +94,12 @@ def train_classifier(train, test, title, classifier):
parser.add_argument('--cross-validate', '-cv', action='store_true',
help='Specify for cross-validation of tda pipeline.')

parser.add_argument('--use-tda-vectorisations', action='store_true',
help=('Specify for if the feature engineering and ensemble classifiers '
'should make use of the tda vectorisations from sklearn_tda. '
'Note that this will cause the model saving to file to crash '
'since parts of the tda vectorisations are not pickable.'))

logging.basicConfig(level=logging.DEBUG)
args = parser.parse_args()

Expand Down

0 comments on commit ba5f1d1

Please sign in to comment.