from threading import Event from generate_video import generate_multicam_video from aux_tools import str2bool, str_to_datetime, date_range, time_to_float, format_axis_as_timedelta, ExperimentTraverser, EXPERIMENT_DATETIME_STR_FORMAT from datetime import datetime, timedelta from matplotlib import pyplot as plt from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg from queue import Queue import os import cv2 import numpy as np import h5py import json import argparse # Import UI try: import Tkinter as tk import tkMessageBox as messagebox except ImportError: # Python 3 import tkinter as tk from tkinter import messagebox from MultiColumnListbox import MultiColumnListbox from ResizableImageCanvas import ResizableImageCanvas class VideoAndWeightHandler: TIME_INCREMENT = timedelta(seconds=0.1) # How much to shift the time offset cameras-weights from keyboard input (ASDW) FRAME_INCREMENT = 8 # How many frames to skip forward/backward on keyboard input (arrow keys) LEFT_RIGHT_MULTIPLIER = 10 # How much larger the skip is when using left-right (A-D) vs up-down (or W-S) def __init__(self, experiment_base_folder, cb_event_start_or_end, user_wants_to_exit, update_xaxis=False): self.cb_event_start_or_end = cb_event_start_or_end self.user_wants_to_exit = user_wants_to_exit self.update_xaxis = update_xaxis # For faster plot update, set this to False and the weight's xaxis will be static (-0:03 -0:02 ... 0:03) self.n = -1 # Frame number self.is_paused = False self.refresh_weight = True self.do_skip_frames = False self.t_lims = 3 # How many seconds of weight to show on either side of curr_t self.initial_scale = 0.5 # Rescale video_img before converting to Tkinter image (~3X faster to render) self.keys_pressed = Queue() self.video_canvas = None self.video_tk_img = None self.weight_canvas = None self.bg_cache = None # Load video info video_in_filename = generate_multicam_video(experiment_base_folder) self.video_in = cv2.VideoCapture(video_in_filename) with h5py.File(os.path.splitext(video_in_filename)[0] + ".h5", 'r') as h5_cam: self.t_cam = np.array(list(date_range(str_to_datetime(h5_cam.attrs['t_start']), str_to_datetime(h5_cam.attrs['t_end']), timedelta(seconds=1.0/h5_cam.attrs['fps'])))) self.video_dims = np.array([self.video_in.get(cv2.CAP_PROP_FRAME_HEIGHT), self.video_in.get(cv2.CAP_PROP_FRAME_WIDTH)]).astype(int) self.video_initial_dims = (self.initial_scale * self.video_dims).astype(int) self.weight_dims = np.array([self.video_initial_dims[0], 350]).astype(int) # Read all weight sensors for the full experiment duration at once t_experiment_start = experiment_base_folder.rsplit('/', 1)[-1] # Last folder in the path should indicate time at which experiment started with h5py.File(os.path.join(experiment_base_folder, "weights_{}.h5".format(t_experiment_start)), 'r') as h5_weights: self.weight_t = np.array([str_to_datetime(t) for t in h5_weights['t_str']]) weight_data = h5_weights['w'][:] w = np.sum(weight_data, axis=1) t_w = time_to_float(self.weight_t, self.weight_t[0]) # Manually align weight and cam timestamps (not synced because OSX and Linux use different NTP servers) self.weight_to_cam_t_offset = self.weight_t[0] + timedelta(seconds=13) # Initialize the offset to ~13s (empirical) # Set up matplotlib figure self.fig = plt.figure(figsize=self.weight_dims[::-1]/100.0) num_subplots = len(w) ax = self.fig.subplots(num_subplots, 1, sharex=True, squeeze=False) self.curr_t_lines = [] for i in range(num_subplots): shelf_i = num_subplots - (i+1) # Shelf 1 is at the bottom # Plot weight and a vertical line at currT. Draw invisible: we'll copy the canvas bgnd, then make it visible ax[i,0].plot(t_w, w[shelf_i]) self.curr_t_lines.append(ax[i,0].axvline(0, linestyle='--', color='black', linewidth=1)) ax[i,0].set_title('Shelf {}'.format(shelf_i+1), fontsize=10, pad=2) ax[i,0].set_xlim(-self.t_lims, self.t_lims) format_axis_as_timedelta(ax[i,0].xaxis) # Render the figure and save background so updating the plot can be much faster (using blit instead of draw) self.update_bg_cache() # Allocate memory space for a video frame and a downsampled copy self.video_img = np.zeros((self.video_dims[0], self.video_dims[1], 3), dtype=np.uint8) def update_bg_cache(self, resize_event=None): if resize_event is not None: self.weight_canvas.resize(resize_event) # Forward event to figure canvas so it resizes the figure axes = self.fig.get_axes() orig_xlims = axes[-1].get_xlim() def set_visibility(is_visible): for i, ax in enumerate(axes): for l in ax.lines: l.set_visible(is_visible) if is_visible: ax.draw_artist(l) # Will need to rerender # update_xaxis=False means weight's xaxis will be static (always show: -0:03 -0:02 ... 0:03) # update_xaxis=True means we'll rerender the xaxis on every replot -> Need to hide the labels before copying the canvas bgnd if self.update_xaxis: is_last = (i == len(axes)-1) ax.tick_params(axis='x', which="both", bottom=is_visible, labelbottom=is_last and is_visible) if is_visible: ax.draw_artist(ax.xaxis) else: ax.set_xlim((-self.t_lims, self.t_lims) if not is_visible else orig_xlims) set_visibility(False) # Make axes and lines invisible plt.tight_layout(0, 0.6) self.fig.canvas.draw() # Rerender full figure self.bg_cache = self.fig.canvas.copy_from_bbox(self.fig.bbox) # Copy the whole canvas set_visibility(True) # Make everything visible again self.fig.canvas.blit() # Rerender only necessary parts def update(self): # Update video frame (if needed) if not self.is_paused or self.do_skip_frames: # Grab next frame self.n += 1 ok = self.video_in.read(self.video_img[:, :self.video_dims[1], :]) assert ok, "Couldn't read frame {}!".format(self.n) print("Read frame {} out of {} frames ({:6.2f}%)".format(self.n+1, len(self.t_cam), 100.0*(self.n+1)/len(self.t_cam))) # Render the frame self.video_canvas.update_image(self.video_img) # Update weight plot (if needed) if self.refresh_weight: # Update current time and redraw whatever needed curr_t = (self.t_cam[self.n]-self.weight_to_cam_t_offset).total_seconds() self.fig.canvas.restore_region(self.bg_cache) # We'll render on top of our cached bgnd (contains subplot frames, shelf number [title], ylabels, etc) for l in self.curr_t_lines: l.set_xdata(curr_t) # Update time cursor (dashed black lines) for ax in self.fig.get_axes(): ax.set_xlim(curr_t-self.t_lims, curr_t+self.t_lims) # Update xlims to be centered on current time for l in ax.lines: ax.draw_artist(l) # Redraw all lines if self.update_xaxis: # Redraw xlabels if needed ax.draw_artist(ax.xaxis) # Refresh weight plot (using blitting for a ~5X speedup vs canvas.draw()) self.fig.canvas.blit() # Process key presses self.handle_kb_input() if self.do_skip_frames: self.video_in.set(cv2.CAP_PROP_POS_FRAMES, self.n) self.n = int(self.video_in.get(cv2.CAP_PROP_POS_FRAMES)) # Don't let it go over the length of the video def handle_kb_input(self): self.do_skip_frames = False self.refresh_weight = False while not self.keys_pressed.empty(): key_info = self.keys_pressed.get() k = key_info.keysym.lower() if k == 'left': # Left arrow (at least on my Mac) self.n -= self.LEFT_RIGHT_MULTIPLIER*self.FRAME_INCREMENT self.do_skip_frames = True self.refresh_weight = True elif k == 'right': # Right arrow self.n += self.LEFT_RIGHT_MULTIPLIER*self.FRAME_INCREMENT self.do_skip_frames = True self.refresh_weight = True elif k == 'up': # Up arrow self.n += self.FRAME_INCREMENT self.do_skip_frames = True self.refresh_weight = True elif k == 'down': # Down arrow self.n -= self.FRAME_INCREMENT self.do_skip_frames = True self.refresh_weight = True elif k == 'a': self.weight_to_cam_t_offset -= self.LEFT_RIGHT_MULTIPLIER*self.TIME_INCREMENT self.refresh_weight = True elif k == 'd': self.weight_to_cam_t_offset += self.LEFT_RIGHT_MULTIPLIER*self.TIME_INCREMENT self.refresh_weight = True elif k == 'w': self.weight_to_cam_t_offset -= self.TIME_INCREMENT self.refresh_weight = True elif k == 's': self.weight_to_cam_t_offset += self.TIME_INCREMENT self.refresh_weight = True elif k == 'b': self.cb_event_start_or_end(True, self.t_cam[self.n]) elif k == 'n': self.cb_event_start_or_end(False, self.t_cam[self.n]) elif k == 'space': self.is_paused = not self.is_paused elif k == 'escape': # Don't exit on unrecognized keys if labeling ground truth print('Esc pressed, exiting!') self.user_wants_to_exit.set() self.refresh_weight = self.refresh_weight or not self.is_paused class GroundTruthLabelerWindow(tk.Tk): VIDEO_AND_WEIGHT_UPDATE_PERIOD = 10 # msec WIN_PAD = 10 GRID_PAD = 3 # 3px between consecutive items in a hor/vert grid (e.g. between video feed and weight plot) def __init__(self, experiment_base_folder): super(GroundTruthLabelerWindow, self).__init__() self.weight_to_cam_t_offset = None self.t_offset_float = 0 self.user_wants_to_exit = Event() self.video_and_weight = VideoAndWeightHandler(experiment_base_folder, self.on_set_event_time_start_or_end, self.user_wants_to_exit) video_canvas_size = self.video_and_weight.video_initial_dims weight_canvas_size = self.video_and_weight.weight_dims # Load product info with open("Dataset/product_info.json", 'r') as f: self.product_info = json.load(f)['products'] options = tuple((p['id'], p['name']) for p in self.product_info) column_headers = ("Time start", "Time end", "Pickup?", "Item ID", "Item name", "Quantity") column_widths = (186, 186, 50, 48, -1, 55) # Setup ui self.title("Ground truth labeler") win_size = np.array((video_canvas_size[1] + weight_canvas_size[1] + 2*self.WIN_PAD + 2*self.GRID_PAD, video_canvas_size[0]+200)) win_offs = (np.array((self.winfo_screenwidth(), self.winfo_screenheight())) - win_size)/2 self.geometry("{s[0]}x{s[1]}+{o[0]}+{o[1]}".format(s=win_size.astype(int), o=win_offs.astype(int))) self.protocol("WM_DELETE_WINDOW", self.on_closing) self.ui_container = tk.Frame(self) self.ui_container.pack(fill='both', expand=True, padx=self.WIN_PAD, pady=self.WIN_PAD) # Variables self.quantity = tk.IntVar(self, 1) self.selected_product = tk.Variable(self, options[0]) self.is_pickup = tk.BooleanVar(self, True) self.t_start = None self.t_end = None self.events = [] # Widgets self.video_and_weight_container = tk.Frame(self) self.video_and_weight_container.grid(row=0, column=0, columnspan=6, sticky='nesw', ipady=self.GRID_PAD/2, in_=self.ui_container) self.video_canvas = ResizableImageCanvas(master=self, width=video_canvas_size[1], height=video_canvas_size[0], highlightthickness=0) self.video_canvas.grid(row=0, column=0, sticky='nesw', in_=self.video_and_weight_container) self.weight_canvas = FigureCanvasTkAgg(self.video_and_weight.fig, master=self) self.weight_canvas.get_tk_widget().grid(row=0, column=1, sticky='ns', padx=(self.WIN_PAD, 0), in_=self.video_and_weight_container) self.weight_canvas.get_tk_widget().bind("<Configure>", self.video_and_weight.update_bg_cache) self.video_and_weight.video_canvas = self.video_canvas self.video_and_weight.weight_canvas = self.weight_canvas self.lst_events = MultiColumnListbox(column_headers, master=self, height=5) for i,w in enumerate(column_widths): if w > 0: self.lst_events.tree.column(i, width=w, stretch=False) self.lst_events.container.grid(row=1, column=0, columnspan=6, pady=self.GRID_PAD, sticky='nesw', in_=self.ui_container) num_quantity = tk.Spinbox(self, from_=1, to_=5, width=1, borderwidth=0, textvariable=self.quantity) num_quantity.grid(row=2, rowspan=2, column=0, in_=self.ui_container) drp_product = tk.OptionMenu(self, self.selected_product, *options) drp_product.grid(row=2, rowspan=2, column=1, sticky='ew', ipadx=10, in_=self.ui_container) opt_pickup = tk.Radiobutton(self, text="Pick up", variable=self.is_pickup, value=True) opt_pickup.grid(row=2, column=2, sticky='ew', ipadx=10, in_=self.ui_container) opt_pickup = tk.Radiobutton(self, text="Put back", variable=self.is_pickup, value=False) opt_pickup.grid(row=3, column=2, sticky='ew', ipadx=0, in_=self.ui_container) tk.Label(self, text="Start:").grid(row=2, column=3, sticky='nsew', in_=self.ui_container) tk.Label(self, text="End:").grid(row=3, column=3, sticky='nsew', in_=self.ui_container) self.txt_t_start = tk.Text(self, state=tk.DISABLED, height=1, width=26) self.txt_t_start.grid(row=2, column=4, sticky='nsew', in_=self.ui_container) self.txt_t_end = tk.Text(self, state=tk.DISABLED, height=1, width=26) self.txt_t_end.grid(row=3, column=4, sticky='nsew', in_=self.ui_container) self._update_time() # Initialize their text btn_add_event = tk.Button(self, text="Add event", command=self.add_event) btn_add_event.grid(row=2, rowspan=2, column=5, in_=self.ui_container) # Event handling self.bind('<KeyPress>', self.video_and_weight.keys_pressed.put) self.lst_events.tree.bind('<KeyPress>', self.remove_event) # Make grids expandable on window resize self.ui_container.grid_rowconfigure(0, weight=10, minsize=200) self.ui_container.grid_rowconfigure(1, weight=1, minsize=50) self.ui_container.grid_columnconfigure(1, weight=1) self.video_and_weight_container.grid_columnconfigure(0, weight=1) self.video_and_weight_container.grid_rowconfigure(0, weight=1) def run(self): # Load the first image self.update() self.update_canvas() # Run main loop self.mainloop() # Save final offset values self.weight_to_cam_t_offset = self.video_and_weight.weight_to_cam_t_offset self.t_offset_float = (self.video_and_weight.weight_t[0]-self.weight_to_cam_t_offset).total_seconds() def on_set_event_time_start_or_end(self, is_start, t): if is_start: self.t_start = t else: self.t_end = t self._update_time() def update_canvas(self): # Update video frame and weight plot self.video_and_weight.update() # Check if user pressed Escape if self.user_wants_to_exit.is_set(): self.on_closing() else: self.after(self.VIDEO_AND_WEIGHT_UPDATE_PERIOD, self.update_canvas) def on_closing(self): # Save state of the events list before destroying events_info = (self.lst_events.tree.item(child, values=None) for child in self.lst_events.tree.get_children('')) self.events = [{ "t_start": event[0], "t_end": event[1], "is_pickup": str2bool(event[2]), "item_id": int(event[3]), "item_name": event[4], "quantity": int(event[5]) } for event in events_info] if messagebox.askokcancel("Exit?", "Are you sure you're done annotating this experiment's ground truth?\nWe've registered {} event{}".format(len(self.events), '' if len(self.events)==1 else 's')): self.destroy() def add_event(self): prod_id, prod_name = self.selected_product.get() new_item = (self.t_start, self.t_end, self.is_pickup.get(), prod_id, prod_name, self.quantity.get()) print("Adding event: {}".format(new_item)) self.lst_events.add_item(new_item) # Reset state self.quantity.set(1) self.t_start = None self.t_end = None self._update_time() def remove_event(self, k): if k.keysym == 'BackSpace' or k.keysym == 'Delete': selected_items = self.lst_events.tree.selection() if len(selected_items) > 0 and messagebox.askokcancel("Are you sure?", "Are you sure you want to remove {} item{}?".format(len(selected_items), 's' if len(selected_items)>1 else '')): self.lst_events.tree.delete(*selected_items) def _set_time(self, is_t_start): if is_t_start: txt_box = self.txt_t_start text = self.t_start if self.t_start is not None else "Press 'b' to set t_start" else: txt_box = self.txt_t_end text = self.t_end if self.t_end is not None else "Press 'n' to set t_end" # Update text (need to set state to normal, change text, then disable the widget again) txt_box.config(state='normal') txt_box.delete(1.0, 'end') txt_box.insert('end', text) txt_box.config(state='disabled') def _update_time(self): self._set_time(True) self._set_time(False) class GroundTruthLabeler(ExperimentTraverser): def process_subfolder(self, f): experiment_folder = os.path.join(self.main_folder, f) ground_truth_file = os.path.join(experiment_folder, "ground_truth.json") if os.path.exists(ground_truth_file): print("Video already annotated!! Skipping (delete '{}' and run this tool again if you want to overwrite)".format(ground_truth_file)) return # Open ground truth labeling windows gt_labeler = GroundTruthLabelerWindow(experiment_folder) gt_labeler.run() print("Generate_video finished! Weight-camera time offset manually set as {} ({}s wrt weight's own timestamps)".format(gt_labeler.weight_to_cam_t_offset, gt_labeler.t_offset_float)) annotated_events = gt_labeler.events print("Received annotated events: {}".format(annotated_events)) with open(ground_truth_file, 'w') as f_gt: json.dump({ 'ground_truth': annotated_events, 'weight_to_cam_t_offset': str(gt_labeler.weight_to_cam_t_offset), 'weight_to_cam_t_offset_float': gt_labeler.t_offset_float, }, f_gt, indent=2) print("Ground truth annotation saved as '{}'!".format(ground_truth_file)) if __name__ == '__main__': parser = argparse.ArgumentParser() parser.add_argument("folder", default="Dataset/Evaluation", help="Folder containing the experiment to visualize") parser.add_argument("-s", "--start-datetime", default="", help="Only annotate experiments collected later than this datetime (format: {}; empty for no limit)".format(EXPERIMENT_DATETIME_STR_FORMAT)) parser.add_argument("-e", "--end-datetime", default="", help="Only annotate experiments collected before this datetime (format: {}; empty for no limit)".format(EXPERIMENT_DATETIME_STR_FORMAT)) args = parser.parse_args() t_start = datetime.strptime(args.start_datetime, EXPERIMENT_DATETIME_STR_FORMAT) if len(args.start_datetime) > 0 else datetime.min t_end = datetime.strptime(args.end_datetime, EXPERIMENT_DATETIME_STR_FORMAT) if len(args.end_datetime) > 0 else datetime.max GroundTruthLabeler(args.folder, t_start, t_end).run()