Skip to content

Commit

Permalink
Merge pull request mbari-org#254 from MBARIMike/main
Browse files Browse the repository at this point in the history
Read *_exclude_list.xlsx & write *_exclude_list.csv files in the SMDB directories
  • Loading branch information
MBARIMike authored Jun 5, 2024
2 parents 7cf97f7 + e5e7204 commit 14b7b18
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 10 deletions.
4 changes: 2 additions & 2 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@
//"args": ["-v", "1", "--skipuntil_regex", "--regex", "MappingAUVOps2018/20180725m1/ZTopo.grd$", "--limit", "2"],
//"args": [ "-v", "2", "--clobber", "--skipuntil_regex", "--regex", "mbsystem/Data/2012/20120103_MB-System_Tutorial_OFG/ProprietaryOFGSampleData/20101127OFGSample/multibeam_hsx"],
//"args": [ "-v", "1", "--clobber", "--noinput", "--skipuntil_regex", "--regex", "MappingAUVOps2014/Sentry_Loihi/2014-glazer/dives/sentry265/multibeam/mbari/ZTopo.grd$", "--limit", "1" ],
"args": [ "-v", "1", "--skipuntil_regex", "--regex", "2021/20210812m1/ZTopo.grd$", "--limit", "10" ],
//"args": [ "-v", "1", "--skipuntil_regex", "--regex", "2021/20210812m1/ZTopo.grd$", "--limit", "10" ],
//"args": [ "-v", "1", "--skipuntil_regex", "--regex", "2023/20230511d1/photogrammetry/ZTopo.grd$", "--limit", "1" ],
//"args": [ "-v", "1", "--skipuntil_regex", "--regex", "2019/20190820m1/ZTopo.grd$", "--limit", "100" ],
//"args": [ "-v", "1", "--skipuntil_regex", "--regex", "2023/20230310m1/ZTopo.grd$", "--limit", "10" ],
Expand Down Expand Up @@ -83,7 +83,7 @@
//"args": ["--compilation", "-v", "1", "--skipuntil", "2022/AxialSeamount/FiguresCaress", "--limit", "10", "--log_file", "compilation.txt"],
//"args": ["--compilation", "-v", "1"],
//"args": ["--spreadsheets", "-v", "1"],
//"args": ["-v", "1", "--last_n_days", "0.5"],
"args": ["-v", "1", "--last_n_days", "0.5"],
//"args": ["-v", "1", "--spreadsheets", "--parent_dir", "2024", "--append_to_log_file"],
//"args": ["-v", "1", "--spreadsheets"],
//"args": ["-v", "1", "--compilation", "--last_n_days", "30"],
Expand Down
80 changes: 72 additions & 8 deletions smdb/scripts/load.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ def __init__(self):
self._log_levels = (logging.WARN, logging.INFO, logging.DEBUG)
self._log_strings = ("WARN", "INFO", "DEBUG")
self.commandline = None
self.exclude_paths = []
self.exclude_paths = set()
self.start_proc = datetime.now()

def process_command_line(self):
Expand Down Expand Up @@ -242,12 +242,6 @@ def process_command_line(self):
self.logger.addHandler(file_handler)
self.logger.setLevel(self._log_levels[self.args.verbose])

if not self.exclude_paths:
with open(self.args.exclude) as fh:
for line in fh:
if line.startswith("/mbari/SeafloorMapping/"):
self.exclude_paths.append(line.strip())

if self.args.clobber_log_file:
self.logger.info("Saving to new local log file: %s", self.LOCAL_LOG_FILE)
else:
Expand Down Expand Up @@ -1176,7 +1170,7 @@ def load_from_grds(self):
exclude_count = 0
if self.args.last_n_days:
self.logger.info(
"Loading Missions newer than %d days", self.args.last_n_days
"Loading Missions newer than %f days", self.args.last_n_days
)
with subprocess.Popen(locate_cmd, shell=True, stdout=subprocess.PIPE) as proc:
for count, fp in enumerate(proc.stdout, start=1):
Expand Down Expand Up @@ -1756,19 +1750,80 @@ def process_csv(self, xlsx_files_processed: List[str] = None):
self.logger.info(f"Wrote {count} Missions to {csv_file}")


class ExcludeFile(BaseLoader):
    """Collect paths to exclude and mirror them into per-directory CSV files.

    Paths accumulate in ``self.exclude_paths`` from the file named by
    ``self.args.exclude`` and from
    ``<parent_dir>/SMDB/<parent_dir>_exclude_list.xlsx`` spreadsheets found
    under ``MBARI_DIR``. They can then be written out as
    ``<parent_dir>/SMDB/<parent_dir>_exclude_list.csv`` files.
    """

    def read_config_exclude_list(self) -> None:
        """Add the paths listed in ``self.args.exclude`` to ``self.exclude_paths``.

        The file is read only when ``self.exclude_paths`` is still empty, and
        only lines starting with "/mbari/SeafloorMapping/" are kept.
        """
        if self.exclude_paths:
            return
        with open(self.args.exclude) as fh:
            for line in fh:
                if line.startswith("/mbari/SeafloorMapping/"):
                    self.exclude_paths.add(line.strip())
        self.logger.info(
            f"Read {len(self.exclude_paths)} paths to exclude from {self.args.exclude}"
        )

    def read_exclude_path_xlsxs(self) -> None:
        """Walk the <parent_dir>/SMDB folders for files named
        <parent_dir>_exclude_list.xlsx and add the values of their "path"
        column to the exclude_paths set. This is used to skip over
        directories that we don't want to process.

        Raises:
            KeyError: If a spreadsheet that was read has no "path" column.
        """
        total = 0
        for parent_dir in os.listdir(MBARI_DIR):
            xlsx_file = os.path.join(
                MBARI_DIR, parent_dir, "SMDB", f"{parent_dir}_exclude_list.xlsx"
            )
            if not os.path.exists(xlsx_file):
                continue
            try:
                df = pd.read_excel(xlsx_file, engine="openpyxl")
            except (FileNotFoundError, NotADirectoryError, PermissionError) as e:
                self.logger.debug(f"Could not open {xlsx_file} for reading: {e}")
                continue
            # Blank spreadsheet cells are read as NaN (a float); adding them
            # to the set would later break sorting and string handling.
            count = 0
            for raw_path in df["path"].dropna():
                path = str(raw_path).strip()
                if not path:
                    continue
                count += 1
                self.exclude_paths.add(path)
            total += count
            # Report per file so each count is attributed to the right file.
            self.logger.info(f"Read {count} paths to exclude from {xlsx_file}")
        self.logger.info(
            f"Read {total} paths to exclude from *_exclude_list.xlsx files in {MBARI_DIR}"
        )

    def write_exclude_path_csvs(self) -> None:
        """Write the exclude_paths to <parent_dir>/SMDB/<parent_dir>_exclude_list.csv files.

        Each file has a "path" header line followed by one path per line.
        Paths that are not under ``MBARI_DIR``, and parent directories that
        do not exist or cannot be written to, are skipped with a warning.
        """
        # This method may only need to be run once to create the <parent_dir> exclude files from
        # the original exclude.list file maintained in the git repo in smdb/config/exclude.list

        # Group the paths by the first path component below MBARI_DIR
        paths_by_parent = {}
        for path in sorted(self.exclude_paths):
            if not path.startswith(MBARI_DIR):
                self.logger.warning(
                    "Skipping exclude path not under %s: %s", MBARI_DIR, path
                )
                continue
            # lstrip("/") makes this work whether or not MBARI_DIR ends in "/"
            parent_dir = path[len(MBARI_DIR):].lstrip("/").split("/", 1)[0]
            if not parent_dir:
                self.logger.warning(
                    "Skipping exclude path with no parent directory: %s", path
                )
                continue
            paths_by_parent.setdefault(parent_dir, []).append(path)

        # Write out the exclude files
        for parent_dir, paths in paths_by_parent.items():
            if not os.path.isdir(os.path.join(MBARI_DIR, parent_dir)):
                self.logger.warning("No directory found for %s", parent_dir)
                # There is nowhere to write the file, so move on.
                continue
            csv_file = os.path.join(
                MBARI_DIR, parent_dir, "SMDB", f"{parent_dir}_exclude_list.csv"
            )
            try:
                with open(csv_file, "w") as fh:
                    fh.write("path\n")
                    fh.writelines(f"{path}\n" for path in paths)
            except OSError as e:
                # e.g. the SMDB folder is missing or read-only; the CSV is a
                # convenience copy, so don't abort the whole load over it.
                self.logger.warning("Could not write %s: %s", csv_file, e)
                continue
            self.logger.info(f"Wrote {len(paths)} paths to {csv_file}")


def run(*args):
# Possible use: https://django-extensions.readthedocs.io/en/latest/runscript.html
bl = BaseLoader()
bl.process_command_line()
bl.logger.debug("Arguments passed to run(): %s", " ".join(args))
if bl.args.bootstrap and bl.args.notes and bl.args.fnv:
exclude_file_load()
missions_saved = bootstrap_load()
notes_load(missions_saved)
fnv_load(missions_saved)
elif bl.args.bootstrap and bl.args.notes:
exclude_file_load()
missions_saved = bootstrap_load()
notes_load(missions_saved)
elif bl.args.bootstrap:
exclude_file_load()
missions_saved = bootstrap_load()
elif bl.args.notes:
notes_load(missions_saved)
Expand All @@ -1781,6 +1836,7 @@ def run(*args):
elif bl.args.spreadsheets:
spreadsheets_load()
else:
exclude_file_load()
missions_saved = bootstrap_load()
notes_load(missions_saved)
fnv_load(missions_saved)
Expand All @@ -1790,6 +1846,14 @@ def run(*args):
bl.save_logger_output()


def exclude_file_load():
    """Collect the excluded paths and write the per-directory CSV copies.

    Creates an ``ExcludeFile``, processes the command line, reads the
    configured exclude list and the ``*_exclude_list.xlsx`` spreadsheets,
    then writes the ``*_exclude_list.csv`` files.

    Returns:
        The set of excluded paths collected on the ``ExcludeFile`` instance.
        Existing callers that ignore the return value are unaffected.
        NOTE(review): the paths are held only on this local instance, so
        returning them is the only way for a caller to reach them from
        here -- confirm how the other loaders are meant to obtain them.
    """
    ef = ExcludeFile()
    ef.process_command_line()
    ef.read_config_exclude_list()
    ef.read_exclude_path_xlsxs()
    ef.write_exclude_path_csvs()
    return ef.exclude_paths


def bootstrap_load() -> list:
bs = BootStrapper()
bs.process_command_line()
Expand Down

0 comments on commit 14b7b18

Please sign in to comment.