diff --git a/docs/requirements.txt b/docs/requirements.txt index ce2b7952f..d8b5a55ca 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -18,6 +18,7 @@ parameterized==0.7.4 tabulate pycocotools>=2.0.1 fvcore +iopath==0.1.9 fairscale git+git://github.com/facebookresearch/ClassyVision.git https://download.pytorch.org/whl/cpu/torch-1.5.0%2Bcpu-cp37-cp37m-linux_x86_64.whl diff --git a/extra_scripts/convert_caffe2_to_torchvision_resnet.py b/extra_scripts/convert_caffe2_to_torchvision_resnet.py index 82fe3cb08..30c98df6b 100644 --- a/extra_scripts/convert_caffe2_to_torchvision_resnet.py +++ b/extra_scripts/convert_caffe2_to_torchvision_resnet.py @@ -19,7 +19,7 @@ from collections import OrderedDict import torch -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr # create the logger @@ -124,7 +124,7 @@ def _rename_weights_for_resnet(weights, stage_names): def _load_c2_pickled_weights(file_path): - with PathManager.open(file_path, "rb") as f: + with g_pathmgr.open(file_path, "rb") as f: data = pickle.load(f, encoding="latin1") if "blobs" in data: weights = data["blobs"] diff --git a/extra_scripts/convert_caffe2_to_vissl_alexnet.py b/extra_scripts/convert_caffe2_to_vissl_alexnet.py index aa4bb1680..cbbcd3499 100644 --- a/extra_scripts/convert_caffe2_to_vissl_alexnet.py +++ b/extra_scripts/convert_caffe2_to_vissl_alexnet.py @@ -23,7 +23,7 @@ import numpy as np import torch -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr # create the logger @@ -58,7 +58,7 @@ def remove_jigsaw_names(data): def _load_c2_pickled_weights(file_path): - with PathManager.open(file_path, "rb") as f: + with g_pathmgr.open(file_path, "rb") as f: data = pickle.load(f, encoding="latin1") if "blobs" in data: weights = data["blobs"] @@ -71,7 +71,7 @@ def _load_c2_weights(file_path): if file_path.endswith("pkl"): weights = _load_c2_pickled_weights(file_path) elif file_path.endswith("npy"): - with PathManager.open(file_path, "rb") as fopen: + with g_pathmgr.open(file_path, "rb") as fopen: weights = np.load(fopen, allow_pickle=True, encoding="latin1")[()] return weights diff --git a/extra_scripts/convert_folder_to_filelist.py b/extra_scripts/convert_folder_to_filelist.py index 4d547a7e6..35c420712 100644 --- a/extra_scripts/convert_folder_to_filelist.py +++ b/extra_scripts/convert_folder_to_filelist.py @@ -21,7 +21,7 @@ import argparse import os -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.env import setup_path_manager from vissl.utils.io import save_file @@ -58,13 +58,13 @@ def get_argument_parser(): setup_path_manager() - splits = PathManager.ls(args.input) + splits = g_pathmgr.ls(args.input) print(f"The following splits are found: { ','.join(splits) }") dataset_summary = {} for split in ["train", "trainval", "val", "test"]: - if not PathManager.exists(os.path.join(args.input, split)): + if not g_pathmgr.exists(os.path.join(args.input, split)): continue dataset_summary[split] = {} @@ -72,7 +72,7 @@ def get_argument_parser(): img_labels = [] split_path = os.path.join(args.input, split) - label_paths = PathManager.ls(split_path) + label_paths = g_pathmgr.ls(split_path) dataset_summary[split]["labels"] = label_paths dataset_summary[split]["num_labels"] = len(label_paths) print(f"{len(label_paths)} classes found for { split } split.") @@ -81,7 +81,7 @@ def get_argument_parser(): # Populate the img_paths and img_labels based on torchvision image folder file structure. 
for label in label_paths: label_path = os.path.join(split_path, label) - images = PathManager.ls(os.path.join(split_path, label)) + images = g_pathmgr.ls(os.path.join(split_path, label)) print(f"{len(images)} examples found for { label }, { split }.") total_split_examples += len(images) for image in images: @@ -94,17 +94,17 @@ def get_argument_parser(): # Remove the split .npy filelist if they exist and resave them.. image_path = os.path.join(args.output, f"{split}_images.npy") - PathManager.rm(image_path) + g_pathmgr.rm(image_path) save_file(img_paths, image_path) print(f"Saved { image_path }") label_path = os.path.join(args.output, f"{split}_labels.npy") - PathManager.rm(label_path) + g_pathmgr.rm(label_path) save_file(img_labels, label_path) print(f"Saved { label_path }") # Save dataset summary. dataset_summary_path = os.path.join(args.output, "dataset_summary.json") - PathManager.rm(dataset_summary_path) + g_pathmgr.rm(dataset_summary_path) save_file(dataset_summary, dataset_summary_path) diff --git a/extra_scripts/convert_sharded_checkpoint.py b/extra_scripts/convert_sharded_checkpoint.py index a629470ec..d7747feae 100644 --- a/extra_scripts/convert_sharded_checkpoint.py +++ b/extra_scripts/convert_sharded_checkpoint.py @@ -20,7 +20,7 @@ import enum import os -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.checkpoint import CheckpointFormatConverter from vissl.utils.env import setup_path_manager from vissl.utils.io import makedir @@ -41,7 +41,7 @@ class CheckpointType(enum.Enum): def convert_checkpoint(input_path: str, output_path: str, output_type: str): - assert PathManager.exists( + assert g_pathmgr.exists( input_path ), f"Checkpoint input path: {input_path} not found." diff --git a/extra_scripts/convert_vissl_to_torchvision.py b/extra_scripts/convert_vissl_to_torchvision.py index 4ae29b697..d73a71360 100644 --- a/extra_scripts/convert_vissl_to_torchvision.py +++ b/extra_scripts/convert_vissl_to_torchvision.py @@ -12,7 +12,7 @@ import sys import torch -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.checkpoint import replace_module_prefix from vissl.utils.io import is_url @@ -30,7 +30,7 @@ def convert_and_save_model(args, replace_prefix): - assert PathManager.exists(args.output_dir), "Output directory does NOT exist" + assert g_pathmgr.exists(args.output_dir), "Output directory does NOT exist" # load the model model_path = args.model_url_or_file diff --git a/extra_scripts/create_low_shot_samples.py b/extra_scripts/create_low_shot_samples.py index b03d1dac7..61a48aaec 100644 --- a/extra_scripts/create_low_shot_samples.py +++ b/extra_scripts/create_low_shot_samples.py @@ -11,7 +11,7 @@ import random import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.io import load_file, save_file @@ -171,7 +171,7 @@ def main(): ) opts = parser.parse_args() - assert PathManager.exists(opts.targets_data_file), "Target file not found. Abort" + assert g_pathmgr.exists(opts.targets_data_file), "Target file not found. 
Abort" targets = load_file(opts.targets_data_file) sample_ids = list(range(1, 1 + opts.num_samples)) diff --git a/extra_scripts/datasets/create_coco_data_files.py b/extra_scripts/datasets/create_coco_data_files.py index 726bdb7ad..50df9a821 100644 --- a/extra_scripts/datasets/create_coco_data_files.py +++ b/extra_scripts/datasets/create_coco_data_files.py @@ -15,7 +15,7 @@ import sys import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from pycocotools.coco import COCO @@ -53,8 +53,8 @@ def get_valid_objs(entry, objs): def get_imgs_labels_info(split, json_file, args): - assert PathManager.exists(json_file), "Data source does not exist. Abort" - json_data = json.load(PathManager.open(json_file, "r")) + assert g_pathmgr.exists(json_file), "Data source does not exist. Abort" + json_data = json.load(g_pathmgr.open(json_file, "r")) image_index = [x["id"] for x in json_data["images"]] coco = COCO(json_file) diff --git a/extra_scripts/datasets/create_imagenet_a_data_files.py b/extra_scripts/datasets/create_imagenet_a_data_files.py index 6a6205ae5..52f0a160f 100644 --- a/extra_scripts/datasets/create_imagenet_a_data_files.py +++ b/extra_scripts/datasets/create_imagenet_a_data_files.py @@ -8,7 +8,7 @@ import numpy as np import torchvision.datasets as datasets -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from tqdm import tqdm from vissl.utils.download import download_and_extract_archive from vissl.utils.io import cleanup_dir @@ -44,7 +44,7 @@ def get_argument_parser(): def remove_file_name_whitespace(input_path: str): """ - Remove the whitespace in the file names for better compatibility with PathManager. + Remove the whitespace in the file names for better compatibility with g_pathmgr. """ for class_folder_path in os.listdir(input_path): # All necessary folders start with n. @@ -82,7 +82,7 @@ class ImagenetTargetMapper: ) def __init__(self): - with PathManager.open(self.IMAGENET_TARGETS_URL) as f: + with g_pathmgr.open(self.IMAGENET_TARGETS_URL) as f: imagenet_classes = [line.strip() for line in f.readlines()] imagenet_classes.sort() self.label_to_id = {label: i for i, label in enumerate(imagenet_classes)} @@ -143,7 +143,7 @@ def cleanup_unused_files(output_path: str): download_datasets(args.input) input_path = os.path.join(args.input, "imagenet-a") - assert PathManager.exists(input_path), "Input data path does not exist" + assert g_pathmgr.exists(input_path), "Input data path does not exist" remove_file_name_whitespace(input_path) create_imagenet_test_files(input_path, args.output) diff --git a/extra_scripts/datasets/create_imagenet_data_files.py b/extra_scripts/datasets/create_imagenet_data_files.py index 19bee9d51..e6e993a57 100644 --- a/extra_scripts/datasets/create_imagenet_data_files.py +++ b/extra_scripts/datasets/create_imagenet_data_files.py @@ -16,7 +16,7 @@ import sys import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr # initiate the logger @@ -36,7 +36,7 @@ def get_all_classes(data_dir): def get_images_labels_info(split, args): - assert PathManager.exists(args.data_source_dir), "Data source NOT found. Abort!" + assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort!" 
data_dir = f"{args.data_source_dir}/{split}" class_idx = get_all_classes(data_dir) logger.info("Number of classes in {} data: {}".format(split, len(class_idx))) @@ -103,7 +103,7 @@ def main(): json_out_path = f"{args.output_dir}/{partition}_targets.json" import json - with PathManager.open(json_out_path, "w") as fp: + with g_pathmgr.open(json_out_path, "w") as fp: json.dump(output_dict, fp) logger.info("Saved Json to: {}".format(json_out_path)) logger.info("DONE!") diff --git a/extra_scripts/datasets/create_imagenet_r_data_files.py b/extra_scripts/datasets/create_imagenet_r_data_files.py index 55fd5a482..4d20e52f4 100644 --- a/extra_scripts/datasets/create_imagenet_r_data_files.py +++ b/extra_scripts/datasets/create_imagenet_r_data_files.py @@ -10,7 +10,7 @@ create_imagenet_test_files, remove_file_name_whitespace, ) -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.download import download_and_extract_archive from vissl.utils.io import cleanup_dir @@ -77,7 +77,7 @@ def cleanup_unused_files(output_path: str): dataset_name = "imagenet-r" input_path = os.path.join(args.input, dataset_name) - assert PathManager.exists(input_path), "Input data path does not exist" + assert g_pathmgr.exists(input_path), "Input data path does not exist" remove_file_name_whitespace(input_path) create_imagenet_test_files(input_path, args.output) diff --git a/extra_scripts/datasets/create_imagenet_sketch_data_files.py b/extra_scripts/datasets/create_imagenet_sketch_data_files.py index 94c773940..0c11c3b9b 100644 --- a/extra_scripts/datasets/create_imagenet_sketch_data_files.py +++ b/extra_scripts/datasets/create_imagenet_sketch_data_files.py @@ -10,7 +10,7 @@ create_imagenet_test_files, remove_file_name_whitespace, ) -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from torchvision.datasets.utils import extract_archive from vissl.utils.download import download_google_drive_url from vissl.utils.io import cleanup_dir @@ -82,7 +82,7 @@ def cleanup_unused_files(output_path: str): download_datasets(args.input) input_path = os.path.join(args.input, "imagenet_sketch") - assert PathManager.exists(input_path), "Input data path does not exist" + assert g_pathmgr.exists(input_path), "Input data path does not exist" remove_file_name_whitespace(input_path) create_imagenet_test_files(input_path, args.output) diff --git a/extra_scripts/datasets/create_inaturalist2018_data_files.py b/extra_scripts/datasets/create_inaturalist2018_data_files.py index e97680335..593282ed4 100644 --- a/extra_scripts/datasets/create_inaturalist2018_data_files.py +++ b/extra_scripts/datasets/create_inaturalist2018_data_files.py @@ -20,7 +20,7 @@ import sys import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.download import download_and_extract_archive from vissl.utils.io import save_file @@ -111,10 +111,10 @@ def main(): args = parser.parse_args() # Make sure that the input and output directories exist. - assert PathManager.exists( + assert g_pathmgr.exists( args.input_dir_path ), "Data input directory not found! Please create the directory" - assert PathManager.exists( + assert g_pathmgr.exists( args.output_dir_path ), "Data output directory not found! 
Please create the directory" diff --git a/extra_scripts/datasets/create_sun397_data_files.py b/extra_scripts/datasets/create_sun397_data_files.py index 220d20548..16a381d01 100644 --- a/extra_scripts/datasets/create_sun397_data_files.py +++ b/extra_scripts/datasets/create_sun397_data_files.py @@ -10,7 +10,7 @@ from typing import Any, List import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from tqdm import tqdm from vissl.utils.download import download_and_extract_archive @@ -95,7 +95,7 @@ def create_sun397_disk_filelist_dataset(input_path: str, output_path: str, seed: by allocating 70% of labels to "train", 10% to "val" and 20% to "test". """ random.seed(seed) - PathManager.mkdirs(output_path) + g_pathmgr.mkdirs(output_path) # List all the available classes in SUN397 and their path image_folder = os.path.join(input_path, "SUN397") @@ -129,10 +129,10 @@ def create_sun397_disk_filelist_dataset(input_path: str, output_path: str, seed: # Save each split for split, samples in splits_data.items(): image_output_path = os.path.join(output_path, f"{split}_images.npy") - with PathManager.open(image_output_path, mode="wb") as f: + with g_pathmgr.open(image_output_path, mode="wb") as f: np.save(f, np.array(samples.image_paths)) label_output_path = os.path.join(output_path, f"{split}_labels.npy") - with PathManager.open(label_output_path, mode="wb") as f: + with g_pathmgr.open(label_output_path, mode="wb") as f: np.save(f, np.array(samples.image_labels)) diff --git a/extra_scripts/datasets/create_voc_data_files.py b/extra_scripts/datasets/create_voc_data_files.py index 04db9d4fb..9dae1abc8 100644 --- a/extra_scripts/datasets/create_voc_data_files.py +++ b/extra_scripts/datasets/create_voc_data_files.py @@ -16,7 +16,7 @@ from glob import glob import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr # initiate the logger @@ -39,7 +39,7 @@ def validate_files(input_files): def get_data_files(split, args): data_dir = f"{args.data_source_dir}/ImageSets/Main" - assert PathManager.exists(data_dir), "Data: {} doesn't exist".format(data_dir) + assert g_pathmgr.exists(data_dir), "Data: {} doesn't exist".format(data_dir) test_data_files = glob(os.path.join(data_dir, "*_test.txt")) test_data_files = validate_files(test_data_files) if args.separate_partitions > 0: @@ -68,7 +68,7 @@ def get_data_files(split, args): def get_images_labels_info(split, args): - assert PathManager.exists(args.data_source_dir), "Data source NOT found. Abort" + assert g_pathmgr.exists(args.data_source_dir), "Data source NOT found. Abort" data_files = get_data_files(split, args) # we will construct a map for image name to the vector of -1, 0, 1 @@ -77,7 +77,7 @@ def get_images_labels_info(split, args): for cls_num, data_path in enumerate(sorted(data_files)): # for this class, we have images and each image will have label # 1, -1, 0 -> present, not present, ignore respectively as in VOC data. 
- with PathManager.open(data_path, "r") as fopen: + with g_pathmgr.open(data_path, "r") as fopen: for line in fopen: try: img_name, orig_label = line.strip().split() @@ -174,7 +174,7 @@ def main(): json_out_path = f"{args.output_dir}/{partition}_targets.json" import json - with PathManager.open(json_out_path, "w") as fp: + with g_pathmgr.open(json_out_path, "w") as fp: json.dump(output_dict, fp) logger.info("Saved Json to: {}".format(json_out_path)) logger.info("DONE!") diff --git a/requirements.txt b/requirements.txt index dfac70660..95a5cfc58 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,7 @@ cython==0.29.22 fairscale@https://github.com/facebookresearch/fairscale/tarball/df7db85cef7f9c30a5b821007754b96eb1f977b6 fvcore==0.1.3.post20210317 +iopath==0.1.9 hydra-core==1.0.7 numpy==1.19.5 parameterized==0.7.4 diff --git a/tools/cluster_features_and_label.py b/tools/cluster_features_and_label.py index d5b074b05..941250169 100644 --- a/tools/cluster_features_and_label.py +++ b/tools/cluster_features_and_label.py @@ -16,7 +16,7 @@ import numpy as np import torch -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.config import AttrDict from vissl.data import build_dataset from vissl.hooks import default_hook_generator @@ -167,7 +167,7 @@ def _create_dataset_split( "images": image_paths, } output_dir = cfg.CLUSTERFIT.OUTPUT_DIR - PathManager.mkdirs(output_dir) + g_pathmgr.mkdirs(output_dir) output_prefix = ( f"{data_name}_{data_split}_{layer_name}_N{num_clusters}_D{features_dim}" ) diff --git a/tools/instance_retrieval_test.py b/tools/instance_retrieval_test.py index b1696361d..9ca41a485 100644 --- a/tools/instance_retrieval_test.py +++ b/tools/instance_retrieval_test.py @@ -13,7 +13,7 @@ import torch import torchvision from classy_vision.generic.util import copy_model_to_gpu, load_checkpoint -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.config import AttrDict from vissl.models import build_model from vissl.utils.checkpoint import ( @@ -63,7 +63,7 @@ def build_retrieval_model(cfg): """ logging.info("Building model....") model = build_model(cfg.MODEL, cfg.OPTIMIZER) - if PathManager.exists(cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE): + if g_pathmgr.exists(cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE): init_weights_path = cfg.MODEL.WEIGHTS_INIT.PARAMS_FILE logging.info(f"Initializing model from: {init_weights_path}") weights = load_checkpoint(init_weights_path, device=torch.device("cuda")) @@ -129,7 +129,7 @@ def process_train_image(i, out_dir, verbose=False): if out_dir: fname_out = f"{out_dir}/{i}.npy" - if fname_out and PathManager.exists(fname_out): + if fname_out and g_pathmgr.exists(fname_out): feat = load_file(fname_out) train_features.append(feat) else: @@ -297,7 +297,7 @@ def get_dataset_features( if db_fname_out_dir: db_fname_out = f"{db_fname_out_dir}/{idx}.npy" - if db_fname_out and PathManager.exists(db_fname_out): + if db_fname_out and g_pathmgr.exists(db_fname_out): db_feature = load_file(db_fname_out) else: db_feature = process_eval_image( @@ -360,7 +360,7 @@ def get_queries_features( if q_fname_out_dir: q_fname_out = f"{q_fname_out_dir}/{idx}.npy" - if q_fname_out and PathManager.exists(q_fname_out): + if q_fname_out and g_pathmgr.exists(q_fname_out): query_feature = load_file(q_fname_out) else: query_feature = process_eval_image( @@ -413,7 +413,7 @@ def get_train_dataset(cfg, root_dataset_path, train_dataset_name, eval_binary_pa if 
cfg.IMG_RETRIEVAL.TRAIN_PCA_WHITENING: train_data_path = f"{root_dataset_path}/{train_dataset_name}" - assert PathManager.exists(train_data_path), f"Unknown path: {train_data_path}" + assert g_pathmgr.exists(train_data_path), f"Unknown path: {train_data_path}" num_samples = ( None @@ -462,7 +462,7 @@ def compute_l2_distance_matrix(features_queries, features_dataset): def get_eval_dataset(cfg, root_dataset_path, eval_dataset_name, eval_binary_path): eval_data_path = f"{root_dataset_path}/{eval_dataset_name}" - assert PathManager.exists(eval_data_path), f"Unknown path: {eval_data_path}" + assert g_pathmgr.exists(eval_data_path), f"Unknown path: {eval_data_path}" num_samples = ( None @@ -556,7 +556,7 @@ def instance_retrieval_test(args, cfg): if temp_dir: pca_out_fname = f"{temp_dir}/{train_dataset_name}_S{resize_img}_PCA.pickle" - if pca_out_fname and PathManager.exists(pca_out_fname): + if pca_out_fname and g_pathmgr.exists(pca_out_fname): logging.info("Loading PCA...") pca = load_pca(pca_out_fname) else: diff --git a/tools/launch_benchmark_suite_scheduler_slurm.py b/tools/launch_benchmark_suite_scheduler_slurm.py index d06affe4e..5ed70362b 100644 --- a/tools/launch_benchmark_suite_scheduler_slurm.py +++ b/tools/launch_benchmark_suite_scheduler_slurm.py @@ -7,7 +7,7 @@ import pkg_resources import submitit -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.config.attr_dict import AttrDict from vissl.utils.benchmark_suite_scheduler import BenchmarkSuiteScheduler from vissl.utils.hydra_config import assert_hydra_dependency @@ -56,7 +56,7 @@ def checkpoint(self): def launch_benchmark_suite_scheduler(config_file): - assert PathManager.exists(config_file), "Slurm evaluator config file must exist" + assert g_pathmgr.exists(config_file), "Slurm evaluator config file must exist" user_config = load_file(config_file) config = _DEFAULT_CONFIG.copy() diff --git a/vissl/data/dataset_catalog.py b/vissl/data/dataset_catalog.py index 9bcf5b6c3..4ab6327cd 100644 --- a/vissl/data/dataset_catalog.py +++ b/vissl/data/dataset_catalog.py @@ -13,7 +13,7 @@ from typing import List import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.data.datasets import get_coco_imgs_labels_info, get_voc_images_labels_info from vissl.utils.misc import get_json_data_catalog_file from vissl.utils.slurm import get_slurm_dir @@ -39,7 +39,7 @@ def register_json(json_catalog_path): Args: filepath: a .json filepath that contains the data to be registered """ - with PathManager.open(json_catalog_path) as fopen: + with g_pathmgr.open(json_catalog_path) as fopen: data_catalog = json.load(fopen) for key, value in data_catalog.items(): VisslDatasetCatalog.register_data(key, value) @@ -135,17 +135,17 @@ def get_local_path(input_file, dest_dir): - If both above fail, we return the input_file as is. 
""" out = "" - if PathManager.isfile(input_file): + if g_pathmgr.isfile(input_file): out = os.path.join(dest_dir, os.path.basename(input_file)) - elif PathManager.isdir(input_file): + elif g_pathmgr.isdir(input_file): data_name = input_file.strip("/").split("/")[-1] if "SLURM_JOBID" in os.environ: dest_dir = get_slurm_dir(dest_dir) dest_dir = os.path.join(dest_dir, data_name) complete_flag = os.path.join(dest_dir, "copy_complete") - if PathManager.isfile(complete_flag): + if g_pathmgr.isfile(complete_flag): out = dest_dir - if PathManager.exists(out): + if g_pathmgr.exists(out): return out else: return input_file @@ -172,9 +172,9 @@ def check_data_exists(data_files): we iteratively check for each file in the list. """ if isinstance(data_files, list): - return np.all([PathManager.exists(item) for item in data_files]) + return np.all([g_pathmgr.exists(item) for item in data_files]) else: - return PathManager.exists(data_files) + return g_pathmgr.exists(data_files) def register_pascal_voc(): @@ -188,7 +188,7 @@ def register_pascal_voc(): for voc_data in voc_datasets: data_info = VisslDatasetCatalog.get(voc_data) data_folder = data_info["train"][0] - if PathManager.exists(data_folder): + if g_pathmgr.exists(data_folder): train_data_info = get_voc_images_labels_info("train", data_folder) test_data_info = get_voc_images_labels_info("val", data_folder) data_info["train"] = train_data_info @@ -208,7 +208,7 @@ def register_coco(): """ data_info = VisslDatasetCatalog.get("coco2014_folder") data_folder = data_info["train"][0] - if PathManager.exists(data_folder): + if g_pathmgr.exists(data_folder): train_data_info = get_coco_imgs_labels_info("train", data_folder) test_data_info = get_coco_imgs_labels_info("val", data_folder) data_info["train"] = train_data_info @@ -228,7 +228,7 @@ def register_datasets(json_catalog_path): Args: json_catalog_path (str): the path to the json dataset catalog """ - if PathManager.exists(json_catalog_path): + if g_pathmgr.exists(json_catalog_path): logging.info(f"Registering datasets: {json_catalog_path}") VisslDatasetCatalog.clear() VisslDatasetCatalog.register_json(json_catalog_path) diff --git a/vissl/data/datasets/coco.py b/vissl/data/datasets/coco.py index ce8eb6566..e36ce669a 100644 --- a/vissl/data/datasets/coco.py +++ b/vissl/data/datasets/coco.py @@ -14,7 +14,7 @@ import os import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr # COCO API from vissl.utils.io import makedir, save_file @@ -60,8 +60,8 @@ def get_coco_imgs_labels_info(split, data_source_dir, args): from pycocotools.coco import COCO json_file = f"{data_source_dir}/annotations/instances_{split}2014.json" - assert PathManager.exists(json_file), "Annotations file does not exist. Abort" - json_data = json.load(PathManager.open(json_file, "r")) + assert g_pathmgr.exists(json_file), "Annotations file does not exist. 
Abort" + json_data = json.load(g_pathmgr.open(json_file, "r")) image_index = [x["id"] for x in json_data["images"]] coco = COCO(json_file) diff --git a/vissl/data/datasets/pascal_voc.py b/vissl/data/datasets/pascal_voc.py index f37548f18..198cedff2 100644 --- a/vissl/data/datasets/pascal_voc.py +++ b/vissl/data/datasets/pascal_voc.py @@ -14,7 +14,7 @@ from glob import glob import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.io import makedir, save_file @@ -40,7 +40,7 @@ def validate_files(input_files): def get_data_files(split, data_source_dir): data_dir = f"{data_source_dir}/ImageSets/Main" - assert PathManager.exists(data_dir), "Data: {} doesn't exist".format(data_dir) + assert g_pathmgr.exists(data_dir), "Data: {} doesn't exist".format(data_dir) test_data_files = glob(os.path.join(data_dir, "*_test.txt")) test_data_files = validate_files(test_data_files) train_data_files = glob(os.path.join(data_dir, "*_trainval.txt")) @@ -56,7 +56,7 @@ def get_data_files(split, data_source_dir): def get_voc_images_labels_info(split, data_source_dir): - assert PathManager.exists(data_source_dir), "Data source NOT found. Abort" + assert g_pathmgr.exists(data_source_dir), "Data source NOT found. Abort" data_files = get_data_files(split, data_source_dir) # we will construct a map for image name to the vector of -1, 0, 1 # we sort the data_files which gives sorted class names as well @@ -64,7 +64,7 @@ def get_voc_images_labels_info(split, data_source_dir): for cls_num, data_path in enumerate(sorted(data_files)): # for this class, we have images and each image will have label # 1, -1, 0 -> present, not present, ignore respectively as in VOC data. - with PathManager.open(data_path, "r") as fopen: + with g_pathmgr.open(data_path, "r") as fopen: for line in fopen: try: img_name, orig_label = line.strip().split() diff --git a/vissl/data/disk_dataset.py b/vissl/data/disk_dataset.py index c8c0231e3..19b3c2dbd 100644 --- a/vissl/data/disk_dataset.py +++ b/vissl/data/disk_dataset.py @@ -5,7 +5,7 @@ import logging -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from PIL import Image from torchvision.datasets import ImageFolder from vissl.data.data_helper import QueueDataset, get_mean_image @@ -54,9 +54,9 @@ def __init__(self, cfg, data_source, path, split, dataset_name): "disk_folder", ], "data_source must be either disk_filelist or disk_folder" if data_source == "disk_filelist": - assert PathManager.isfile(path), f"File {path} does not exist" + assert g_pathmgr.isfile(path), f"File {path} does not exist" elif data_source == "disk_folder": - assert PathManager.isdir(path), f"Directory {path} does not exist" + assert g_pathmgr.isdir(path), f"Directory {path} does not exist" self.cfg = cfg self.split = split self.dataset_name = dataset_name @@ -144,7 +144,7 @@ def __getitem__(self, idx): replace_prefix=self._remove_prefix, new_prefix=self._new_prefix, ) - with PathManager.open(image_path, "rb") as fopen: + with g_pathmgr.open(image_path, "rb") as fopen: img = Image.open(fopen).convert("RGB") elif self.data_source == "disk_folder": img = self.image_dataset[idx][0] diff --git a/vissl/data/ssl_dataset.py b/vissl/data/ssl_dataset.py index a787ef8cd..d35b4026a 100644 --- a/vissl/data/ssl_dataset.py +++ b/vissl/data/ssl_dataset.py @@ -8,7 +8,7 @@ import numpy as np from classy_vision.generic.distributed_util import get_world_size -from fvcore.common.file_io import PathManager +from iopath.common.file_io 
import g_pathmgr from vissl.config import AttrDict from vissl.data import dataset_catalog from vissl.data.data_helper import balanced_sub_sampling, unbalanced_sub_sampling @@ -168,22 +168,22 @@ def load_single_label_file(self, path: str): To save memory, if the mmap_mode is set to True for loading, we try to load the images in mmap_mode. If it fails, we simply load the labels without mmap """ - assert PathManager.isfile(path), f"Path to labels {path} is not a file" + assert g_pathmgr.isfile(path), f"Path to labels {path} is not a file" assert path.endswith("npy"), "Please specify a numpy file for labels" if self.cfg["DATA"][self.split].MMAP_MODE: try: - with PathManager.open(path, "rb") as fopen: + with g_pathmgr.open(path, "rb") as fopen: labels = np.load(fopen, allow_pickle=True, mmap_mode="r") except ValueError as e: - logging.info(f"Could not mmap {path}: {e}. Trying without PathManager") + logging.info(f"Could not mmap {path}: {e}. Trying without g_pathmgr") labels = np.load(path, allow_pickle=True, mmap_mode="r") - logging.info("Successfully loaded without PathManager") + logging.info("Successfully loaded without g_pathmgr") except Exception: - logging.info("Could not mmap without PathManager. Trying without mmap") - with PathManager.open(path, "rb") as fopen: + logging.info("Could not mmap without g_pathmgr. Trying without mmap") + with g_pathmgr.open(path, "rb") as fopen: labels = np.load(fopen, allow_pickle=True) else: - with PathManager.open(path, "rb") as fopen: + with g_pathmgr.open(path, "rb") as fopen: labels = np.load(fopen, allow_pickle=True) return labels diff --git a/vissl/data/ssl_transforms/shuffle_img_patches.py b/vissl/data/ssl_transforms/shuffle_img_patches.py index 92617790c..d601d3588 100644 --- a/vissl/data/ssl_transforms/shuffle_img_patches.py +++ b/vissl/data/ssl_transforms/shuffle_img_patches.py @@ -12,7 +12,7 @@ import torch from classy_vision.dataset.transforms import register_transform from classy_vision.dataset.transforms.classy_transform import ClassyTransform -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.io import cache_url, is_url, load_file @@ -42,7 +42,7 @@ def _load_perms(self): cache_dir = os.path.join(temp_cache_dir, "perm_file_cache") cached_url_path = cache_url(url=self.perm_file, cache_dir=cache_dir) self.perm_file = cached_url_path - assert PathManager.exists( + assert g_pathmgr.exists( self.perm_file ), f"Permutation file NOT found: {self.perm_file}" logging.info(f"Loading permutation: {self.perm_file}") diff --git a/vissl/data/torchvision_dataset.py b/vissl/data/torchvision_dataset.py index 61c16bb9c..95bf911dc 100644 --- a/vissl/data/torchvision_dataset.py +++ b/vissl/data/torchvision_dataset.py @@ -5,7 +5,7 @@ from typing import List, Tuple -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from PIL import Image from torch.utils.data import Dataset from torchvision.datasets import CIFAR10, CIFAR100, MNIST, STL10, SVHN @@ -42,7 +42,7 @@ def __init__( self, cfg: AttrDict, data_source: str, path: str, split: str, dataset_name: str ): super().__init__() - assert PathManager.isdir(path), f"Directory {path} does not exist" + assert g_pathmgr.isdir(path), f"Directory {path} does not exist" self.dataset_name = dataset_name self.path = path self.split = split.lower() diff --git a/vissl/hooks/log_hooks.py b/vissl/hooks/log_hooks.py index dcbf88e06..9194dde3b 100644 --- a/vissl/hooks/log_hooks.py +++ b/vissl/hooks/log_hooks.py @@ -19,7 +19,7 @@ from 
classy_vision.generic.distributed_util import get_rank, is_primary from classy_vision.hooks.classy_hook import ClassyHook from fairscale.nn.data_parallel import FullyShardedDataParallel as FSDP -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.checkpoint import CheckpointWriter, is_checkpoint_phase from vissl.utils.env import get_machine_local_and_dist_rank from vissl.utils.io import save_file @@ -184,7 +184,7 @@ def __init__( self.btime_freq: Optional[int] = btime_freq self.json_stdout_logger = None if is_primary(): - self.json_stdout_logger = PathManager.open( + self.json_stdout_logger = g_pathmgr.open( f"{checkpoint_folder}/stdout.json", mode="a", buffering=10 * 1024, # 10KB @@ -331,9 +331,9 @@ def on_forward(self, task: "tasks.ClassyTask") -> None: input_sample_file = ( f"{task.checkpoint_folder}/rank{dist_rank}_input_sample.pth" ) - with PathManager.open(model_output_file, "wb") as fwrite: + with g_pathmgr.open(model_output_file, "wb") as fwrite: torch.save(model_output, fwrite) - with PathManager.open(input_sample_file, "wb") as fwrite: + with g_pathmgr.open(input_sample_file, "wb") as fwrite: torch.save(task.last_batch.sample, fwrite) logging.info(f"Saved model output: {model_output_file}") logging.info(f"Saved model input: {input_sample_file}") diff --git a/vissl/losses/swav_loss.py b/vissl/losses/swav_loss.py index c56c1fd09..8b4464cba 100644 --- a/vissl/losses/swav_loss.py +++ b/vissl/losses/swav_loss.py @@ -18,7 +18,7 @@ get_world_size, ) from classy_vision.losses import ClassyLoss, register_loss -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from torch import nn from vissl.config import AttrDict from vissl.losses.distibuted_sinkhornknopp import distributed_sinkhornknopp @@ -279,9 +279,9 @@ def forward(self, scores: torch.Tensor, head_id: int): self.output_dir, "rank" + str(self.dist_rank) + "_assignments" + str(i) + ".pth", ) - with PathManager.open(scores_output_file, "wb") as fwrite: + with g_pathmgr.open(scores_output_file, "wb") as fwrite: torch.save(scores, fwrite) - with PathManager.open(assignments_out_file, "wb") as fwrite: + with g_pathmgr.open(assignments_out_file, "wb") as fwrite: torch.save(assignments, fwrite) logging.info(f"Saved the scores matrix to: {scores_output_file}") logging.info(f"Saved the assignment matrix to: {assignments_out_file}") diff --git a/vissl/trainer/train_task.py b/vissl/trainer/train_task.py index 9fa805cbb..c81edcf40 100644 --- a/vissl/trainer/train_task.py +++ b/vissl/trainer/train_task.py @@ -14,7 +14,7 @@ from classy_vision.optim import build_optimizer, build_optimizer_schedulers from classy_vision.tasks import ClassificationTask, register_task from classy_vision.tasks.classification_task import AmpType, BroadcastBuffersMode -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from torch.cuda.amp import GradScaler as TorchGradScaler from vissl.config import AttrDict from vissl.data import ( @@ -422,7 +422,7 @@ def _restore_model_weights(self, model): assert init_weights_path, "Shouldn't call this when init_weight_path is empty" logging.info(f"Initializing model from: {init_weights_path}") - if PathManager.exists(init_weights_path): + if g_pathmgr.exists(init_weights_path): checkpoint = CheckpointLoader.load_and_broadcast_init_weights( checkpoint_path=init_weights_path, device=torch.device("cpu") ) @@ -480,7 +480,7 @@ def _build_model(self): self.checkpoint_path is None and 
self.config["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"] ): - assert PathManager.exists( + assert g_pathmgr.exists( self.config["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"] ), "Specified PARAMS_FILE does NOT exist" # If we want to initialize the model in case of finetuning or evaluation, @@ -489,7 +489,7 @@ def _build_model(self): if ( self.checkpoint_path is None and self.config["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"] - and PathManager.exists(self.config["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"]) + and g_pathmgr.exists(self.config["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"]) ): model = self._restore_model_weights(model) diff --git a/vissl/utils/benchmark_suite_scheduler.py b/vissl/utils/benchmark_suite_scheduler.py index 1d1e5fb1e..0be0c8f93 100644 --- a/vissl/utils/benchmark_suite_scheduler.py +++ b/vissl/utils/benchmark_suite_scheduler.py @@ -13,7 +13,7 @@ from typing import List import submitit -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.config.attr_dict import AttrDict from vissl.utils.distributed_launcher import launch_distributed_on_slurm from vissl.utils.hydra_config import compose_hydra_configuration, convert_to_attrdict @@ -163,7 +163,7 @@ def evaluate(self): ) if ( - PathManager.exists(self.training_checkpoint_file) + g_pathmgr.exists(self.training_checkpoint_file) and self._max_training_iterations() ): # Load training yaml config. @@ -206,10 +206,10 @@ def _max_training_iterations(self): ) # If the stdout.json path doesn't exist, return None. - if not PathManager.exists(training_stdout_json_file): + if not g_pathmgr.exists(training_stdout_json_file): return None - with PathManager.open(training_stdout_json_file, "rb") as f: + with g_pathmgr.open(training_stdout_json_file, "rb") as f: # First line of stdout.json must have max_iterations in the first line try: first_json_line = json.loads(next(f)) @@ -294,10 +294,10 @@ def _validate_evaluation_setup(self): % self.training_config.CHECKPOINT.CHECKPOINT_FREQUENCY ) == 0, "Evaluation phase frequency must evenly divide the checkpoint phase frequency" # NOQA - assert PathManager.exists( + assert g_pathmgr.exists( self.training_config.SLURM.LOG_FOLDER ), "Training slurm log folder must exist" - assert PathManager.exists( + assert g_pathmgr.exists( self.training_config.CHECKPOINT.DIR ), "Training slurm checkpoint folder must exist" @@ -315,7 +315,7 @@ def _evaluate_checkpoints(self): checkpoint_str = os.path.join( self.training_config.CHECKPOINT.DIR, f"{ checkpoint_str }.torch" ) - if PathManager.exists(checkpoint_str): + if g_pathmgr.exists(checkpoint_str): self._evaluate_checkpoint(checkpoint_str, benchmarks) def _evaluate_checkpoint(self, checkpoint_str, benchmarks): @@ -382,7 +382,7 @@ def _launch_slurm_job(self, args, config): return launch_distributed_on_slurm(engine_name=args.engine_name, cfg=config) def _write_json_file(self, data, file_name): - with PathManager.open(file_name, "w") as fopen: + with g_pathmgr.open(file_name, "w") as fopen: fopen.write(json.dumps(data, sort_keys=True)) fopen.flush() @@ -435,10 +435,10 @@ def _update_benchmark_state(self, benchmark, job): def _get_benchmark_metrics(self, benchmark): metrics_file = os.path.join(benchmark["slurm_checkpoint_dir"], "metrics.json") - if PathManager.exists(metrics_file): + if g_pathmgr.exists(metrics_file): # Open metrics file from finished evaluation job. 
metrics = [] - with PathManager.open(metrics_file, "rb") as f: + with g_pathmgr.open(metrics_file, "rb") as f: for line in f: metrics.append(json.loads(line)) @@ -476,7 +476,7 @@ def _generate_initial_benchmark_results(self): ) autoload_slurm_evaluator_checkpoint = ( self.autoload_slurm_evaluator_checkpoint - and PathManager.exists(default_checkpoint) + and g_pathmgr.exists(default_checkpoint) ) if autoload_slurm_evaluator_checkpoint or self.slurm_evaluator_checkpoint: diff --git a/vissl/utils/checkpoint.py b/vissl/utils/checkpoint.py index 8b74f53ce..545653a66 100644 --- a/vissl/utils/checkpoint.py +++ b/vissl/utils/checkpoint.py @@ -17,7 +17,7 @@ save_checkpoint, ) from fairscale.nn import FullyShardedDataParallel -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.config import AttrDict from vissl.utils.env import get_machine_local_and_dist_rank from vissl.utils.io import abspath, create_file_symlink, makedir @@ -241,7 +241,7 @@ def sharded_to_consolidated_checkpoint( }, } logging.info(f"Saving consolidated checkpoint at: {output_checkpoint_path}") - with PathManager.open(output_checkpoint_path, "wb") as f: + with g_pathmgr.open(output_checkpoint_path, "wb") as f: torch.save(consolidated_checkpoint, f) logging.info(f"Done! Checkpoint available at: {output_checkpoint_path}") @@ -271,14 +271,14 @@ def sharded_to_sliced_checkpoint( "type": CheckpointItemType.slice_list.name, "layers": saved_parameters, } - with PathManager.open(output_checkpoint_path, "wb") as f: + with g_pathmgr.open(output_checkpoint_path, "wb") as f: torch.save(checkpoint_list, f) logging.info(f"Done! Checkpoint available at: {output_checkpoint_path}") @classmethod def _read_shards(cls, input_checkpoint_path: str, device="cpu"): logging.info(f"Reading sharded checkpoint from: {input_checkpoint_path}") - with PathManager.open(input_checkpoint_path, "rb") as f: + with g_pathmgr.open(input_checkpoint_path, "rb") as f: checkpoint = torch.load(f, map_location=device) assert checkpoint["type"] == CheckpointItemType.shard_list.name @@ -288,7 +288,7 @@ def _read_shards(cls, input_checkpoint_path: str, device="cpu"): checkpoint_folder = os.path.split(input_checkpoint_path)[0] shard_path = os.path.join(checkpoint_folder, shard_path) - with PathManager.open(shard_path, "rb") as f: + with g_pathmgr.open(shard_path, "rb") as f: shard_content = torch.load(f, map_location=device) trunk_data = shard_content["classy_state_dict"]["base_model"]["model"][ @@ -341,7 +341,7 @@ def save_model_weights(cls, model: FullyShardedDataParallel, checkpoint_path: st "type": CheckpointItemType.slice_list.name, "layers": saved_parameters, } - with PathManager.open(checkpoint_path, "wb") as f: + with g_pathmgr.open(checkpoint_path, "wb") as f: torch.save(checkpoint_list, f) @classmethod @@ -358,7 +358,7 @@ def save_slice(cls, checkpoint_path: str, param_path: str, param) -> str: file_path = os.path.join(checkpoint_sub_folder, f"{hash_name}.torch") file_path = abspath(file_path) checkpoint_slice = {"type": CheckpointItemType.slice.name, "weight": param} - with PathManager.open(file_path, "wb") as f: + with g_pathmgr.open(file_path, "wb") as f: torch.save(checkpoint_slice, f) return file_path @@ -386,7 +386,7 @@ def _init_weight_from_slice( weight_path = cls._clean_path(weight_path) file_name = checkpoint["layers"].get(weight_path, None) assert file_name is not None, f"Could not find buffer: {weight_path}" - with PathManager.open(file_name, "rb") as f: + with g_pathmgr.open(file_name, "rb") as f: 
layer_checkpoint = torch.load(f) assert layer_checkpoint["type"] == CheckpointItemType.slice.name weight.copy_(layer_checkpoint["weight"]) @@ -461,7 +461,7 @@ def get_checkpoint_folder(config: AttrDict): odir = f"{odir}/{config.DISTRIBUTED.RUN_ID}" makedir(odir) - assert PathManager.exists( + assert g_pathmgr.exists( config.CHECKPOINT.DIR ), f"Please specify config.CHECKPOINT.DIR parameter. Invalid: {config.CHECKPOINT.DIR}" return odir @@ -512,7 +512,7 @@ def has_checkpoint(checkpoint_folder: str, skip_final: bool = False): Returns: checkpoint_exists (bool): whether checkpoint exists or not """ - checkpointed_files = PathManager.ls(checkpoint_folder) + checkpointed_files = g_pathmgr.ls(checkpoint_folder) checkpoint_exists = False for f in checkpointed_files: if f.endswith(".torch") and ("model_final" not in f or not skip_final): @@ -535,7 +535,7 @@ def has_final_checkpoint( Returns: has_final_checkpoint: whether the final checkpoint exists or not """ - checkpointed_files = PathManager.ls(checkpoint_folder) + checkpointed_files = g_pathmgr.ls(checkpoint_folder) torch_files = filter(lambda x: x.endswith(".torch"), checkpointed_files) final_files = filter(lambda x: final_checkpoint_pattern in x, torch_files) return len(list(final_files)) > 0 @@ -561,7 +561,7 @@ def get_checkpoint_resume_files( Sometimes the latest checkpoints could be corrupt so this option helps to resume from instead a few checkpoints before the last checkpoint. """ - all_files = PathManager.ls(checkpoint_folder) + all_files = g_pathmgr.ls(checkpoint_folder) all_iters = [] replace_prefix = "model_phase" # if we checkpoint at iterations too, we start from an iteration checkpoint diff --git a/vissl/utils/cluster_utils.py b/vissl/utils/cluster_utils.py index 90734bf81..12c9dce29 100644 --- a/vissl/utils/cluster_utils.py +++ b/vissl/utils/cluster_utils.py @@ -9,7 +9,7 @@ import numpy as np import torch -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.config import AttrDict from vissl.data import build_dataset @@ -32,7 +32,7 @@ class ClusterAssignmentLoader: @classmethod def save_cluster_assignment(cls, output_dir: str, assignments: ClusterAssignment): output_file = os.path.join(output_dir, "cluster_assignments.torch") - with PathManager.open(output_file, "wb") as f: + with g_pathmgr.open(output_file, "wb") as f: content = { cls._CONFIG_KEY: assignments.config, cls._ASSIGN_KEY: assignments.cluster_assignments, @@ -41,7 +41,7 @@ def save_cluster_assignment(cls, output_dir: str, assignments: ClusterAssignment @classmethod def load_cluster_assigment(cls, file_path: str) -> ClusterAssignment: - with PathManager.open(file_path, "rb") as f: + with g_pathmgr.open(file_path, "rb") as f: content = torch.load(f) return ClusterAssignment( config=content[cls._CONFIG_KEY], diff --git a/vissl/utils/distributed_launcher.py b/vissl/utils/distributed_launcher.py index dadc2eb77..8e1d0af73 100644 --- a/vissl/utils/distributed_launcher.py +++ b/vissl/utils/distributed_launcher.py @@ -13,7 +13,7 @@ from typing import Any, Callable, List import torch -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.config import AttrDict from vissl.data.dataset_catalog import get_data_files from vissl.engines import run_engine @@ -109,7 +109,7 @@ def launch_distributed( # Get the checkpoint where to resume from. The get_resume_checkpoint function will # automatically take care of detecting whether it's a resume or not. 
symlink_checkpoint_path = f"{checkpoint_folder}/checkpoint.torch" - if cfg.CHECKPOINT.USE_SYMLINK_CHECKPOINT_FOR_RESUME and PathManager.exists( + if cfg.CHECKPOINT.USE_SYMLINK_CHECKPOINT_FOR_RESUME and g_pathmgr.exists( symlink_checkpoint_path ): checkpoint_path = f"{checkpoint_folder}/checkpoint.torch" @@ -125,7 +125,7 @@ def launch_distributed( if checkpoint_path is None and cfg["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"]: params_file = cfg["MODEL"]["WEIGHTS_INIT"]["PARAMS_FILE"] error_message = f"Specified PARAMS_FILE does NOT exist: {params_file}" - assert PathManager.exists(params_file), error_message + assert g_pathmgr.exists(params_file), error_message # copy the data to local if user wants. This can speed up dataloading. _copy_to_local(cfg) @@ -245,7 +245,7 @@ def launch_distributed_on_slurm(cfg: AttrDict, engine_name: str): # setup the log folder log_folder = cfg.SLURM.LOG_FOLDER makedir(log_folder) - assert PathManager.exists( + assert g_pathmgr.exists( log_folder ), f"Specified config.SLURM.LOG_FOLDER={log_folder} doesn't exist" assert cfg.SLURM.PARTITION, "SLURM.PARTITION must be set when using SLURM" diff --git a/vissl/utils/download.py b/vissl/utils/download.py index de2315c11..4a2fb1cf1 100644 --- a/vissl/utils/download.py +++ b/vissl/utils/download.py @@ -87,7 +87,7 @@ def download_google_drive_url(url: str, output_path: str, output_file_name: str) progress_bar.update(len(block)) -# The following methods are copied from torchvision, but we use PathManager +# The following methods are copied from torchvision, but we use g_pathmgr # instead of `os` lib to support multiple distributed file systems. def _get_google_drive_file_id(url: str) -> Optional[str]: parts = urlparse(url) diff --git a/vissl/utils/env.py b/vissl/utils/env.py index 487f35e90..381993953 100644 --- a/vissl/utils/env.py +++ b/vissl/utils/env.py @@ -33,7 +33,7 @@ def set_env_vars(local_rank: int, node_id: int, cfg: AttrDict): def setup_path_manager(): """ - Registering the right options for the PathManager: + Registering the right options for the g_pathmgr: Override this function in your build system to support different distributed file system """ diff --git a/vissl/utils/extract_features_utils.py b/vissl/utils/extract_features_utils.py index 5fe04e238..0679ce7e4 100644 --- a/vissl/utils/extract_features_utils.py +++ b/vissl/utils/extract_features_utils.py @@ -10,7 +10,7 @@ from typing import List, NamedTuple import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.io import load_file @@ -59,7 +59,7 @@ def get_shard_file_names( # dataset split and a given layer feature_regex = re.compile(rf"(.*)_{split}_{layer}_features.npy") prefixes = [] - for file_path in PathManager.ls(input_dir): + for file_path in g_pathmgr.ls(input_dir): match = feature_regex.match(file_path) if match is not None: prefixes.append(match.group(1)) diff --git a/vissl/utils/instance_retrieval_utils/data_util.py b/vissl/utils/instance_retrieval_utils/data_util.py index 871b86144..66eee0dcd 100644 --- a/vissl/utils/instance_retrieval_utils/data_util.py +++ b/vissl/utils/instance_retrieval_utils/data_util.py @@ -14,7 +14,7 @@ import scipy.io import torch import torchvision.transforms.functional as TF -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from PIL import Image, ImageFile from torch.nn import functional as F from torchvision import transforms @@ -218,7 +218,7 @@ class WhiteningTrainingImageDataset: """ def __init__(self, 
base_dir: str, image_list_file: str, num_samples: int = 0): - with PathManager.open(image_list_file) as fopen: + with g_pathmgr.open(image_list_file) as fopen: self.image_list = fopen.readlines() if num_samples > 0: self.image_list = self.image_list[:num_samples] @@ -504,7 +504,7 @@ def load_and_prepare_whitening_image(self, fname): from the filename, load the whitening image and prepare it to be used by applying data transforms """ - with PathManager.open(fname, "rb") as f: + with g_pathmgr.open(fname, "rb") as f: im = Image.open(f) if im.mode != "RGB": im = im.convert(mode="RGB") @@ -517,7 +517,7 @@ def load_and_prepare_instre_image(self, fname): from the filename, load the db or query image and prepare it to be used by applying data transforms """ - with PathManager.open(fname, "rb") as f: + with g_pathmgr.open(fname, "rb") as f: im = Image.open(f) if self.transforms is not None: im = self.transforms(im) @@ -530,7 +530,7 @@ def load_and_prepare_image(self, fname, roi=None): the image once again. ROI format is (xmin,ymin,xmax,ymax) """ # Read image, get aspect ratio, and resize such as the largest side equals S - with PathManager.open(fname, "rb") as f: + with g_pathmgr.open(fname, "rb") as f: img = Image.open(f).convert(mode="RGB") im_resized, ratio = self.apply_img_transform(img) # If there is a roi, adapt the roi to the new size and crop. Do not rescale @@ -552,7 +552,7 @@ def load_and_prepare_revisited_image(self, img_path, roi=None): # open path as file to avoid ResourceWarning # (https://github.com/python-pillow/Pillow/issues/835) - with PathManager.open(img_path, "rb") as f: + with g_pathmgr.open(img_path, "rb") as f: img = Image.open(f).convert("RGB") im_resized, ratio = self.apply_img_transform(img) @@ -600,7 +600,7 @@ def __init__( def _get_filenames(self, data_path: str): fnames = [] - for fname in sorted(PathManager.ls(data_path)): + for fname in sorted(g_pathmgr.ls(data_path)): # Only put images in fnames. 
if not fname.endswith(".jpg"): continue @@ -722,11 +722,11 @@ def load(self, num_samples=None): self.lab_root = f"{self.path}/lab/" self.img_root = f"{self.path}/jpg/" logging.info(f"Loading data: {self.path}") - lab_filenames = np.sort(PathManager.ls(self.lab_root)) + lab_filenames = np.sort(g_pathmgr.ls(self.lab_root)) # Get the filenames without the extension self.img_filenames = [ e[:-4] - for e in np.sort(PathManager.ls(self.img_root)) + for e in np.sort(g_pathmgr.ls(self.img_root)) if e[:-4] not in self.blacklisted ] @@ -746,7 +746,7 @@ def load(self, num_samples=None): for e in lab_filenames: if e.endswith("_query.txt"): q_name = e[: -len("_query.txt")] - with PathManager.open(f"{self.lab_root}/{e}") as fopen: + with g_pathmgr.open(f"{self.lab_root}/{e}") as fopen: q_data = fopen.readline().split(" ") if q_data[0].startswith("oxc1_"): q_filename = q_data[0][5:] @@ -754,11 +754,11 @@ def load(self, num_samples=None): q_filename = q_data[0] self.filename_to_name[q_filename] = q_name self.name_to_filename[q_name] = q_filename - with PathManager.open(f"{self.lab_root}/{q_name}_ok.txt") as fopen: + with g_pathmgr.open(f"{self.lab_root}/{q_name}_ok.txt") as fopen: good = {e.strip() for e in fopen} - with PathManager.open(f"{self.lab_root}/{q_name}_good.txt") as fopen: + with g_pathmgr.open(f"{self.lab_root}/{q_name}_good.txt") as fopen: good = good.union({e.strip() for e in fopen}) - with PathManager.open(f"{self.lab_root}/{q_name}_junk.txt") as fopen: + with g_pathmgr.open(f"{self.lab_root}/{q_name}_junk.txt") as fopen: junk = {e.strip() for e in fopen} good_plus_junk = good.union(junk) self.relevants[q_name] = [ @@ -813,7 +813,7 @@ def score_rnk_partial(self, i, idx, temp_dir): """ rnk = np.array(self.img_filenames[: self.N_images])[idx] - with PathManager.open(f"{temp_dir}/{self.q_names[i]}.rnk", "w") as f: + with g_pathmgr.open(f"{temp_dir}/{self.q_names[i]}.rnk", "w") as f: f.write("\n".join(rnk) + "\n") cmd = ( @@ -894,7 +894,7 @@ def __init__( def _get_filenames(self, data_path: str): fnames = [] - for fname in sorted(PathManager.ls(data_path)): + for fname in sorted(g_pathmgr.ls(data_path)): # Only put images in fnames. if not fname.endswith(".jpg"): continue diff --git a/vissl/utils/io.py b/vissl/utils/io.py index ee26300f3..8d38f77cd 100644 --- a/vissl/utils/io.py +++ b/vissl/utils/io.py @@ -13,8 +13,8 @@ import numpy as np import yaml -from fvcore.common.download import download -from fvcore.common.file_io import PathManager, file_lock +from iopath.common.download import download +from iopath.common.file_io import g_pathmgr, file_lock from vissl.utils.slurm import get_slurm_dir @@ -44,9 +44,9 @@ def create_file_symlink(file1, file2): latest successful checkpoint. """ try: - if PathManager.exists(file2): - PathManager.rm(file2) - PathManager.symlink(file1, file2) + if g_pathmgr.exists(file2): + g_pathmgr.rm(file2) + g_pathmgr.symlink(file1, file2) except Exception as e: logging.info(f"Could NOT create symlink. 
Error: {e}") @@ -63,22 +63,22 @@ def save_file(data, filename, append_to_json=True, verbose=True): logging.info(f"Saving data to file: {filename}") file_ext = os.path.splitext(filename)[1] if file_ext in [".pkl", ".pickle"]: - with PathManager.open(filename, "wb") as fopen: + with g_pathmgr.open(filename, "wb") as fopen: pickle.dump(data, fopen, pickle.HIGHEST_PROTOCOL) elif file_ext == ".npy": - with PathManager.open(filename, "wb") as fopen: + with g_pathmgr.open(filename, "wb") as fopen: np.save(fopen, data) elif file_ext == ".json": if append_to_json: - with PathManager.open(filename, "a") as fopen: + with g_pathmgr.open(filename, "a") as fopen: fopen.write(json.dumps(data, sort_keys=True) + "\n") fopen.flush() else: - with PathManager.open(filename, "w") as fopen: + with g_pathmgr.open(filename, "w") as fopen: fopen.write(json.dumps(data, sort_keys=True) + "\n") fopen.flush() elif file_ext == ".yaml": - with PathManager.open(filename, "w") as fopen: + with g_pathmgr.open(filename, "w") as fopen: dump = yaml.dump(data) fopen.write(dump) fopen.flush() @@ -101,31 +101,31 @@ def load_file(filename, mmap_mode=None): logging.info(f"Loading data from file: {filename}") file_ext = os.path.splitext(filename)[1] if file_ext in [".pkl", ".pickle"]: - with PathManager.open(filename, "rb") as fopen: + with g_pathmgr.open(filename, "rb") as fopen: data = pickle.load(fopen, encoding="latin1") elif file_ext == ".npy": if mmap_mode: try: - with PathManager.open(filename, "rb") as fopen: + with g_pathmgr.open(filename, "rb") as fopen: data = np.load(fopen, encoding="latin1", mmap_mode=mmap_mode) except ValueError as e: logging.info( - f"Could not mmap {filename}: {e}. Trying without PathManager" + f"Could not mmap {filename}: {e}. Trying without g_pathmgr" ) data = np.load(filename, encoding="latin1", mmap_mode=mmap_mode) - logging.info("Successfully loaded without PathManager") + logging.info("Successfully loaded without g_pathmgr") except Exception: - logging.info("Could not mmap without PathManager. Trying without mmap") - with PathManager.open(filename, "rb") as fopen: + logging.info("Could not mmap without g_pathmgr. Trying without mmap") + with g_pathmgr.open(filename, "rb") as fopen: data = np.load(fopen, encoding="latin1") else: - with PathManager.open(filename, "rb") as fopen: + with g_pathmgr.open(filename, "rb") as fopen: data = np.load(fopen, encoding="latin1") elif file_ext == ".json": - with PathManager.open(filename, "r") as fopen: + with g_pathmgr.open(filename, "r") as fopen: data = json.load(fopen) elif file_ext == ".yaml": - with PathManager.open(filename, "r") as fopen: + with g_pathmgr.open(filename, "r") as fopen: data = yaml.load(fopen, Loader=yaml.FullLoader) else: raise Exception(f"Reading from {file_ext} is not supported yet") @@ -150,8 +150,8 @@ def makedir(dir_path): """ is_success = False try: - if not PathManager.exists(dir_path): - PathManager.mkdirs(dir_path) + if not g_pathmgr.exists(dir_path): + g_pathmgr.mkdirs(dir_path) is_success = True except BaseException: logging.info(f"Error creating directory: {dir_path}") @@ -171,7 +171,7 @@ def cleanup_dir(dir): Utility for deleting a directory. Useful for cleaning the storage space that contains various training artifacts like checkpoints, data etc. 
""" - if PathManager.exists(dir): + if g_pathmgr.exists(dir): logging.info(f"Deleting directory: {dir}") os.system(f"rm -rf {dir}") logging.info(f"Deleted contents of directory: {dir}") @@ -190,8 +190,8 @@ def copy_file(input_file, destination_dir, tmp_destination_dir): Copy a given input_file from source to the destination directory. Steps: - 1. We use PathManager to extract the data to local path. - 2. we simply move the files from the PathManager cached local directory + 1. We use g_pathmgr to extract the data to local path. + 2. we simply move the files from the g_pathmgr cached local directory to the user specified destination directory. We use rsync. How destination dir is chosen: a) If user is using slurm, we set destination_dir = slurm_dir (see get_slurm_dir) @@ -203,10 +203,10 @@ def copy_file(input_file, destination_dir, tmp_destination_dir): output_file (str): the new path of the file destination_dir (str): the destination dir that was actually used """ - # we first extract the local path for the files. PathManager + # we first extract the local path for the files. g_pathmgr # determines the local path itself and copies data there. logging.info(f"Copying {input_file} to local path...") - out = PathManager.get_local_path(input_file) + out = g_pathmgr.get_local_path(input_file) output_dir = os.path.dirname(out) logging.info(f"File coped to: {out}") @@ -217,7 +217,7 @@ def copy_file(input_file, destination_dir, tmp_destination_dir): f"destination directory: {destination_dir}" ) # if the user wants to copy the files to a specific location, - # we simply move the files from the PathManager cached directory + # we simply move the files from the g_pathmgr cached directory # to the user specified directory. destination_dir = get_slurm_dir(destination_dir) if "SLURM_JOBID" in os.environ: @@ -225,7 +225,7 @@ def copy_file(input_file, destination_dir, tmp_destination_dir): if destination_dir is not None: makedir(destination_dir) output_file = f"{destination_dir}/{os.path.basename(input_file)}" - if PathManager.exists(output_file): + if g_pathmgr.exists(output_file): logging.info(f"File already copied: {output_file}") return output_file, destination_dir @@ -264,7 +264,7 @@ def copy_dir(input_dir, destination_dir, num_threads): destination_dir = f"{destination_dir}/{data_name}" makedir(destination_dir) complete_flag = f"{destination_dir}/copy_complete" - if PathManager.isfile(complete_flag): + if g_pathmgr.isfile(complete_flag): logging.info(f"Found Data already copied: {destination_dir}...") return destination_dir logging.info( @@ -276,7 +276,7 @@ def copy_dir(input_dir, destination_dir, num_threads): f"rsync -ruW --inplace {{}} {destination_dir}" ) os.system(cmd) - PathManager.open(complete_flag, "a").close() + g_pathmgr.open(complete_flag, "a").close() logging.info("Copied to local directory") return destination_dir, destination_dir @@ -297,11 +297,11 @@ def copy_data(input_file, destination_dir, num_threads, tmp_destination_dir): makedir(destination_dir) else: destination_dir = None - if PathManager.isfile(input_file): + if g_pathmgr.isfile(input_file): output_file, output_dir = copy_file( input_file, destination_dir, tmp_destination_dir ) - elif PathManager.isdir(input_file): + elif g_pathmgr.isdir(input_file): output_file, output_dir = copy_dir(input_file, destination_dir, num_threads) else: raise RuntimeError("The input_file is neither a file nor a directory") diff --git a/vissl/utils/logger.py b/vissl/utils/logger.py index c24287e4e..bcf4199a4 100644 --- a/vissl/utils/logger.py +++ 
b/vissl/utils/logger.py @@ -9,7 +9,7 @@ import subprocess import sys -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from vissl.utils.io import makedir @@ -61,7 +61,7 @@ def _cached_log_stream(filename): # we tune the buffering value so that the logs are updated # frequently. log_buffer_kb = 10 * 1024 # 10KB - io = PathManager.open(filename, mode="a", buffering=log_buffer_kb) + io = g_pathmgr.open(filename, mode="a", buffering=log_buffer_kb) atexit.register(io.close) return io diff --git a/vissl/utils/misc.py b/vissl/utils/misc.py index 5bfc9fb19..6e988b86b 100644 --- a/vissl/utils/misc.py +++ b/vissl/utils/misc.py @@ -15,7 +15,7 @@ import pkg_resources import torch import torch.multiprocessing as mp -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from scipy.sparse import csr_matrix from vissl.utils.extract_features_utils import ExtractedFeaturesLoader @@ -198,7 +198,7 @@ def get_json_catalog_path(default_dataset_catalog_path: str) -> str: # If catalog path is the default and we cannot find it, we want to continue without failing. if os.environ.get("VISSL_DATASET_CATALOG_PATH", False): - assert PathManager.exists( + assert g_pathmgr.exists( dataset_catalog_path ), f"Dataset catalog path: { dataset_catalog_path } not found." diff --git a/vissl/utils/svm_utils/svm_low_shot_trainer.py b/vissl/utils/svm_utils/svm_low_shot_trainer.py index d0591e744..f06430f7e 100644 --- a/vissl/utils/svm_utils/svm_low_shot_trainer.py +++ b/vissl/utils/svm_utils/svm_low_shot_trainer.py @@ -7,7 +7,7 @@ import pickle import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from sklearn.svm import LinearSVC from vissl.utils.io import load_file, save_file from vissl.utils.svm_utils.evaluate import get_precision_recall @@ -106,7 +106,7 @@ def train(self, features, targets, sample_num, low_shot_kvalue): cost = self.costs_list[cost_idx] suffix = f"sample{sample_num}_k{low_shot_kvalue}" out_file = self._get_svm_low_shot_model_filename(cls_num, cost, suffix) - if PathManager.exists(out_file) and not self.config.force_retrain: + if g_pathmgr.exists(out_file) and not self.config.force_retrain: logging.info(f"SVM model exists: {out_file}") continue logging.info(f"Training model with the cost: {cost}") @@ -134,7 +134,7 @@ def train(self, features, targets, sample_num, low_shot_kvalue): ) clf.fit(train_feats, train_cls_labels) logging.info(f"Saving SVM model to: {out_file}") - with PathManager.open(out_file, "wb") as fwrite: + with g_pathmgr.open(out_file, "wb") as fwrite: pickle.dump(clf, fwrite) logging.info(f"Done training: sample: {sample_num} k-value: {low_shot_kvalue}") @@ -213,7 +213,7 @@ def aggregate_stats(self, k_values, sample_inds): sample_idx = sample_inds[inds] file_name = f"test_ap_sample{sample_idx}_k{k_low}.npy" filepath = f"{self.output_dir}/{file_name}" - if PathManager.exists(filepath): + if g_pathmgr.exists(filepath): k_val_output.append(load_file(filepath)) else: logging.info(f"file does not exist: {filepath}") diff --git a/vissl/utils/svm_utils/svm_trainer.py b/vissl/utils/svm_utils/svm_trainer.py index d04036150..53883135a 100644 --- a/vissl/utils/svm_utils/svm_trainer.py +++ b/vissl/utils/svm_utils/svm_trainer.py @@ -8,7 +8,7 @@ import threading import numpy as np -from fvcore.common.file_io import PathManager +from iopath.common.file_io import g_pathmgr from sklearn.model_selection import cross_val_score from sklearn.svm import LinearSVC from vissl.utils.io 
import load_file, save_file @@ -34,7 +34,7 @@ def __init__(self, config, layer, output_dir): def _get_output_dir(self, cfg_out_dir): odir = f"{cfg_out_dir}/{self.layer}" - PathManager.mkdirs(odir) + g_pathmgr.mkdirs(odir) logging.info(f"Output directory for SVM results: {odir}") return odir @@ -43,8 +43,8 @@ def load_input_data(self, data_file, targets_file): Given the input data (features) and targets (labels) files, load the features of shape N x D and labels of shape (N,) """ - assert PathManager.exists(data_file), "Data file not found. Abort!" - assert PathManager.exists(targets_file), "Targets file not found. Abort!" + assert g_pathmgr.exists(data_file), "Data file not found. Abort!" + assert g_pathmgr.exists(targets_file), "Targets file not found. Abort!" # load the features and the targets logging.info("loading features and targets...") targets = load_file(targets_file) @@ -105,9 +105,7 @@ def get_best_cost_value(self): """ crossval_ap_file = f"{self.output_dir}/crossval_ap.npy" chosen_cost_file = f"{self.output_dir}/chosen_cost.npy" - if PathManager.exists(crossval_ap_file) and PathManager.exists( - chosen_cost_file - ): + if g_pathmgr.exists(crossval_ap_file) and g_pathmgr.exists(chosen_cost_file): self.chosen_cost = load_file(chosen_cost_file) self.train_ap_matrix = load_file(crossval_ap_file) return self.chosen_cost @@ -143,8 +141,8 @@ def train_cls(self, features, targets, cls_num): cost = self.costs_list[cost_idx] out_file, ap_out_file = self._get_svm_model_filename(cls_num, cost) if ( - PathManager.exists(out_file) - and PathManager.exists(ap_out_file) + g_pathmgr.exists(out_file) + and g_pathmgr.exists(ap_out_file) and not self.config.force_retrain ): logging.info(f"SVM model exists: {out_file}") @@ -191,7 +189,7 @@ def train_cls(self, features, targets, cls_num): logging.info(f"Saving cls cost AP to: {ap_out_file}") save_file(np.array([ap_scores.mean()]), ap_out_file) logging.info(f"Saving SVM model to: {out_file}") - with PathManager.open(out_file, "wb") as fwrite: + with g_pathmgr.open(out_file, "wb") as fwrite: pickle.dump(clf, fwrite) def train(self, features, targets):
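
Note on the pattern above: every hunk in this patch performs the same mechanical substitution, replacing the class-level PathManager API from fvcore.common.file_io with the module-level g_pathmgr instance from iopath.common.file_io; the method names used here (open, exists, mkdirs, ls, isfile, isdir, rm, symlink, get_local_path) are unchanged. A minimal sketch of the converted usage, assuming iopath is installed; the /tmp/vissl_demo directory and example.txt file are placeholders for illustration, not paths from the repository:

    # After the migration: g_pathmgr is a ready-made PathManager instance,
    # so calls move from the fvcore class to this shared object.
    from iopath.common.file_io import g_pathmgr

    output_dir = "/tmp/vissl_demo"  # placeholder directory, not part of VISSL
    if not g_pathmgr.exists(output_dir):
        g_pathmgr.mkdirs(output_dir)

    # open() dispatches to the registered PathHandler backend for the path;
    # plain local paths work out of the box.
    with g_pathmgr.open(f"{output_dir}/example.txt", "w") as fopen:
        fopen.write("hello\n")

    print(g_pathmgr.ls(output_dir))  # lists the files written above

Because the two APIs are call-compatible, the conversion is a one-to-one replacement of the import and the receiver object; no call sites need their arguments changed.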