Module earthvision.datasets
Expand source code
from .drone_deploy import DroneDeploy
from .aerialcactus import AerialCactus
from .resisc45 import RESISC45
from .ucmercedland import UCMercedLand
from .eurosat import EuroSat
from .l8sparcs import L8SPARCS
from .deepsat import DeepSat
from .landcover import LandCover
from .cowc import COWC
from .l7irish import L7Irish
from .sentinel2cloud import Sentinel2Cloud
from .spacenet7 import SpaceNet7
from .xview import XView
from .cloud38 import Cloud38
from .so2sat import So2Sat
from .l8biome import L8Biome
__all__ = [
"DroneDeploy",
"AerialCactus",
"RESISC45",
"UCMercedLand",
"EuroSat",
"L8SPARCS",
"DeepSat",
"LandCover",
"COWC",
"L7Irish",
"Sentinel2Cloud",
"SpaceNet7",
"XView",
"Cloud38",
"So2Sat",
"L8Biome",
]
Sub-modules
earthvision.datasets.aerialcactus
-
Aerial Cactus Dataset from Kaggle.
earthvision.datasets.cloud38
-
38-Cloud: A Cloud Segmentation Dataset.
earthvision.datasets.cowc
-
Cars Overhead with Context Dataset.
earthvision.datasets.deepsat
-
Deepsat Dataset - Scene Classification.
earthvision.datasets.drone_deploy
-
Drone Deploy Dataset - Semantic Segmentation.
earthvision.datasets.eurosat
-
EuroSat Land Cover Categories Dataset.
earthvision.datasets.l7irish
-
Landsat 7 Irish Cloud Dataset.
earthvision.datasets.l8biome
-
L8 Biome Cloud Cover Dataset.
earthvision.datasets.l8sparcs
-
Landsat 8 SPARCS Cloud Dataset.
earthvision.datasets.landcover
-
The LandCover.ai (Land Cover from Aerial Imagery) Dataset.
earthvision.datasets.resisc45
-
RESISC45 Dataset.
earthvision.datasets.sentinel2cloud
-
Sentinel-2 Cloud Mask Catalogue Dataset.
earthvision.datasets.so2sat
-
So2Sat Dataset to Predict Local Climate Zone (LCZ).
earthvision.datasets.spacenet7
-
SpaceNet 7 Dataset: Multi-Temporal Urban Development Challenge - Instance Segmentation.
earthvision.datasets.spacenet7_utils
earthvision.datasets.ucmercedland
-
UC Merced Land Use Dataset.
earthvision.datasets.utils
-
Utility functions.
earthvision.datasets.vision
-
Vision Dataset from torchvision/datasets/vision.py
earthvision.datasets.xview
-
Dataset from DIUx xView 2018 Detection Challenge.
Classes
class AerialCactus (root: str, train: bool = True, transform=Compose( Resize(size=(32, 32), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), target_transform: Optional[Callable] = None, download: bool = False)
-
Aerial Cactus Dataset.
https://www.kaggle.com/c/aerial-cactus-identification
Args
root
:string
- Root directory of dataset.
train
:bool
, optional- If True, creates dataset from training set, otherwise creates from validation set.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version, e.g. transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class AerialCactus(VisionDataset):
    """Aerial Cactus Dataset.

    <https://www.kaggle.com/c/aerial-cactus-identification>

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from validation set.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://storage.googleapis.com/ossjr"
    resources = "cactus-aerial-photos.zip"

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform=Compose([Resize((32, 32)), ToTensor()]),
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(AerialCactus, self).__init__(
            root, transform=transform, target_transform=target_transform
        )

        self.root = root
        self.data_mode = "training_set" if train else "validation_set"

        if download:
            # Probe the file system once instead of once per branch.
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        target = self.img_labels.iloc[idx, 1]
        img = np.array(_load_img(img_path))

        if self.transform is not None:
            img = self.transform(Image.fromarray(img))

        if self.target_transform is not None:
            # The target is an integer class label, not pixel data, so it is
            # handed to the transform as-is rather than wrapped in a PIL image.
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        return len(self.img_labels)

    def get_path_and_label(self):
        """Return a DataFrame with an ``image`` path column and a ``label`` column.

        Files found under ``cactus`` get label 1, files under ``no_cactus`` get label 0.
        """
        classes = {"cactus": 1, "no_cactus": 0}
        image_path, label = [], []

        for cat, enc in classes.items():
            cat_path = os.path.join(
                self.root, "cactus-aerial-photos", self.data_mode, self.data_mode, cat
            )
            cat_image = [os.path.join(cat_path, path) for path in os.listdir(cat_path)]
            image_path += cat_image
            label += [enc] * len(cat_image)

        return pd.DataFrame({"image": image_path, "label": label})

    def _check_exists(self):
        """Return True when all four class folders (train/valid x cactus/no_cactus) exist."""
        self.train_path = os.path.join(
            self.root, "cactus-aerial-photos", "training_set", "training_set"
        )
        self.valid_path = os.path.join(
            self.root, "cactus-aerial-photos", "validation_set", "validation_set"
        )
        return all(
            os.path.exists(os.path.join(path, target))
            for path in (self.train_path, self.valid_path)
            for target in ("cactus", "no_cactus")
        )

    def download(self) -> None:
        """Download the dataset archive into the root directory."""
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Unpack the archive into ``<root>/cactus-aerial-photos`` and delete the archive."""
        archive = os.path.join(self.root, self.resources)
        path_destination = os.path.join(self.root, "cactus-aerial-photos")
        shutil.unpack_archive(archive, path_destination)
        os.remove(archive)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def download(self) ‑> None
-
Download the dataset archive into the root directory.
Expand source code
def download(self) -> None:
    """Fetch the dataset archive from the mirror into the root directory.

    The root directory is created first if it does not exist yet.
    """
    os.makedirs(self.root, exist_ok=True)
    archive_path = os.path.join(self.root, self.resources)
    source_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(source_url, archive_path)
def extract_file(self) ‑> None
-
Extract file from compressed.
Expand source code
def extract_file(self) -> None:
    """Unpack the downloaded archive and remove it afterwards.

    The archive ``<root>/<resources>`` is unpacked into
    ``<root>/cactus-aerial-photos``.
    """
    archive_path = os.path.join(self.root, self.resources)
    target_dir = os.path.join(self.root, "cactus-aerial-photos")
    shutil.unpack_archive(archive_path, target_dir)
    os.remove(archive_path)
def get_path_and_label(self)
-
Return a DataFrame of image paths and their corresponding labels.
Expand source code
def get_path_and_label(self):
    """Build a DataFrame listing every image of the selected split with its label.

    Returns:
        pandas.DataFrame: column ``image`` holds the file path, column ``label``
        holds 1 for files under ``cactus`` and 0 for files under ``no_cactus``.
    """
    split_dir = os.path.join(
        self.root, "cactus-aerial-photos", self.data_mode, self.data_mode
    )
    paths, labels = [], []
    for class_name, class_id in (("cactus", 1), ("no_cactus", 0)):
        class_dir = os.path.join(split_dir, class_name)
        file_names = os.listdir(class_dir)
        paths.extend(os.path.join(class_dir, name) for name in file_names)
        labels.extend([class_id] * len(file_names))
    return pd.DataFrame({"image": paths, "label": labels})
class COWC (root: str, train: bool = True, task_mode: str = 'counting', transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
Cars Overhead with Context.
Args
root
:string
- Root directory of dataset.
train
:bool
, optional- If True, creates dataset from training set, otherwise creates from test set.
task_mode
:string
- There are two task modes: 'counting' and 'detection'. The default is 'counting'.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version, e.g. transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class COWC(VisionDataset):
    """Cars Overhead with Context.

    https://gdo152.llnl.gov/cowc/

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        task_mode (string): There are two task modes: 'counting' and 'detection'.
            Default value is 'counting'.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    Raises:
        ValueError: If ``task_mode`` is neither 'counting' nor 'detection'.
    """

    mirrors = "https://gdo152.llnl.gov/cowc/download"
    resources = "cowc-everything.txz"

    def __init__(
        self,
        root: str,
        train: bool = True,
        task_mode: str = "counting",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(COWC, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.train = train
        self.task_mode = task_mode

        # Validate the task mode before touching the network so that a typo
        # does not cost a full archive download.
        if self.task_mode == "counting":
            self.task_path = os.path.join(self.root, "cowc/datasets/patch_sets/counting")
            self.file_mapping = file_mapping_counting
        elif self.task_mode == "detection":
            self.task_path = os.path.join(self.root, "cowc/datasets/patch_sets/detection")
            self.file_mapping = file_mapping_detection
        else:
            raise ValueError("task_mode not recognized.")

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        for filename, compressed in self.file_mapping.items():
            if not self._check_exists_subfile(filename):
                self.extract_subfile(filename, compressed)

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        target = self.img_labels.iloc[idx, 1]

        # Each sub-archive is unpacked into a directory named after its top-level
        # folder, so that folder name appears twice in the final path.
        folder = img_path.split("/", 1)[0]
        img_path = os.path.join(self.task_path, folder, img_path)
        img = np.array(_load_img(img_path))

        if self.transform is not None:
            img = self.transform(Image.fromarray(img))

        if self.target_transform is not None:
            # The label is a scalar read from the list file, not image data, so
            # it is passed to the transform unchanged.
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        return len(self.img_labels)

    def get_path_and_label(self):
        """Return a DataFrame read from the label list file of the chosen task and split."""
        if self.task_mode == "counting":
            if self.train:
                label_name = "COWC_train_list_64_class.txt.bz2"
            else:
                label_name = "COWC_test_list_64_class.txt.bz2"
        elif self.task_mode == "detection":
            if self.train:
                label_name = "COWC_train_list_detection.txt.bz2"
            else:
                label_name = "COWC_test_list_detection.txt.bz2"
        else:
            raise ValueError("task_mode not recognized.")

        label_path = os.path.join(self.task_path, label_name)
        return pd.read_csv(label_path, sep=" ", header=None)

    def _check_exists_subfile(self, filename):
        """Return True when ``filename`` already exists inside the task folder."""
        return os.path.exists(os.path.join(self.task_path, filename))

    def extract_subfile(self, filename, compressed):
        """Unpack the tar archive ``compressed`` into the folder ``filename`` of the task folder."""
        comp_path = os.path.join(self.task_path, compressed)
        file_path = os.path.join(self.task_path, filename)
        # The context manager closes the archive even when extraction fails.
        with tarfile.open(comp_path) as tar:
            tar.extractall(file_path)

    def _check_exists(self):
        """Return True when the unpacked ``cowc`` folder is present under root."""
        return os.path.exists(os.path.join(self.root, "cowc"))

    def download(self) -> None:
        """Download the dataset archive into the root directory."""
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Unpack the downloaded archive into root and delete the archive."""
        archive = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive, self.root)
        os.remove(archive)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def download(self) ‑> None
-
download file.
Expand source code
def download(self) -> None:
    """Download the dataset archive into the root directory.

    The root directory is created when missing, so that the download does not
    fail on a fresh location.
    """
    os.makedirs(self.root, exist_ok=True)
    file_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(file_url, os.path.join(self.root, self.resources))
def extract_file(self) ‑> None
-
Extract file from compressed.
Expand source code
def extract_file(self) -> None:
    """Unpack the downloaded archive into the root directory, then delete it."""
    archive_path = os.path.join(self.root, self.resources)
    shutil.unpack_archive(archive_path, self.root)
    os.remove(archive_path)
def extract_subfile(self, filename, compressed)
-
Expand source code
def extract_subfile(self, filename, compressed):
    """Unpack one sub-archive of the task folder.

    Args:
        filename: Name of the destination folder, relative to ``self.task_path``.
        compressed: Name of the tar archive, relative to ``self.task_path``.
    """
    comp_path = os.path.join(self.task_path, compressed)
    file_path = os.path.join(self.task_path, filename)
    # Use a context manager so the archive handle is released even when
    # extraction raises part-way through.
    with tarfile.open(comp_path) as tar:
        tar.extractall(file_path)
def get_path_and_label(self)
-
Return a DataFrame of image paths and their corresponding labels.
Expand source code
def get_path_and_label(self):
    """Load the label list file matching the current task mode and split.

    Returns:
        pandas.DataFrame: the space-separated list file, read without a header row.

    Raises:
        ValueError: If ``self.task_mode`` is neither 'counting' nor 'detection'.
    """
    # (task mode, train list file, test list file)
    label_files = (
        ("counting", "COWC_train_list_64_class.txt.bz2", "COWC_test_list_64_class.txt.bz2"),
        ("detection", "COWC_train_list_detection.txt.bz2", "COWC_test_list_detection.txt.bz2"),
    )
    for mode, train_file, test_file in label_files:
        if self.task_mode == mode:
            label_name = train_file if self.train else test_file
            break
    else:
        raise ValueError("task_mode not recognized.")

    return pd.read_csv(os.path.join(self.task_path, label_name), sep=" ", header=None)
class Cloud38 (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
Cloud 38 Dataset.
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version, e.g. transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class Cloud38(VisionDataset):
    """Cloud 38 Dataset.

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "http://vault.sfu.ca/index.php/s/pymNqYF09JkM8Bp/download"
    resources = "38cloud.zip"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(Cloud38, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.data_path = os.path.join(self.root, "38cloud")
        self.base_path = Path(os.path.join(self.data_path, "38-Cloud_training"))

        os.makedirs(self.root, exist_ok=True)

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.file_validator()
        self.labels = self.get_path()
        print("Done.")

    def file_validator(self):
        """Create the derived ``train_rgb`` and ``labels`` folders and fill them.

        Every file in ``train_red`` is combined into an RGB PNG and every file in
        ``train_gt`` is converted into a PNG mask.
        """
        for folder in ("train_rgb", "labels"):
            if not (self.base_path / folder).exists():
                (self.base_path / folder).mkdir()

        for red_patch in (self.base_path / "train_red").iterdir():
            self.create_rgb_pil(red_patch)

        for gt_patch in (self.base_path / "train_gt").iterdir():
            self.convert_tif_png(gt_patch, self.base_path / "labels")

    def get_path(self):
        """Return a DataFrame pairing each ground-truth TIF (``GT``) with its PNG mask (``Label``)."""
        path_label = os.path.join(self.base_path, "labels")
        path_gt = os.path.join(self.base_path, "train_gt")

        # os.listdir returns entries in arbitrary order; sorting both listings by
        # file stem keeps every GT file on the same row as the mask derived from it.
        def by_stem(name):
            return os.path.splitext(name)[0]

        label_listing = [
            os.path.join(path_label, i) for i in sorted(os.listdir(path_label), key=by_stem)
        ]
        gt_listing = [os.path.join(path_gt, i) for i in sorted(os.listdir(path_gt), key=by_stem)]

        return pd.DataFrame({"GT": gt_listing, "Label": label_listing})

    def create_rgb_pil(self, red_filename: Path):
        """Combining three bands to RGB format"""
        self.red_filename = str(red_filename)
        # NOTE(review): replaces every "red" in the path, so a root directory
        # whose name contains "red" would be rewritten too.
        green_fn = self.red_filename.replace("red", "green")
        blue_fn = self.red_filename.replace("red", "blue")
        rgb_fn = self.red_filename.replace("red", "rgb").replace(".TIF", ".png")

        bands = []
        for band_fn in (self.red_filename, green_fn, blue_fn):
            with Image.open(band_fn) as band:
                bands.append(np.array(band))

        array_rgb = np.stack(bands, axis=2)
        array_rgb = array_rgb / np.iinfo(array_rgb.dtype).max

        # A fully saturated pixel scales to 256, which wraps when cast to uint8;
        # clamp to 255 so bright pixels stay bright.
        rgb = Image.fromarray(np.minimum(256 * array_rgb, 255).astype(np.uint8))
        rgb.save(rgb_fn)
        return rgb

    def convert_tif_png(self, tif_file: Path, out_folder: Path):
        """Converting TIF file to PNG format"""
        self.tif_file = tif_file
        self.out_folder = out_folder

        with Image.open(self.tif_file) as source:
            array_tif = np.array(source)

        # Build the mask as uint8: the platform default integer produced by
        # np.where(..., 1, 0) is not accepted by Image.fromarray when it is int64.
        im = Image.fromarray((array_tif == 255).astype(np.uint8))
        im.save(self.out_folder / self.tif_file.with_suffix(".png").name)
        return im

    def __len__(self) -> int:
        # The path table is stored in ``self.labels``.
        return len(self.labels)

    def download(self) -> None:
        """Download the dataset archive into the root directory."""
        _urlretrieve(self.mirrors, os.path.join(self.root, self.resources))

    def _check_exists(self):
        """Check file has been download or not"""
        folders = [
            "38-Cloud_95-Cloud_Test_Metadata_Files",
            "38-Cloud_test",
            "38-Cloud_training",
            "38-Cloud_Training_Metadata_Files",
        ]
        return all(os.path.exists(os.path.join(self.data_path, folder_pth)) for folder_pth in folders)

    def extract_file(self):
        """Extract file from the compressed"""
        print("Extracting...")
        archive = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive, self.root)
        os.remove(archive)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def convert_tif_png(self, tif_file: pathlib.Path, out_folder: pathlib.Path)
-
Converting TIF file to PNG format
Expand source code
def convert_tif_png(self, tif_file: Path, out_folder: Path):
    """Convert a ground-truth TIF into a binary PNG mask.

    Pixels equal to 255 become 1, all others become 0. The PNG is written to
    ``out_folder`` under the TIF's file name with a ``.png`` suffix.

    Args:
        tif_file: Path of the TIF file to read.
        out_folder: Folder that receives the PNG.

    Returns:
        The PIL image that was saved.
    """
    self.tif_file = tif_file
    self.out_folder = out_folder

    # Close the source file as soon as its pixels are copied out.
    with Image.open(self.tif_file) as source:
        array_tif = np.array(source)

    # Build the mask as uint8: the platform default integer produced by
    # np.where(..., 1, 0) is not accepted by Image.fromarray when it is int64.
    im = Image.fromarray((array_tif == 255).astype(np.uint8))
    im.save(self.out_folder / self.tif_file.with_suffix(".png").name)
    return im
def create_rgb_pil(self, red_filename: pathlib.Path)
-
Combining three bands to RGB format
Expand source code
def create_rgb_pil(self, red_filename: Path):
    """Combine the red, green and blue band files into one 8-bit RGB PNG.

    The green, blue and output paths are derived from the red path by replacing
    "red" with "green", "blue" and "rgb" respectively.

    Args:
        red_filename: Path of the red band file.

    Returns:
        The PIL image that was saved.
    """
    self.red_filename = str(red_filename)
    # NOTE(review): replaces every "red" in the path, so a root directory
    # whose name contains "red" would be rewritten too.
    green_fn = self.red_filename.replace("red", "green")
    blue_fn = self.red_filename.replace("red", "blue")
    rgb_fn = self.red_filename.replace("red", "rgb").replace(".TIF", ".png")

    bands = []
    for band_fn in (self.red_filename, green_fn, blue_fn):
        # Close each band file once its pixels are copied out.
        with Image.open(band_fn) as band:
            bands.append(np.array(band))

    array_rgb = np.stack(bands, axis=2)
    array_rgb = array_rgb / np.iinfo(array_rgb.dtype).max

    # A fully saturated pixel scales to 256, which wraps when cast to uint8;
    # clamp to 255 so bright pixels stay bright. An (H, W, 3) uint8 array is
    # read as RGB by Image.fromarray without an explicit mode.
    rgb = Image.fromarray(np.minimum(256 * array_rgb, 255).astype(np.uint8))
    rgb.save(rgb_fn)
    return rgb
def download(self) ‑> None
-
Download the dataset archive into the root directory.
Expand source code
def download(self) -> None:
    """Fetch the dataset archive from the mirror URL into the root directory."""
    archive_path = os.path.join(self.root, self.resources)
    _urlretrieve(self.mirrors, archive_path)
def extract_file(self)
-
Extract file from the compressed
Expand source code
def extract_file(self):
    """Unpack the downloaded archive into the root directory, then delete it."""
    archive_path = os.path.join(self.root, self.resources)
    print("Extracting...")
    shutil.unpack_archive(archive_path, self.root)
    os.remove(archive_path)
def file_validator(self)
-
Expand source code
def file_validator(self):
    """Prepare the derived ``train_rgb`` and ``labels`` folders.

    Both folders are created when missing. Every entry of ``train_red`` is then
    passed to ``create_rgb_pil`` and every entry of ``train_gt`` is passed to
    ``convert_tif_png`` with the ``labels`` folder as destination.
    """
    base = self.base_path

    for folder_name in ("train_rgb", "labels"):
        derived_dir = base / folder_name
        if not derived_dir.exists():
            derived_dir.mkdir()

    for red_patch in (base / "train_red").iterdir():
        self.create_rgb_pil(red_patch)

    labels_dir = base / "labels"
    for gt_patch in (base / "train_gt").iterdir():
        self.convert_tif_png(gt_patch, labels_dir)
def get_path(self)
-
Expand source code
def get_path(self):
    """Pair every ground-truth TIF with the PNG mask generated from it.

    Returns:
        pandas.DataFrame: column ``GT`` holds paths from ``train_gt``, column
        ``Label`` holds paths from ``labels``.
    """
    path_label = os.path.join(self.base_path, "labels")
    path_gt = os.path.join(self.base_path, "train_gt")

    # os.listdir returns entries in arbitrary order; sorting both listings by
    # file stem keeps every GT file on the same row as the mask derived from it.
    def by_stem(name):
        return os.path.splitext(name)[0]

    label_listing = [
        os.path.join(path_label, name) for name in sorted(os.listdir(path_label), key=by_stem)
    ]
    gt_listing = [os.path.join(path_gt, name) for name in sorted(os.listdir(path_gt), key=by_stem)]

    return pd.DataFrame({"GT": gt_listing, "Label": label_listing})
class DeepSat (root: str, dataset_type='SAT-4', train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
DeepSat Dataset.
Args
root
:string
- Root directory of dataset.
dataset_type
:string
, optional- Choose dataset type ['SAT-4', 'SAT-6'].
train
:bool
, optional- If True, creates dataset from training set, otherwise creates from test set.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version, e.g. transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class DeepSat(VisionDataset):
    """DeepSat Dataset.

    Args:
        root (string): Root directory of dataset.
        dataset_type (string, optional): Choose dataset type ['SAT-4', 'SAT-6'].
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    Raises:
        ValueError: If ``dataset_type`` is not one of ``dataset_types``.
    """

    resources = {
        "SAT-4_and_SAT-6_datasets": "https://drive.google.com/uc?id=0B0Fef71_vt3PUkZ4YVZ5WWNvZWs&export=download"
    }
    dataset_types = ["SAT-4", "SAT-6"]

    def __init__(
        self,
        root: str,
        dataset_type="SAT-4",
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(DeepSat, self).__init__(root, transform=transform, target_transform=target_transform)

        # Reject an unknown dataset type with an exception instead of ending the
        # whole interpreter from inside library code.
        if dataset_type not in self.dataset_types:
            raise ValueError(
                f"Unknown dataset {dataset_type}. Available dataset : {self.dataset_types}"
            )

        # Expand "~" up front so that every derived path refers to the same place.
        self.root = os.path.expanduser(root)
        self.dataset_type = dataset_type
        self.train = train
        self.folder_pth = os.path.join(self.root, list(self.resources.keys())[0])
        self.filename = list(self.resources.keys())[0] + ".tar.gz"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()

        dataset = self.load_dataset()
        self.choose_data_mode(dataset)

    def download(self) -> None:
        """Download dataset and extract it"""
        import tarfile

        self.root = os.path.expanduser(self.root)
        os.makedirs(self.root, exist_ok=True)
        archive_path = os.path.join(self.root, self.filename)

        print("Download dataset...")
        gdown.download(
            self.resources["SAT-4_and_SAT-6_datasets"],
            archive_path,
            quiet=False,
        )

        if os.path.exists(self.folder_pth):
            print(f"file {self.folder_pth} already exists")
        else:
            os.makedirs(self.folder_pth)
            print(f"Extracting file {self.filename}")
            # Extract in-process: no shell is involved, so paths containing
            # spaces or shell metacharacters are handled correctly.
            with tarfile.open(archive_path) as archive:
                archive.extractall(self.folder_pth)
            print("Extracting file success !")

    def _check_exists(self) -> bool:
        """Return True when the downloaded archive is present in the root directory."""
        # Look inside root, where download() stores the archive, rather than in
        # the current working directory.
        return os.path.exists(os.path.join(self.root, self.filename))

    def load_dataset(self):
        """Load the ``.mat`` file of the selected dataset type and return its contents."""
        filename = {"SAT-4": "sat-4-full.mat", "SAT-6": "sat-6-full.mat"}
        return sio.loadmat(os.path.join(self.folder_pth, filename[self.dataset_type]))

    def choose_data_mode(self, dataset):
        """Select the train or test arrays of ``dataset`` and keep its annotations."""
        if self.train:
            x_type, y_type = "train_x", "train_y"
        else:
            x_type, y_type = "test_x", "test_y"

        self.x, self.y = dataset[x_type], dataset[y_type]
        self.annot = dataset["annotations"]

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        # Samples are indexed along the last axis of both arrays.
        img = self.x[:, :, :, idx]
        target = self.y[:, idx]

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            target = Image.fromarray(target)
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        return self.x.shape[3]
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var dataset_types
var functions : Dict[str, Callable]
var resources
Methods
def choose_data_mode(self, dataset)
-
Expand source code
def choose_data_mode(self, dataset):
    """Pick the train or test arrays out of ``dataset``.

    Stores ``<split>_x`` as ``self.x``, ``<split>_y`` as ``self.y`` and the
    ``annotations`` entry as ``self.annot``, where the split is "train" when
    ``self.train`` is truthy and "test" otherwise.
    """
    split = "train" if self.train else "test"
    self.x, self.y = dataset[split + "_x"], dataset[split + "_y"]
    self.annot = dataset["annotations"]
def download(self) ‑> None
-
Download dataset and extract it
Expand source code
def download(self) -> None:
    """Download the dataset archive and extract it.

    The archive is stored as ``<root>/<filename>``. It is unpacked into
    ``self.folder_pth`` unless that folder already exists.
    """
    import tarfile

    self.root = os.path.expanduser(self.root)
    os.makedirs(self.root, exist_ok=True)
    archive_path = os.path.join(self.root, self.filename)

    print("Download dataset...")
    gdown.download(
        self.resources["SAT-4_and_SAT-6_datasets"],
        archive_path,
        quiet=False,
    )

    if os.path.exists(self.folder_pth):
        print(f"file {self.folder_pth} already exists")
    else:
        os.makedirs(self.folder_pth)
        print(f"Extracting file {self.filename}")
        # Extract in-process: no shell is involved, so paths containing spaces
        # or shell metacharacters are handled correctly. The former move of the
        # folder into its own parent directory is not needed.
        with tarfile.open(archive_path) as archive:
            archive.extractall(self.folder_pth)
        print("Extracting file success !")
def load_dataset(self)
-
Expand source code
def load_dataset(self):
    """Read the ``.mat`` file of the selected dataset type from ``self.folder_pth``.

    Returns:
        The mapping produced by ``scipy.io.loadmat``.
    """
    mat_names = {"SAT-4": "sat-4-full.mat", "SAT-6": "sat-6-full.mat"}
    mat_path = os.path.join(self.folder_pth, mat_names[self.dataset_type])
    return sio.loadmat(mat_path)
class DroneDeploy (root: str, dataset_type='dataset-sample', data_mode: int = 0, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
Drone Deploy Semantic Dataset.
Args
root
:string
- Root directory of dataset.
dataset_type
:string
, optional- Choose dataset type.
data_mode
:int
- 0 for train data, 1 for validation data, and 2 for testing data
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version, e.g. transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class DroneDeploy(VisionDataset):
    """Drone Deploy Semantic Dataset.

    Args:
        root (string): Root directory of dataset.
        dataset_type (string, optional): Choose dataset type.
        data_mode (int): 0 for train data, 1 for validation data, and 2 for testing data
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    Raises:
        ValueError: If ``dataset_type`` is not a key of ``resources`` (checked when
            ``download`` is true) or ``data_mode`` is not 0, 1 or 2.
    """

    resources = {
        "dataset-sample": "https://dl.dropboxusercontent.com/s/h8a8kev0rktf4kq/dataset-sample.tar.gz?dl=0",
        "dataset-medium": "https://dl.dropboxusercontent.com/s/r0dj9mhyv4bgbme/dataset-medium.tar.gz?dl=0",
    }

    def __init__(
        self,
        root: str,
        dataset_type="dataset-sample",
        data_mode: int = 0,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(DroneDeploy, self).__init__(
            root, transform=transform, target_transform=target_transform
        )

        # Expand "~" up front so that every derived path refers to the same place.
        self.root = os.path.expanduser(root)
        self.dataset_type = dataset_type
        self.filename = f"{dataset_type}.tar.gz"
        self.filepath = os.path.join(self.root, self.filename)
        self.data_mode = data_mode
        self.label_path = f"{dataset_type}/label-chips"
        self.image_path = f"{dataset_type}/image-chips"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()

        self.load_dataset()

    def download(self) -> None:
        """Download a dataset, extract it and create the tiles."""
        import tarfile

        print(f'Downloading "{self.dataset_type}"')
        self.root = os.path.expanduser(self.root)
        os.makedirs(self.root, exist_ok=True)
        fpath = os.path.join(self.root, self.filename)
        _urlretrieve(self.resources[self.dataset_type], fpath)

        dataset_dir = os.path.join(self.root, self.dataset_type)
        if not os.path.exists(dataset_dir):
            print(f'Extracting "{fpath}"')
            # Unpack straight into root instead of extracting into the current
            # working directory through the shell and moving the result.
            with tarfile.open(fpath) as archive:
                archive.extractall(self.root)
        else:
            print(f'Folder "{self.dataset_type}" already exists.')

        # Test and create the chip folders at the same location under root.
        for chips in ("image-chips", "label-chips"):
            os.makedirs(os.path.join(dataset_dir, chips), exist_ok=True)

        run(dataset_dir)

    def _check_exists(self) -> bool:
        """Return True when the dataset archive is already present in root."""
        if self.dataset_type not in self.resources:
            # Raise instead of ending the whole interpreter from library code.
            raise ValueError(
                f"Unknown dataset {self.dataset_type}. "
                f"Available dataset : {list(self.resources)}"
            )
        return os.path.exists(self.filepath)

    def load_dataset(self):
        """Read the chip list of the selected split into ``self.image_files``."""
        chip_lists = {0: "train.txt", 1: "valid.txt", 2: "test.txt"}
        try:
            list_chip = chip_lists[self.data_mode]
        except (KeyError, TypeError):
            raise ValueError(
                f"data_mode must be 0 (train), 1 (valid) or 2 (test), got {self.data_mode!r}"
            ) from None

        # The "/" separators are kept on purpose: __getitem__ derives the label
        # path by replacing "<dataset_type>/image-chips" in this string.
        files = [
            f"{os.path.join(self.root, self.dataset_type)}/image-chips/{fname}"
            for fname in load_lines(os.path.join(self.root, self.dataset_type, list_chip))
        ]
        self.image_files = files

    def __getitem__(self, idx) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        image_file = self.image_files[idx]
        label_file = image_file.replace(self.image_path, self.label_path)

        img = np.array(load_img(image_file))
        target = mask_to_classes(load_img(label_file))
        target = np.array(target)

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            target = Image.fromarray(target)
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        return len(self.image_files)

    def on_epoch_end(self):
        """Shuffle the list of image files in place."""
        random.shuffle(self.image_files)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var resources
Methods
def download(self) ‑> None
-
Download a dataset, extract it and create the tiles.
Expand source code
def download(self) -> None:
    """Download a dataset, extract it and create the tiles.

    The archive is stored in the root directory and unpacked there unless the
    dataset folder already exists. The ``image-chips`` and ``label-chips``
    folders are then created when missing and ``run`` is called on the dataset
    folder.
    """
    import tarfile

    print(f'Downloading "{self.dataset_type}"')
    self.root = os.path.expanduser(self.root)
    os.makedirs(self.root, exist_ok=True)
    fpath = os.path.join(self.root, self.filename)
    _urlretrieve(self.resources[self.dataset_type], fpath)

    dataset_dir = os.path.join(self.root, self.dataset_type)
    if not os.path.exists(dataset_dir):
        print(f'Extracting "{fpath}"')
        # Unpack straight into root instead of extracting into the current
        # working directory through the shell and moving the result; this also
        # handles paths with spaces.
        with tarfile.open(fpath) as archive:
            archive.extractall(self.root)
    else:
        print(f'Folder "{self.dataset_type}" already exists.')

    # Test and create the chip folders at the same location under root; the
    # existence test used to look relative to the working directory.
    for chips in ("image-chips", "label-chips"):
        os.makedirs(os.path.join(dataset_dir, chips), exist_ok=True)

    run(dataset_dir)
def load_dataset(self)
-
Expand source code
def load_dataset(self):
    """Populate ``self.image_files`` with the chip paths of the active split.

    ``self.data_mode`` selects the listing file inside the dataset folder:
    0 -> ``train.txt``, 1 -> ``valid.txt``, 2 -> ``test.txt``.

    Raises:
        ValueError: if ``self.data_mode`` is not 0, 1 or 2.
    """
    split_files = {0: "train.txt", 1: "valid.txt", 2: "test.txt"}
    if self.data_mode not in split_files:
        # Previously this fell through and crashed with UnboundLocalError.
        raise ValueError(
            f"Unknown data_mode {self.data_mode!r}; expected one of {sorted(split_files)}"
        )
    list_chip = split_files[self.data_mode]

    dataset_dir = os.path.join(self.root, self.dataset_type)
    # Keep the "/"-separated layout of the original paths: __getitem__ derives
    # the label path from the image path by string replacement.
    self.image_files = [
        f"{dataset_dir}/image-chips/{fname}"
        for fname in load_lines(os.path.join(dataset_dir, list_chip))
    ]
def on_epoch_end(self)
-
Expand source code
def on_epoch_end(self):
    """Shuffle ``self.image_files`` in place using the global ``random`` state."""
    chips = self.image_files
    random.shuffle(chips)
class EuroSat (root: str, transform=Compose( Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), target_transform: Optional[Callable] = None, download: bool = False)
-
EuroSat Land Cover Categories.
http://madm.dfki.de/files/sentinel
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class EuroSat(VisionDataset):
    """EuroSat Land Cover Categories.

    <http://madm.dfki.de/files/sentinel>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target (an integer class index) and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "http://madm.dfki.de/files/sentinel"
    resources = "EuroSAT.zip"
    # Folder name -> integer class index.
    classes = {
        "AnnualCrop": 0,
        "Forest": 1,
        "HerbaceousVegetation": 2,
        "Highway": 3,
        "Industrial": 4,
        "Pasture": 5,
        "PermanentCrop": 6,
        "Residential": 7,
        "River": 8,
        "SeaLake": 9,
    }

    def __init__(
        self,
        root: str,
        transform=Compose([Resize((64, 64)), ToTensor()]),
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(EuroSat, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        # Name of the folder (under root) that holds one sub-folder per class.
        self.data_mode = "2750"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                # The download target lives inside root, so root must exist first.
                os.makedirs(self.root, exist_ok=True)
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        target = self.img_labels.iloc[idx, 1]
        img = np.array(_load_img(img_path))

        if self.transform is not None:
            img = self.transform(Image.fromarray(img))

        if self.target_transform is not None:
            # The target is an integer class index, not image data, so it is
            # handed to the transform as-is rather than through Image.fromarray.
            target = self.target_transform(target)

        return img, target

    def __len__(self) -> int:
        return len(self.img_labels)

    def _check_exists(self) -> bool:
        """Return True when every class folder is present under ``root/2750``."""
        self.data_path = os.path.join(self.root, self.data_mode)
        self.dir_classes = list(self.classes.keys())

        return all(os.path.exists(os.path.join(self.data_path, i)) for i in self.dir_classes)

    def download(self) -> None:
        """Download the dataset archive into the root directory."""
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Extract the .zip file into root and delete the archive afterwards."""
        archive = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive, self.root)
        os.remove(archive)

    def get_path_and_label(self):
        """Return a dataframe with one row per image: its path and integer label."""
        image_path = []
        label = []
        for cat, enc in self.classes.items():
            cat_path = os.path.join(self.root, self.data_mode, cat)
            # os.listdir order is arbitrary; sort so indices are reproducible.
            cat_image = [os.path.join(cat_path, path) for path in sorted(os.listdir(cat_path))]
            image_path += cat_image
            label += [enc] * len(cat_image)
        return pd.DataFrame({"image": image_path, "label": label})
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var classes
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def download(self) ‑> None
-
Download file
Expand source code
def download(self) -> None:
    """Fetch the dataset archive from the mirror into the root directory."""
    destination = os.path.join(self.root, self.resources)
    source_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(source_url, destination)
def extract_file(self) ‑> None
-
Extract the .zip file
Expand source code
def extract_file(self) -> None:
    """Unpack the downloaded .zip archive into root, then delete the archive."""
    archive = os.path.join(self.root, self.resources)
    shutil.unpack_archive(archive, self.root)
    os.remove(archive)
def get_path_and_label(self)
-
Return dataframe type consist of image path and corresponding label.
Expand source code
def get_path_and_label(self):
    """Build a dataframe listing every image path with its encoded class label."""
    image_path, label = [], []
    for cat, enc in self.classes.items():
        cat_path = os.path.join(self.root, self.data_mode, cat)
        names = os.listdir(cat_path)
        image_path.extend(os.path.join(cat_path, name) for name in names)
        label.extend([enc] * len(names))
    return pd.DataFrame({"image": image_path, "label": label})
class L7Irish (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
Landsat 7 Irish Cloud.
https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class L7Irish(VisionDataset):
    """Landsat 7 Irish Cloud.

    <https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "http://landsat.usgs.gov/cloud-validation/cca_irish_2015/"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(L7Irish, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        # NOTE: the archive list is scraped from the download page, so the
        # constructor performs a network request even when download=False.
        self.download_urls = self.get_download_url()
        self.resources = [url.split("/")[-1] for url in self.download_urls]
        # Archive name without the ".tar.gz" suffix == extracted folder name.
        self.data_modes = [filename.split(".tar.gz")[0] for filename in self.resources]

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def get_download_url(self):
        """Get the urls to download the files.

        Returns:
            list: every link on the download page whose target ends in ".gz".
        """
        page = requests.get(
            "https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data"
        )
        soup = BeautifulSoup(page.content, "html.parser")
        hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
        # Return a real list: a lazy filter object would be exhausted after
        # the first iteration and look empty to any later reader.
        return [href for href in hrefs if href and href.endswith(".gz")]

    def download(self):
        """Download every archive listed in ``self.resources`` into root."""
        for resource in self.resources:
            file_url = posixpath.join(self.mirrors, resource)
            _urlretrieve(file_url, os.path.join(self.root, resource))

    def extract_file(self):
        """Extract each downloaded .tar.gz archive into root and delete it."""
        for resource in self.resources:
            archive = os.path.join(self.root, resource)
            shutil.unpack_archive(archive, self.root)
            os.remove(archive)

    def _check_exists(self):
        """Return True when every expected data folder exists under root.

        As a side effect the root directory is created if it is missing.
        """
        # makedirs also covers a root whose parent folders do not exist yet.
        os.makedirs(self.root, exist_ok=True)
        return all(
            os.path.exists(os.path.join(self.root, data_mode)) for data_mode in self.data_modes
        )

    def get_path_and_label(self):
        """Get the path of the images and labels (masks) in a dataframe"""
        image_path, label = [], []
        for data_mode in self.data_modes:
            scene_dir = os.path.join(self.root, data_mode)
            image_path.extend(glob.glob(os.path.join(scene_dir, "L7*.TIF")))
            label.extend(glob.glob(os.path.join(scene_dir, "*mask*")))
        # NOTE(review): the dataframe requires as many masks as images; confirm
        # that each folder really holds exactly one mask per "L7*.TIF" file.
        return pd.DataFrame({"image": image_path, "label": label})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, mask)
        """
        img_path = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        img = np.array(_load_img(img_path))
        mask = np.array(_load_img(mask_path))

        if self.transform is not None:
            img = self.transform(Image.fromarray(img))

        if self.target_transform is not None:
            mask = self.target_transform(Image.fromarray(mask))

        return img, mask

    def __len__(self) -> int:
        return len(self.img_labels)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
Methods
def download(self)
-
Download file
Expand source code
def download(self):
    """Fetch every archive named in ``self.resources`` into the root directory."""
    for archive_name in self.resources:
        destination = os.path.join(self.root, archive_name)
        _urlretrieve(posixpath.join(self.mirrors, archive_name), destination)
def extract_file(self)
-
Extract the downloaded .tar.gz archives
Expand source code
def extract_file(self):
    """Unpack each downloaded archive into root, deleting it once extracted."""
    for archive_name in self.resources:
        archive = os.path.join(self.root, archive_name)
        shutil.unpack_archive(archive, self.root)
        os.remove(archive)
def get_download_url(self)
-
Get the urls to download the files.
Expand source code
def get_download_url(self):
    """Get the urls to download the files.

    Returns:
        list: every link on the download page whose target ends in ".gz".
    """
    page = requests.get(
        "https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data"
    )
    soup = BeautifulSoup(page.content, "html.parser")
    hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
    # Return a real list: a lazy filter object would be exhausted after the
    # first iteration and look empty to any later reader.
    return [href for href in hrefs if href and href.endswith(".gz")]
def get_path_and_label(self)
-
Get the path of the images and labels (masks) in a dataframe
Expand source code
def get_path_and_label(self):
    """Collect image and mask paths from every data folder into a dataframe."""
    image_path = []
    label = []
    for data_mode in self.data_modes:
        scene_dir = os.path.join(self.root, data_mode)
        image_path += glob.glob(os.path.join(scene_dir, "L7*.TIF"))
        label += glob.glob(os.path.join(scene_dir, "*mask*"))
    return pd.DataFrame({"image": image_path, "label": label})
class L8Biome (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
L8 Biome Cloud Cover.
Download page https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class L8Biome(VisionDataset):
    """L8 Biome Cloud Cover.

    Download page https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(L8Biome, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        # NOTE: the archive list is scraped from the download page, so the
        # constructor performs a network request even when download=False.
        self.download_urls = self.get_download_url()
        # Archive file names (still carrying the ".tar.gz" suffix).
        self.data_modes = [url.split("/")[-1] for url in self.download_urls]

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def get_download_url(self):
        """Get the urls to download the files.

        Returns:
            list: every link on the download page whose target ends in ".tar.gz".
        """
        page = requests.get(self.mirrors)
        soup = BeautifulSoup(page.content, "html.parser")
        hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
        return [href for href in hrefs if href and href.endswith(".tar.gz")]

    def download(self):
        """Download every archive in ``self.download_urls`` into root."""
        for resource in self.download_urls:
            filename = resource.split("/")[-1]
            _urlretrieve(resource, os.path.join(self.root, filename))

    def extract_file(self):
        """Extract each downloaded .tar.gz archive into root and delete it."""
        for resource in self.data_modes:
            archive = os.path.join(self.root, resource)
            shutil.unpack_archive(archive, self.root)
            os.remove(archive)

    def _scene_dir(self, data_mode):
        """Return the extracted folder (``root/BC/<scene>``) for an archive name."""
        return os.path.join(self.root, "BC", data_mode.replace(".tar.gz", ""))

    def _check_exists(self):
        """Return True when every expected scene folder exists under ``root/BC``.

        As a side effect the root directory is created if it is missing.
        """
        # makedirs also covers a root whose parent folders do not exist yet.
        os.makedirs(self.root, exist_ok=True)
        return all(os.path.exists(self._scene_dir(mode)) for mode in self.data_modes)

    def get_path_and_label(self):
        """Get the path of the images and labels (masks) in a dataframe"""
        image_directory, label = [], []
        for data_mode in self.data_modes:
            scene_dir = self._scene_dir(data_mode)
            image_directory.append(scene_dir)
            label.extend(glob.glob(os.path.join(scene_dir, "*mask.hdr")))
        # NOTE(review): the dataframe requires exactly one "*mask.hdr" per
        # scene folder; a missing or duplicate mask makes construction fail.
        return pd.DataFrame({"image": image_directory, "label": label})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, mask)
        """
        img_directory = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        # basename copes with either path separator, and the band counter no
        # longer reuses (and clobbers) the ``idx`` argument.
        observation = os.path.basename(img_directory)
        ls_stack_path = [
            os.path.join(img_directory, f"{observation}_B{band}.TIF") for band in range(1, 12)
        ]

        img = _load_stack_img(ls_stack_path)
        mask = _load_img_hdr(mask_path)

        if self.transform is not None:
            img = self.transform(Image.fromarray(img))

        if self.target_transform is not None:
            mask = self.target_transform(Image.fromarray(mask))

        return img, mask

    def __len__(self) -> int:
        return len(self.img_labels)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
Methods
def download(self)
-
Download file
Expand source code
def download(self):
    """Fetch every archive in ``self.download_urls`` into the root directory."""
    for url in self.download_urls:
        archive_name = url.split("/")[-1]
        destination = os.path.join(self.root, archive_name)
        _urlretrieve(url, destination)
def extract_file(self)
-
Extract the downloaded .tar.gz archives
Expand source code
def extract_file(self):
    """Unpack each downloaded archive into root, deleting it once extracted."""
    for archive_name in self.data_modes:
        archive = os.path.join(self.root, archive_name)
        shutil.unpack_archive(archive, self.root)
        os.remove(archive)
def get_download_url(self)
-
Get the urls to download the files.
Expand source code
def get_download_url(self):
    """Scrape the mirror page and return the list of ".tar.gz" links on it."""
    response = requests.get(self.mirrors)
    document = BeautifulSoup(response.content, "html.parser")
    hrefs = [anchor.get("href") for anchor in document.find_all("a")]
    return [href for href in hrefs if href and href.endswith(".tar.gz")]
def get_path_and_label(self)
-
Get the path of the images and labels (masks) in a dataframe
Expand source code
def get_path_and_label(self):
    """Collect each scene folder and its "*mask.hdr" file(s) into a dataframe."""
    image_directory = []
    label = []
    for archive_name in self.data_modes:
        scene = archive_name.replace(".tar.gz", "")
        scene_dir = os.path.join(self.root, "BC", scene)
        image_directory += [scene_dir]
        label += glob.glob(os.path.join(self.root, "BC", scene, "*mask.hdr"))
    return pd.DataFrame({"image": image_directory, "label": label})
class L8SPARCS (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
Landsat 8 SPARCS Cloud.
Download: https://landsat.usgs.gov/cloud-validation/sparcs/l8cloudmasks.zip
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class L8SPARCS(VisionDataset):
    """Landsat 8 SPARCS Cloud.

    <https://www.usgs.gov/core-science-systems/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs>

    Download: <https://landsat.usgs.gov/cloud-validation/sparcs/l8cloudmasks.zip>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://landsat.usgs.gov/cloud-validation/sparcs/"
    resources = "l8cloudmasks.zip"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(L8SPARCS, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        # Name of the folder (under root) that is checked for and globbed.
        self.data_mode = "sending"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                # The download target lives inside root, so root must exist first.
                os.makedirs(self.root, exist_ok=True)
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def _check_exists(self) -> bool:
        """Return True when the extracted data folder exists under root."""
        self.data_path = os.path.join(self.root, self.data_mode)
        return os.path.exists(self.data_path)

    def download(self) -> None:
        """Download the dataset archive into the root directory."""
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Extract the .zip file into root and delete the archive afterwards."""
        archive = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive, self.root)
        os.remove(archive)

    def get_path_and_label(self):
        """Get the path of the images and labels (masks) in a dataframe"""
        folder = os.path.join(self.root, self.data_mode)
        # Photos and masks are paired purely by sorted file-name order.
        image_path = sorted(glob.glob(os.path.join(folder, "*_photo.png")))
        label = sorted(glob.glob(os.path.join(folder, "*_mask.png")))
        return pd.DataFrame({"image": image_path, "label": label})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, mask)
        """
        img_path = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        img = np.array(_load_img(img_path))
        mask = np.array(_load_img(mask_path))

        if self.transform is not None:
            img = self.transform(Image.fromarray(img))

        if self.target_transform is not None:
            mask = self.target_transform(Image.fromarray(mask))

        return img, mask

    def __len__(self) -> int:
        return len(self.img_labels)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def download(self) ‑> None
-
Download file
Expand source code
def download(self) -> None:
    """Fetch the dataset archive from the mirror into the root directory."""
    destination = os.path.join(self.root, self.resources)
    source_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(source_url, destination)
def extract_file(self) ‑> None
-
Extract the .zip file
Expand source code
def extract_file(self) -> None:
    """Unpack the downloaded .zip archive into root, then delete the archive."""
    archive = os.path.join(self.root, self.resources)
    shutil.unpack_archive(archive, self.root)
    os.remove(archive)
def get_path_and_label(self)
-
Get the path of the images and labels (masks) in a dataframe
Expand source code
def get_path_and_label(self):
    """Pair photos with masks by sorted file name and return them as a dataframe."""
    folder = os.path.join(self.root, self.data_mode)
    photos = sorted(glob.glob(os.path.join(folder, "*_photo.png")))
    masks = sorted(glob.glob(os.path.join(folder, "*_mask.png")))
    return pd.DataFrame({"image": photos, "label": masks})
class LandCover (root: str, transform=Compose( Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), target_transform=Compose( Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), download: bool = False)
-
The LandCover.ai (Land Cover from Aerial Imagery) dataset.
https://landcover.ai/download/landcover.ai.v1.zip
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class LandCover(VisionDataset):
    """The LandCover.ai (Land Cover from Aerial Imagery) dataset.

    <https://landcover.ai/download/landcover.ai.v1.zip>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image
            and returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://landcover.ai/download"
    resources = "landcover.ai.v1.zip"

    def __init__(
        self,
        root: str,
        transform=Compose([Resize((256, 256)), ToTensor()]),
        target_transform=Compose([Resize((256, 256)), ToTensor()]),
        download: bool = False,
    ) -> None:
        super(LandCover, self).__init__(
            root, transform=transform, target_transform=target_transform
        )
        self.root = root

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                # The download target lives inside root, so root must exist first.
                os.makedirs(self.root, exist_ok=True)
                self.download()
                self.extract_file()
                self.to_chip_img_mask("landcover")

        self.img_labels = self.get_image_path_and_mask_path()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, mask)
        """
        img_path = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        img = np.array(_load_img(img_path))
        mask = np.array(_load_img(mask_path))

        if self.transform is not None:
            img = self.transform(Image.fromarray(img))

        if self.target_transform is not None:
            mask = self.target_transform(Image.fromarray(mask))

        return img, mask

    def __len__(self) -> int:
        return len(self.img_labels)

    def get_image_path_and_mask_path(self):
        """Return a dataframe of image paths and mask paths, paired by sorted name."""
        img_path = os.path.join(self.root, "landcover", "images")
        msk_path = os.path.join(self.root, "landcover", "masks")

        images_path = sorted(os.path.join(img_path, path) for path in os.listdir(img_path))
        # Mask files live in the masks folder; they were previously joined
        # with the images folder, yielding paths to the wrong files.
        masks_path = sorted(os.path.join(msk_path, path) for path in os.listdir(msk_path))

        return pd.DataFrame({"image": images_path, "mask": masks_path})

    def to_chip_img_mask(self, base):
        """Cut every image/mask pair under ``base`` into 512x512 tiles.

        Tiles are written to ``<root>/<base>/output/images`` (.jpg) and
        ``<root>/<base>/output/masks`` (.png). Partial tiles at the right and
        bottom edges are dropped.

        Args:
            base (str): Dataset folder, resolved relative to ``self.root``
                (an absolute path is used unchanged).

        Raises:
            ValueError: if a file cannot be read, or an image and its mask
                differ in name or size.
        """
        target_size = 512
        # Resolve against root (where extract_file unpacks), not the cwd.
        base_dir = os.path.join(self.root, base)
        output_imgs_dir = os.path.join(base_dir, "output", "images")
        output_masks_dir = os.path.join(base_dir, "output", "masks")

        img_paths = sorted(glob.glob(os.path.join(base_dir, "images", "*.tif")))
        mask_paths = sorted(glob.glob(os.path.join(base_dir, "masks", "*.tif")))

        # exist_ok keeps a second run from failing on the existing folders.
        os.makedirs(output_imgs_dir, exist_ok=True)
        os.makedirs(output_masks_dir, exist_ok=True)

        for i, (img_path, mask_path) in enumerate(zip(img_paths, mask_paths)):
            img_filename = os.path.splitext(os.path.basename(img_path))[0]
            mask_filename = os.path.splitext(os.path.basename(mask_path))[0]
            img = cv2.imread(img_path)
            mask = cv2.imread(mask_path)

            # cv2.imread signals failure by returning None.
            if img is None or mask is None:
                raise ValueError(
                    "Could not read image/mask pair: {} / {}".format(img_path, mask_path)
                )
            if img_filename != mask_filename or img.shape[:2] != mask.shape[:2]:
                raise ValueError(
                    "Image and mask do not match: {} / {}".format(img_path, mask_path)
                )

            k = 0
            for y in range(0, img.shape[0], target_size):
                for x in range(0, img.shape[1], target_size):
                    img_tile = img[y : y + target_size, x : x + target_size]
                    mask_tile = mask[y : y + target_size, x : x + target_size]

                    if img_tile.shape[:2] != (target_size, target_size):
                        continue  # incomplete edge tile

                    cv2.imwrite(
                        os.path.join(output_imgs_dir, "{}_{}.jpg".format(img_filename, k)),
                        img_tile,
                    )
                    cv2.imwrite(
                        os.path.join(output_masks_dir, "{}_{}.png".format(mask_filename, k)),
                        mask_tile,
                    )
                    k += 1

            print("Processed {} {}/{}".format(img_filename, i + 1, len(img_paths)))

    def download(self) -> None:
        """Download the dataset archive into the root directory."""
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def _check_exists(self):
        """Return True when both the images and masks folders exist."""
        self.data_path = os.path.join(
            self.root,
            "landcover",
        )

        return os.path.exists(os.path.join(self.data_path, "images")) and os.path.exists(
            os.path.join(self.data_path, "masks")
        )

    def extract_file(self):
        """Extract the archive into ``<root>/landcover`` and delete the archive."""
        target_dir = os.path.join(self.root, "landcover")
        # The folder may be left over from a partial earlier extraction.
        os.makedirs(target_dir, exist_ok=True)
        archive = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive, target_dir)
        os.remove(archive)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def download(self) ‑> None
-
Download the dataset archive into the root directory.
Expand source code
def download(self) -> None:
    """Fetch the dataset archive from the mirror into the root directory."""
    destination = os.path.join(self.root, self.resources)
    source_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(source_url, destination)
def extract_file(self)
-
Extract the files from the compressed archive.
Expand source code
def extract_file(self):
    """Extract the archive into ``<root>/landcover`` and delete the archive."""
    target_dir = os.path.join(self.root, "landcover")
    # The folder may be left over from a partial earlier extraction; without
    # exist_ok a retry would crash before unpacking anything.
    os.makedirs(target_dir, exist_ok=True)
    archive = os.path.join(self.root, self.resources)
    shutil.unpack_archive(archive, target_dir)
    os.remove(archive)
def get_image_path_and_mask_path(self)
-
Return dataframe type consist of image path and mask path.
Expand source code
def get_image_path_and_mask_path(self):
    """Return a dataframe of image paths and mask paths, paired by sorted name."""
    img_path = os.path.join(self.root, "landcover", "images")
    msk_path = os.path.join(self.root, "landcover", "masks")

    images_path = sorted(os.path.join(img_path, path) for path in os.listdir(img_path))
    # Mask files live in the masks folder; they were previously joined with
    # the images folder, yielding paths to the wrong files.
    masks_path = sorted(os.path.join(msk_path, path) for path in os.listdir(msk_path))

    return pd.DataFrame({"image": images_path, "mask": masks_path})
def to_chip_img_mask(self, base)
-
Expand source code
def to_chip_img_mask(self, base):
    """Cut every image/mask pair under ``base`` into 512x512 tiles.

    Tiles are written to ``<root>/<base>/output/images`` (.jpg) and
    ``<root>/<base>/output/masks`` (.png). Partial tiles at the right and
    bottom edges are dropped.

    Args:
        base (str): Dataset folder, resolved relative to ``self.root``
            (an absolute path is used unchanged).

    Raises:
        ValueError: if a file cannot be read, or an image and its mask
            differ in name or size.
    """
    target_size = 512
    # Resolve against root (where the archive is unpacked), not the cwd.
    base_dir = os.path.join(self.root, base)
    output_imgs_dir = os.path.join(base_dir, "output", "images")
    output_masks_dir = os.path.join(base_dir, "output", "masks")

    img_paths = sorted(glob.glob(os.path.join(base_dir, "images", "*.tif")))
    mask_paths = sorted(glob.glob(os.path.join(base_dir, "masks", "*.tif")))

    # exist_ok keeps a second run from failing on the existing folders.
    os.makedirs(output_imgs_dir, exist_ok=True)
    os.makedirs(output_masks_dir, exist_ok=True)

    for i, (img_path, mask_path) in enumerate(zip(img_paths, mask_paths)):
        img_filename = os.path.splitext(os.path.basename(img_path))[0]
        mask_filename = os.path.splitext(os.path.basename(mask_path))[0]
        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path)

        # cv2.imread signals failure by returning None.
        if img is None or mask is None:
            raise ValueError(
                "Could not read image/mask pair: {} / {}".format(img_path, mask_path)
            )
        if img_filename != mask_filename or img.shape[:2] != mask.shape[:2]:
            raise ValueError(
                "Image and mask do not match: {} / {}".format(img_path, mask_path)
            )

        k = 0
        for y in range(0, img.shape[0], target_size):
            for x in range(0, img.shape[1], target_size):
                img_tile = img[y : y + target_size, x : x + target_size]
                mask_tile = mask[y : y + target_size, x : x + target_size]

                if img_tile.shape[:2] != (target_size, target_size):
                    continue  # incomplete edge tile

                cv2.imwrite(
                    os.path.join(output_imgs_dir, "{}_{}.jpg".format(img_filename, k)),
                    img_tile,
                )
                cv2.imwrite(
                    os.path.join(output_masks_dir, "{}_{}.png".format(mask_filename, k)),
                    mask_tile,
                )
                k += 1

        print("Processed {} {}/{}".format(img_filename, i + 1, len(img_paths)))
class RESISC45 (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
RESISC45 Dataset.
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class RESISC45(VisionDataset):
    """RESISC45 Dataset.

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://storage.googleapis.com/ossjr"
    resources = "NWPU-RESISC45.zip"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(RESISC45, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.class_enc = CLASS_ENC
        self.class_dec = CLASS_DEC

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        img = np.array(_load_img(img_path))
        target = self.img_labels.iloc[idx, 1]

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            # The target is an integer class index, not image data, so it is passed to
            # the transform as-is instead of being wrapped in a PIL image.
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        return len(self.img_labels)

    def get_path_and_label(self):
        """Return a DataFrame of image paths and their corresponding labels.

        Every class folder is expected to hold 700 images named
        ``<class>_001.jpg`` ... ``<class>_700.jpg``; the paths are generated from
        that pattern rather than listed from disk.

        Raises:
            KeyError: If a class folder has no entry in ``self.class_enc``.
        """
        DATA_SIZE = 700
        data_dir = os.path.join(self.root, "NWPU-RESISC45")

        # Only directories are classes; stray files (e.g. ``.DS_Store``) are ignored.
        # Sorting makes the row order independent of the file system.
        category = sorted(
            entry for entry in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, entry))
        )

        image_path = []
        label = []
        for cat in category:
            label += [self.class_enc[cat]] * DATA_SIZE
            image_path += [
                os.path.join(data_dir, cat, cat + "_" + str(num).zfill(3) + ".jpg")
                for num in range(1, DATA_SIZE + 1)
            ]

        return pd.DataFrame({"image": image_path, "label": label})

    def _check_exists(self):
        """Return True when the extracted dataset folder is present under root."""
        return os.path.exists(os.path.join(self.root, "NWPU-RESISC45"))

    def download(self) -> None:
        """Download the dataset archive into the root directory."""
        # The archive is written inside root, so root has to exist first.
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Unpack the downloaded archive into root, then delete the archive."""
        archive = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive, f"{self.root}")
        os.remove(archive)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def download(self) ‑> None
-
Download the dataset archive.
Expand source code
def download(self) -> None:
    """Download the dataset archive into the root directory.

    The root directory is created first when it does not exist yet.
    Unpacking is not done here; see ``extract_file``.
    """
    # The archive is written inside root, so root has to exist first.
    os.makedirs(self.root, exist_ok=True)
    file_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(file_url, os.path.join(self.root, self.resources))
def extract_file(self) ‑> None
-
Extract file from compressed.
Expand source code
def extract_file(self) -> None:
    """Unpack the downloaded archive into the root directory, then delete it."""
    archive = os.path.join(self.root, self.resources)
    destination = f"{self.root}"
    shutil.unpack_archive(archive, destination)
    os.remove(archive)
def get_path_and_label(self)
-
Return a DataFrame of image paths and their corresponding labels.
Expand source code
def get_path_and_label(self):
    """Return a DataFrame of image paths and their corresponding labels.

    Every class folder is expected to hold 700 images named
    ``<class>_001.jpg`` ... ``<class>_700.jpg``; the paths are generated from
    that pattern rather than listed from disk.

    Returns:
        pandas.DataFrame: Columns ``image`` (file path) and ``label``
        (encoded class taken from ``self.class_enc``).

    Raises:
        KeyError: If a class folder has no entry in ``self.class_enc``.
    """
    DATA_SIZE = 700
    data_dir = os.path.join(self.root, "NWPU-RESISC45")

    # Only directories are classes; stray files (e.g. ``.DS_Store``) are ignored.
    # Sorting makes the row order independent of the file system.
    category = sorted(
        entry for entry in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, entry))
    )

    image_path = []
    label = []
    for cat in category:
        label += [self.class_enc[cat]] * DATA_SIZE
        image_path += [
            os.path.join(data_dir, cat, cat + "_" + str(num).zfill(3) + ".jpg")
            for num in range(1, DATA_SIZE + 1)
        ]

    return pd.DataFrame({"image": image_path, "label": label})
class Sentinel2Cloud (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
Sentinel-2 Cloud Mask Catalogue dataset.
classification_tags: https://zenodo.org/record/4172871/files/classification_tags.csv?download=1 subscenes: https://zenodo.org/record/4172871/files/subscenes.zip?download=1 masks: https://zenodo.org/record/4172871/files/masks.zip?download=1
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class Sentinel2Cloud(VisionDataset):
    """Sentinel-2 Cloud Mask Catalogue dataset.

    classification_tags: <https://zenodo.org/record/4172871/files/classification_tags.csv?download=1>
    subscenes: <https://zenodo.org/record/4172871/files/subscenes.zip?download=1>
    masks: <https://zenodo.org/record/4172871/files/masks.zip?download=1>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://zenodo.org/record/4172871/files/"
    resources = "subscenes.zip"
    mask_resources = "masks.zip"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(Sentinel2Cloud, self).__init__(
            root, transform=transform, target_transform=target_transform
        )

        self.root = root
        os.makedirs(self.root, exist_ok=True)

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_image_path_and_mask_path()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, mask)
        """
        img_path = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        img = _load_npy(img_path)
        mask = _load_npy(mask_path)

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            mask = Image.fromarray(mask)
            mask = self.target_transform(mask)
        return img, mask

    def __len__(self) -> int:
        """Return the number of image/mask pairs."""
        return len(self.img_labels)

    def get_image_path_and_mask_path(self):
        """Return a DataFrame of image paths and mask paths.

        Both folders are listed in sorted order and paired by position.
        """
        img_path = os.path.join(self.root, "sentinel2cloud", "subscenes")
        msk_path = os.path.join(self.root, "sentinel2cloud", "masks")

        images_path = sorted(glob.glob(os.path.join(img_path, "*.npy")))
        masks_path = sorted(glob.glob(os.path.join(msk_path, "*.npy")))

        return pd.DataFrame({"image": images_path, "mask": masks_path})

    def download(self) -> None:
        """Download the subscenes and masks archives into the root directory."""
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

        mask_file_url = posixpath.join(self.mirrors, self.mask_resources)
        _urlretrieve(mask_file_url, os.path.join(self.root, self.mask_resources))

    def _check_exists(self):
        """Return True when both the subscenes and the masks folders exist."""
        self.data_path = os.path.join(self.root, "sentinel2cloud")

        return os.path.exists(os.path.join(self.data_path, "subscenes")) and os.path.exists(
            os.path.join(self.data_path, "masks")
        )

    def extract_file(self):
        """Unpack both archives into ``<root>/sentinel2cloud`` and delete them."""
        target_dir = os.path.join(self.root, "sentinel2cloud")
        # The folder may already exist when a previous extraction was only partial
        # (one of subscenes/masks missing), so an existing folder is not an error.
        os.makedirs(target_dir, exist_ok=True)

        for archive_name in (self.resources, self.mask_resources):
            archive = os.path.join(self.root, archive_name)
            shutil.unpack_archive(archive, target_dir)
            os.remove(archive)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mask_resources
var mirrors
var resources
Methods
def download(self) ‑> None
-
Download the subscenes and masks archives.
Expand source code
def download(self) -> None:
    """Fetch the subscenes archive and then the masks archive into the root directory."""
    for archive_name in (self.resources, self.mask_resources):
        source_url = posixpath.join(self.mirrors, archive_name)
        destination = os.path.join(self.root, archive_name)
        _urlretrieve(source_url, destination)
def extract_file(self)
-
Extract file from compressed.
Expand source code
def extract_file(self):
    """Unpack both downloaded archives and delete them afterwards.

    The subscenes archive and then the masks archive are unpacked into the
    ``sentinel2cloud`` folder under the root directory; each archive is
    removed right after it has been unpacked.
    """
    target_dir = os.path.join(self.root, "sentinel2cloud")
    # The folder may already exist when a previous extraction was only partial
    # (one of subscenes/masks missing), so an existing folder is not an error.
    os.makedirs(target_dir, exist_ok=True)

    for archive_name in (self.resources, self.mask_resources):
        archive = os.path.join(self.root, archive_name)
        shutil.unpack_archive(archive, target_dir)
        os.remove(archive)
def get_image_path_and_mask_path(self)
-
Return a DataFrame of image paths and mask paths.
Expand source code
def get_image_path_and_mask_path(self):
    """Build a DataFrame pairing every subscene file with a mask file.

    The ``*.npy`` files of the ``subscenes`` and ``masks`` folders are each
    listed in sorted order and paired by position in the columns ``image``
    and ``mask``.
    """
    dataset_dir = os.path.join(self.root, "sentinel2cloud")

    columns = {}
    for column, folder in (("image", "subscenes"), ("mask", "masks")):
        pattern = os.path.join(dataset_dir, folder, "*.npy")
        columns[column] = sorted(glob.glob(pattern))

    return pd.DataFrame(columns)
class So2Sat (root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = True)
-
So2Sat Dataset to Predict Local Climate Zone (LCZ):
https://mediatum.ub.tum.de/1454690
Args
root
:string
- Root directory of dataset.
train
:bool
, optional- If True, creates dataset from training set, otherwise creates from validation set.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class So2Sat(VisionDataset):
    """So2Sat Dataset to Predict Local Climate Zone (LCZ): <https://mediatum.ub.tum.de/1454690>

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from validation set.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://dataserv.ub.tum.de/s/m1454690/download?path=/&files="
    resources = ["training.h5", "validation.h5"]

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = True,
    ) -> None:
        super(So2Sat, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.data_mode = "training" if train else "validation"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()

        self.img_labels = self.get_path_and_label()

    def __len__(self) -> int:
        return len(self.img_labels)

    def __getitem__(self, idx: int) -> Tuple[Any, Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (sen1, sen2, label)
        """
        sen1 = self.img_labels["sen1"][idx]
        sen2 = self.img_labels["sen2"][idx]
        label = self.img_labels["label"][idx]

        if self.transform is not None:
            sen1 = Image.fromarray(sen1)
            sen1 = self.transform(sen1)

            sen2 = Image.fromarray(sen2)
            sen2 = self.transform(sen2)

        if self.target_transform is not None:
            label = Image.fromarray(label)
            label = self.target_transform(label)

        return (sen1, sen2, label)

    def get_path_and_label(self):
        """Return a dict with the ``sen1``, ``sen2`` and ``label`` arrays of the split.

        The arrays are read in full from ``<root>/<data_mode>.h5``.
        """
        # np.array copies each dataset into memory, so the file can be closed as
        # soon as the three arrays have been read.
        with h5py.File(os.path.join(self.root, f"{self.data_mode}.h5"), "r") as file:
            sen1 = np.array(file["sen1"])
            sen2 = np.array(file["sen2"])
            label = np.array(file["label"])
        return {"sen1": sen1, "sen2": sen2, "label": label}

    def _check_exists(self):
        """Return True when both the training and validation files exist under root."""
        return os.path.exists(os.path.join(self.root, self.resources[0])) and os.path.exists(
            os.path.join(self.root, self.resources[1])
        )

    def download(self):
        """Download the training and validation files into the root directory."""
        os.makedirs(self.root, exist_ok=True)

        for resource in self.resources:
            # ``mirrors`` ends in the ``files=`` query parameter, so the file name is
            # appended directly; a path join would insert a stray "/" before it.
            file_url = self.mirrors + resource
            _urlretrieve(file_url, os.path.join(self.root, resource))
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def download(self)
-
Download the training and validation files.
Expand source code
def download(self):
    """Download the training and validation files into the root directory.

    The root directory is created when it does not exist yet. Nothing is
    extracted: the resources are stored as downloaded.
    """
    os.makedirs(self.root, exist_ok=True)

    for resource in self.resources:
        # ``mirrors`` ends in the ``files=`` query parameter, so the file name is
        # appended directly; a path join would insert a stray "/" before it.
        file_url = self.mirrors + resource
        _urlretrieve(file_url, os.path.join(self.root, resource))
def get_path_and_label(self)
-
Return a dict holding the sen1, sen2 and label arrays read from the HDF5 file of the selected split.
Expand source code
def get_path_and_label(self):
    """Return a dict with the ``sen1``, ``sen2`` and ``label`` arrays of the split.

    The arrays are read in full from ``<root>/<data_mode>.h5``.

    Returns:
        dict: Keys ``sen1``, ``sen2`` and ``label``, each mapped to a numpy
        array.
    """
    # np.array copies each dataset into memory, so the file can be closed as
    # soon as the three arrays have been read.
    with h5py.File(os.path.join(self.root, f"{self.data_mode}.h5"), "r") as file:
        sen1 = np.array(file["sen1"])
        sen2 = np.array(file["sen2"])
        label = np.array(file["label"])
    return {"sen1": sen1, "sen2": sen2, "label": label}
class SpaceNet7 (root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
SpaceNet7 (SN7): Multi-Temporal Urban Development Challenge
https://spacenet.ai/sn7-challenge/
Args
root
:string
- Root directory of dataset.
train
:bool
, optional- If True, creates dataset from training set, otherwise creates from test set.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class SpaceNet7(VisionDataset):
    """SpaceNet7 (SN7): Multi-Temporal Urban Development Challenge
    <https://spacenet.ai/sn7-challenge/>

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    resources = {
        "train": "s3://spacenet-dataset/spacenet/SN7_buildings/tarballs/SN7_buildings_train.tar.gz",
        "test": "s3://spacenet-dataset/spacenet/SN7_buildings/tarballs/SN7_buildings_test_public.tar.gz",
    }

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(SpaceNet7, self).__init__(
            root, transform=transform, target_transform=target_transform
        )
        self.root = root
        self.data_mode = "train" if train else "test"
        self.filename = self.resources.get(self.data_mode, "NULL").split("/")[-1]
        self.dataset_path = os.path.join(root, self.filename)
        data_mode_folder = {"train": "train", "test": "test_public"}
        self.folder_name = data_mode_folder.get(self.data_mode, "NULL")

        os.makedirs(self.root, exist_ok=True)

        if download and self._check_exists(self.dataset_path):
            print("file already exists.")

        if download and not self._check_exists(os.path.join(self.root, self.folder_name)):
            self.download()
            self.extract_file()

        if self.data_mode == "train":
            train_dir = os.path.join(self.root, "train")
            aois = sorted(
                f for f in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, f))
            )

            # Masks are only generated for AOIs that have no masks folder yet.
            aois_without_mask = [
                aoi
                for aoi in aois
                if not self._check_exists(os.path.join(train_dir, aoi, "masks/"))
            ]

            if aois_without_mask:
                print("Generating masks...")
                self.generate_mask(aois_without_mask)

        self.img_labels = self.get_path_and_label()

    def _check_exists(self, obj) -> bool:
        """Return True when the path ``obj`` exists."""
        return os.path.exists(obj)

    def download(self):
        """Download the archive of the selected split into the root directory.

        Raises:
            ValueError: If ``data_mode`` has no entry in ``resources``.
        """
        if self.data_mode not in self.resources:
            raise ValueError("Unrecognized data_mode")

        downloader(self.resources[self.data_mode], self.root)

    def extract_file(self):
        """Unpack the downloaded archive into the root directory."""
        shutil.unpack_archive(self.dataset_path, self.root)

    def generate_mask(self, aois):
        """Create the training masks of the given AOIs using a process pool.

        We'll only make a 1-channel mask for now, but Solaris supports a
        multi-channel mask as well, see
        https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb

        Args:
            aois (list): Names of the AOI folders under ``<root>/train`` to process.
        """
        make_fbc = False  # the extra ``masks_fbc`` output is disabled

        input_args = []
        for i, aoi in enumerate(aois):
            print(i, "aoi:", aoi)
            im_dir = os.path.join(self.root, "train", aoi, "images_masked/")
            json_dir = os.path.join(self.root, "train", aoi, "labels_match/")
            out_dir_mask = os.path.join(self.root, "train", aoi, "masks/")
            out_dir_mask_fbc = os.path.join(self.root, "train", aoi, "masks_fbc/")
            os.makedirs(out_dir_mask, exist_ok=True)
            if make_fbc:
                os.makedirs(out_dir_mask_fbc, exist_ok=True)

            json_files = sorted(
                f
                for f in os.listdir(json_dir)
                if f.endswith("Buildings.geojson") and os.path.exists(os.path.join(json_dir, f))
            )
            for f in json_files:
                name_root = f.split(".")[0]
                json_path = os.path.join(json_dir, f)
                # The image shares the label's name without the "_Buildings" suffix.
                # The substitutions are applied to the file name only so that a root
                # directory containing "labels" or "_Buildings" is left untouched.
                image_name = name_root.replace("labels", "images").replace("_Buildings", "")
                image_path = os.path.join(im_dir, image_name + ".tif")
                output_path_mask = os.path.join(out_dir_mask, name_root + ".tif")
                if make_fbc:
                    output_path_mask_fbc = os.path.join(out_dir_mask_fbc, name_root + ".tif")
                else:
                    output_path_mask_fbc = None

                if os.path.exists(output_path_mask):
                    continue

                input_args.append(
                    [
                        make_geojsons_and_masks,
                        name_root,
                        image_path,
                        json_path,
                        output_path_mask,
                        output_path_mask_fbc,
                    ]
                )

        if not input_args:
            return

        # Leave one core free, but never ask for fewer than one worker:
        # Pool(0) raises ValueError on single-core machines.
        workers = max(1, multiprocessing.cpu_count() - 1)
        pool = multiprocessing.Pool(workers)
        try:
            pool.map(map_wrapper, input_args)
        finally:
            pool.close()
            pool.join()

    def get_path_and_label(self):
        """Return a DataFrame of image paths and corresponding labels (for train data),
        or image paths only (for test data).

        Only the folder of the selected split is read. For train data, images
        without a matching ``<name>_Buildings.tif`` mask are left out.

        Raises:
            ValueError: If ``data_mode`` is neither ``train`` nor ``test``.
        """
        folders = {"train": "train", "test": "test_public"}
        if self.data_mode not in folders:
            raise ValueError("Unrecognized data_mode")

        split_dir = os.path.join(self.root, folders[self.data_mode])
        subdirs = sorted(
            f for f in os.listdir(split_dir) if os.path.isdir(os.path.join(split_dir, f))
        )

        im_list, mask_list = [], []
        for subdir in subdirs:
            image_dir = os.path.join(split_dir, subdir, "images_masked")
            tif_names = sorted(f for f in os.listdir(image_dir) if f.endswith(".tif"))

            if self.data_mode == "test":
                im_list.extend(os.path.join(image_dir, f) for f in tif_names)
                continue

            for f in tif_names:
                mask_path = os.path.join(
                    split_dir, subdir, "masks", f.split(".")[0] + "_Buildings.tif"
                )
                if os.path.exists(mask_path):
                    im_list.append(os.path.join(image_dir, f))
                    mask_list.append(mask_path)

        if self.data_mode == "train":
            return pd.DataFrame({"image": im_list, "label": mask_list})
        return pd.DataFrame({"image": im_list})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, mask) or (img)
        """
        img_path = self.img_labels.iloc[idx, 0]
        img = np.array(_load_img(img_path))
        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.data_mode == "train":
            mask_path = self.img_labels.iloc[idx, 1]
            mask = np.array(_load_img(mask_path))
            if self.target_transform is not None:
                mask = Image.fromarray(mask)
                mask = self.target_transform(mask)
            sample = (img, mask)
        elif self.data_mode == "test":
            sample = img

        return sample

    def __len__(self) -> int:
        return len(self.img_labels)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var resources
Methods
def download(self)
-
Download dataset and extract it
Expand source code
def download(self):
    """Fetch the archive of the selected split into the root directory.

    Raises:
        ValueError: If ``data_mode`` has no entry in ``resources``.
    """
    known_modes = self.resources.keys()
    if self.data_mode in known_modes:
        source_url = self.resources[self.data_mode]
        downloader(source_url, self.root)
        return

    raise ValueError("Unrecognized data_mode")
def extract_file(self)
-
Expand source code
def extract_file(self):
    """Unpack the archive at ``dataset_path`` into the root directory.

    The archive itself is left in place.
    """
    archive, destination = self.dataset_path, self.root
    shutil.unpack_archive(archive, destination)
def generate_mask(self, aois)
-
Create the training masks, using multiple processes to increase speed. We'll only make a 1-channel mask for now, but Solaris supports a multi-channel mask as well; see https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb
Expand source code
def generate_mask(self, aois):
    """Create the training masks of the given AOIs using a process pool.

    For every ``*Buildings.geojson`` label file of an AOI that has no mask
    yet, one job is queued; all jobs are then run through ``map_wrapper`` on
    a ``multiprocessing.Pool``.

    We'll only make a 1-channel mask for now, but Solaris supports a
    multi-channel mask as well, see
    https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb

    Args:
        aois (list): Names of the AOI folders under ``<root>/train`` to process.
    """
    make_fbc = False  # the extra ``masks_fbc`` output is disabled

    input_args = []
    for i, aoi in enumerate(aois):
        print(i, "aoi:", aoi)
        im_dir = os.path.join(self.root, "train", aoi, "images_masked/")
        json_dir = os.path.join(self.root, "train", aoi, "labels_match/")
        out_dir_mask = os.path.join(self.root, "train", aoi, "masks/")
        out_dir_mask_fbc = os.path.join(self.root, "train", aoi, "masks_fbc/")
        os.makedirs(out_dir_mask, exist_ok=True)
        if make_fbc:
            os.makedirs(out_dir_mask_fbc, exist_ok=True)

        json_files = sorted(
            f
            for f in os.listdir(json_dir)
            if f.endswith("Buildings.geojson") and os.path.exists(os.path.join(json_dir, f))
        )
        for f in json_files:
            name_root = f.split(".")[0]
            json_path = os.path.join(json_dir, f)
            # The image shares the label's name without the "_Buildings" suffix.
            # The substitutions are applied to the file name only so that a root
            # directory containing "labels" or "_Buildings" is left untouched.
            image_name = name_root.replace("labels", "images").replace("_Buildings", "")
            image_path = os.path.join(im_dir, image_name + ".tif")
            output_path_mask = os.path.join(out_dir_mask, name_root + ".tif")
            if make_fbc:
                output_path_mask_fbc = os.path.join(out_dir_mask_fbc, name_root + ".tif")
            else:
                output_path_mask_fbc = None

            # Masks that already exist are not regenerated.
            if os.path.exists(output_path_mask):
                continue

            input_args.append(
                [
                    make_geojsons_and_masks,
                    name_root,
                    image_path,
                    json_path,
                    output_path_mask,
                    output_path_mask_fbc,
                ]
            )

    # Nothing to generate: do not spawn worker processes at all.
    if not input_args:
        return

    # Leave one core free, but never ask for fewer than one worker:
    # Pool(0) raises ValueError on single-core machines.
    workers = max(1, multiprocessing.cpu_count() - 1)
    pool = multiprocessing.Pool(workers)
    try:
        pool.map(map_wrapper, input_args)
    finally:
        # Shut the workers down even when a job raised.
        pool.close()
        pool.join()
def get_path_and_label(self)
-
Return a DataFrame of image paths and corresponding labels (for train data), or image paths only (for test data).
Expand source code
def get_path_and_label(self):
    """Return a DataFrame of image paths and corresponding labels (for train data),
    or image paths only (for test data).

    Only the folder of the selected split is read (``train`` for train data,
    ``test_public`` for test data). For train data, images without a
    matching ``<name>_Buildings.tif`` mask are left out.

    Returns:
        pandas.DataFrame: Columns ``image`` and ``label`` for train data, or
        the single column ``image`` for test data.

    Raises:
        ValueError: If ``data_mode`` is neither ``train`` nor ``test``.
    """
    folders = {"train": "train", "test": "test_public"}
    if self.data_mode not in folders:
        raise ValueError("Unrecognized data_mode")

    split_dir = os.path.join(self.root, folders[self.data_mode])
    subdirs = sorted(
        f for f in os.listdir(split_dir) if os.path.isdir(os.path.join(split_dir, f))
    )

    im_list, mask_list = [], []
    for subdir in subdirs:
        image_dir = os.path.join(split_dir, subdir, "images_masked")
        tif_names = sorted(f for f in os.listdir(image_dir) if f.endswith(".tif"))

        if self.data_mode == "test":
            im_list.extend(os.path.join(image_dir, f) for f in tif_names)
            continue

        for f in tif_names:
            mask_path = os.path.join(
                split_dir, subdir, "masks", f.split(".")[0] + "_Buildings.tif"
            )
            # Keep image and mask lists aligned: an image is only used with its mask.
            if os.path.exists(mask_path):
                im_list.append(os.path.join(image_dir, f))
                mask_list.append(mask_path)

    if self.data_mode == "train":
        return pd.DataFrame({"image": im_list, "label": mask_list})
    return pd.DataFrame({"image": im_list})
class UCMercedLand (root: str, transform=Compose( Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), target_transform: Optional[Callable] = None, download: bool = False)
-
UC Merced Land Use Dataset.
http://weegee.vision.ucmerced.edu/datasets/UCMerced_LandUse.zip
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class UCMercedLand(VisionDataset):
    """UC Merced Land Use Dataset.

    <http://weegee.vision.ucmerced.edu/datasets/UCMerced_LandUse.zip>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "http://weegee.vision.ucmerced.edu/datasets/"
    resources = "UCMerced_LandUse.zip"
    # Class folder name -> integer label.
    classes = {
        "agricultural": 0,
        "airplane": 1,
        "baseballdiamond": 2,
        "beach": 3,
        "buildings": 4,
        "chaparral": 5,
        "denseresidential": 6,
        "forest": 7,
        "freeway": 8,
        "golfcourse": 9,
        "harbor": 10,
        "intersection": 11,
        "mediumresidential": 12,
        "mobilehomepark": 13,
        "overpass": 14,
        "parkinglot": 15,
        "river": 16,
        "runway": 17,
        "sparseresidential": 18,
        "storagetanks": 19,
        "tenniscourt": 20,
    }

    def __init__(
        self,
        root: str,
        transform=Compose([Resize((256, 256)), ToTensor()]),
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(UCMercedLand, self).__init__(
            root, transform=transform, target_transform=target_transform
        )

        self.root = root
        self.data_mode = "Images"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        img = np.array(_load_img(img_path))
        target = self.img_labels.iloc[idx, 1]

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            # The target is an integer class index, not image data, so it is passed to
            # the transform as-is instead of being wrapped in a PIL image.
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        return len(self.img_labels)

    def get_path_and_label(self):
        """Return a DataFrame of image paths and their corresponding labels.

        Every file found in a class folder is listed with that class's label.
        """
        image_path = []
        label = []
        for cat, enc in self.classes.items():
            cat_path = os.path.join(self.root, "UCMerced_LandUse", self.data_mode, cat)
            cat_image = [os.path.join(cat_path, path) for path in os.listdir(cat_path)]
            image_path += cat_image
            label += [enc] * len(cat_image)
        return pd.DataFrame({"image": image_path, "label": label})

    def _check_exists(self):
        """Return True when the folder of every class exists under the images folder."""
        self.data_path = os.path.join(self.root, "UCMerced_LandUse", "Images")
        self.dir_classes = list(self.classes.keys())

        return all(os.path.exists(os.path.join(self.data_path, i)) for i in self.dir_classes)

    def download(self) -> None:
        """Download the dataset archive into the root directory."""
        # The archive is written inside root, so root has to exist first.
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Unpack the downloaded archive into root, then delete the archive."""
        archive = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive, self.root)
        os.remove(archive)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var classes
var functions : Dict[str, Callable]
var mirrors
var resources
Methods
def download(self) ‑> None
-
Download the dataset archive.
Expand source code
def download(self) -> None:
    """Download the dataset archive into the root directory.

    The root directory is created first when it does not exist yet.
    Unpacking is not done here; see ``extract_file``.
    """
    # The archive is written inside root, so root has to exist first.
    os.makedirs(self.root, exist_ok=True)
    file_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(file_url, os.path.join(self.root, self.resources))
def extract_file(self) ‑> None
-
Extract file from compressed.
Expand source code
def extract_file(self) -> None:
    """Unpack the downloaded archive into the root directory, then delete it."""
    archive = os.path.join(self.root, self.resources)
    destination = self.root
    shutil.unpack_archive(archive, destination)
    os.remove(archive)
def get_path_and_label(self)
-
Return a DataFrame of image paths and their corresponding labels.
Expand source code
def get_path_and_label(self):
    """Build a DataFrame listing every image file together with its class label.

    The class folders are visited in the order of ``self.classes``; every
    entry found in a class folder becomes one row with the columns ``image``
    (file path) and ``label`` (the class's integer code).
    """
    paths, targets = [], []
    for class_name, class_index in self.classes.items():
        class_dir = os.path.join(self.root, "UCMerced_LandUse", self.data_mode, class_name)
        entries = os.listdir(class_dir)
        paths.extend(os.path.join(class_dir, entry) for entry in entries)
        targets.extend([class_index] * len(entries))

    return pd.DataFrame({"image": paths, "label": targets})
class XView (root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
Dataset from DIUx xView 2018 Detection Challenge.
Source: https://challenge.xviewdataset.org/data-download (must login)
Args
root
:string
- Root directory of dataset.
train
:bool
, optional- If True, creates dataset from training set, otherwise creates from validation set.
transform
:callable
, optional- A function/transform that takes in an PIL image and returns a transformed version. E.g, transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Samples at: - https://storage.googleapis.com/ossjr/xview/train_images.tgz - https://storage.googleapis.com/ossjr/xview/train_labels.tgz - https://storage.googleapis.com/ossjr/xview/validation_images.tgz
Expand source code
class XView(VisionDataset):
    """Dataset from DIUx xView 2018 Detection Challenge.

    Source: https://challenge.xviewdataset.org/data-download (must login)

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from validation set.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the target and
            transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    Samples at:
        - https://storage.googleapis.com/ossjr/xview/train_images.tgz
        - https://storage.googleapis.com/ossjr/xview/train_labels.tgz
        - https://storage.googleapis.com/ossjr/xview/validation_images.tgz
    """

    urls = []
    resources = ["train_images.tgz", "train_labels.tgz", "validation_images.tgz"]

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(XView, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.data_mode = "train" if train else "validation"
        self.class_enc = CLASS_ENC
        self.class_dec = CLASS_DEC
        self.coords, self.chips, self.classes = None, None, None

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        if self.data_mode == "train":
            self.coords, self.chips, self.classes = self.get_path_and_label()
            self.imgs = list(os.listdir(os.path.join(self.root, "train_images")))
            # ``self.chips`` holds one file name per bounding box; samples are
            # served per image, so keep the distinct annotated image names.
            self._annotated_imgs = np.unique(self.chips)
        elif self.data_mode == "validation":
            self.imgs = list(os.listdir(os.path.join(self.root, "val_images")))

    def _check_exists(self) -> bool:
        """Create the root directory if needed and report whether the data is present.

        Train mode requires both the extracted training image directory and
        ``xView_train.geojson``; validation mode requires ``val_images``.
        """
        # makedirs also creates missing parent directories, unlike os.mkdir.
        os.makedirs(self.root, exist_ok=True)

        if self.data_mode == "train":
            train_dir = os.path.join(self.root, self.resources[0].split(".")[0])
            labels_file = os.path.join(self.root, "xView_train.geojson")
            return os.path.exists(train_dir) and os.path.exists(labels_file)
        return os.path.exists(os.path.join(self.root, "val_images"))

    def download(self):
        """Download file by asking users to input the link"""
        train_images = input(
            "Please follow the following steps to download the required dataset\n"
            "1. Visit https://challenge.xviewdataset.org/login\n"
            "2. Sign up for an account\n"
            "3. Verify your account\n"
            "4. Follow this link: https://challenge.xviewdataset.org/download-links\n"
            "5. Copy the link for 'Download Training Images (tgz)' and paste it: "
        )
        train_labels = input("\n6. Copy and paste the link for 'Download Training Labels (tgz)': ")
        val_images = input("\n7. Copy and paste the link for 'Download Validation Images (tgz)': ")

        self.urls = [train_images, train_labels, val_images]
        for idx, url in enumerate(self.urls):
            _urlretrieve(url, os.path.join(self.root, self.resources[idx]))

    def extract_file(self):
        """Extract the .tgz file"""
        for resource in self.resources:
            archive = os.path.join(self.root, resource)
            shutil.unpack_archive(archive, self.root)
            os.remove(archive)

    def _check_exists_label(self, filename):
        """Check whether bounding boxes, image filenames, and labels
        are already extracted from xView_train.geojson

        Returns:
            tuple: (path of ``filename`` inside the root directory, whether it exists)
        """
        path_to_check = os.path.join(self.root, filename)
        return path_to_check, os.path.exists(path_to_check)

    def get_path_and_label(self):
        """Gets bounding boxes, image filenames, and labels from xView_train.geojson

        The extracted arrays are cached as ``coords.npy``, ``chips.npy`` and
        ``classes.npy`` in the root directory and reloaded on later calls.

        Returns:
            coords: coordinates of the bounding boxes
            chips: image file names
            classes: classes for each ground truth
        """
        coords_path, coords_exists = self._check_exists_label("coords.npy")
        chips_path, chips_exists = self._check_exists_label("chips.npy")
        classes_path, classes_exists = self._check_exists_label("classes.npy")

        # Reuse the cached arrays only when all three are available.
        if coords_exists and chips_exists and classes_exists:
            return np.load(coords_path), np.load(chips_path), np.load(classes_path)

        fname = os.path.join(self.root, "xView_train.geojson")
        with open(fname) as f:
            data = json.load(f)

        # A set gives constant-time membership tests inside the feature loop.
        img_files = set(os.listdir(os.path.join(self.root, self.resources[0].split(".")[0])))

        coords, chips, classes = [], [], []
        for feature in data["features"]:
            properties = feature["properties"]
            b_id = properties["image_id"]
            # type_id 75 and 82 don't belong to any class
            # https://github.com/DIUx-xView/xView1_baseline/issues/3
            if properties["type_id"] in (75, 82) or b_id not in img_files:
                continue
            chips.append(b_id)
            classes.append(properties["type_id"])
            coords.append([int(num) for num in properties["bounds_imcoords"].split(",")])

        # convert to numpy arrays and save
        coords = np.array(coords)
        chips = np.array(chips)
        classes = np.array(classes)
        np.save(coords_path, coords)
        np.save(chips_path, chips)
        np.save(classes_path, classes)

        return coords, chips, classes

    def _read_image(self, img_path):
        """Load the image at ``img_path`` and apply ``self.transform`` when one is set."""
        img = np.array(_load_img(img_path))
        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)
        return img

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, target) in train mode, where target is a dictionary with the
            bounding boxes ("boxes") and labels ("labels") of every annotation of the
            image. In validation mode only the image is returned.
        """
        if self.data_mode == "train":
            # Index distinct annotated images, not individual annotations, so
            # each image is returned exactly once together with all its boxes.
            chip = self._annotated_imgs[idx]
            img = self._read_image(os.path.join(self.root, "train_images", chip))

            same_image = self.chips == chip
            bbox = self.coords[same_image]
            label = np.vectorize(index_mapping.get)(self.classes[same_image])

            target = {"boxes": bbox, "labels": label}
            if self.target_transform is not None:
                target = self.target_transform(target)
            return img, target

        # validation mode: images only, no annotations are available
        return self._read_image(os.path.join(self.root, "val_images", self.imgs[idx]))

    def __len__(self) -> int:
        # Train samples are the annotated images; validation samples are the
        # files found in the validation image directory.
        if self.data_mode == "train":
            return len(self._annotated_imgs)
        return len(self.imgs)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var resources
var urls
Methods
def download(self)
-
Download file by asking users to input the link
Expand source code
def download(self):
    """Prompt the user for the three xView download links and fetch each archive.

    The links are asked for in the order training images, training labels,
    validation images, stored in ``self.urls`` and saved under ``self.root`` using
    the file names listed in ``self.resources``.
    """
    images_prompt = (
        "Please follow the following steps to download the required dataset\n"
        "1. Visit https://challenge.xviewdataset.org/login\n"
        "2. Sign up for an account\n"
        "3. Verify your account\n"
        "4. Follow this link: https://challenge.xviewdataset.org/download-links\n"
        "5. Copy the link for 'Download Training Images (tgz)' and paste it: "
    )
    labels_prompt = "\n6. Copy and paste the link for 'Download Training Labels (tgz)': "
    validation_prompt = "\n7. Copy and paste the link for 'Download Validation Images (tgz)': "

    # Ask for the links one after another, in the same order as self.resources.
    self.urls = [input(text) for text in (images_prompt, labels_prompt, validation_prompt)]

    for position, link in enumerate(self.urls):
        destination = os.path.join(self.root, self.resources[position])
        _urlretrieve(link, destination)
def extract_file(self)
-
Extract the .tgz file
Expand source code
def extract_file(self):
    """Unpack every archive listed in ``self.resources`` into the root directory.

    Each archive is deleted right after it has been extracted.
    """
    for archive_name in self.resources:
        archive = os.path.join(self.root, archive_name)
        shutil.unpack_archive(archive, self.root)
        os.remove(archive)
def get_path_and_label(self)
-
Gets bounding boxes, image filenames, and labels from xView_train.geojson
Returns
coords
- coordinates of the bounding boxes
chips
- image file names
classes
- classes for each ground truth
Expand source code
def get_path_and_label(self):
    """Gets bounding boxes, image filenames, and labels from xView_train.geojson

    The extracted arrays are cached as ``coords.npy``, ``chips.npy`` and
    ``classes.npy`` in the root directory; when all three exist they are loaded
    instead of parsing the geojson again.

    Returns:
        coords: coordinates of the bounding boxes
        chips: image file names
        classes: classes for each ground truth
    """
    # check existence of the cached arrays
    coords_path, coords_exists = self._check_exists_label("coords.npy")
    chips_path, chips_exists = self._check_exists_label("chips.npy")
    classes_path, classes_exists = self._check_exists_label("classes.npy")

    # if all exist, load and return
    if coords_exists and chips_exists and classes_exists:
        return np.load(coords_path), np.load(chips_path), np.load(classes_path)

    # read xView_train.geojson
    fname = os.path.join(self.root, "xView_train.geojson")
    with open(fname) as f:
        data = json.load(f)

    # A set gives constant-time membership tests inside the feature loop
    # (a list would be scanned once per annotation).
    img_files = set(os.listdir(os.path.join(self.root, self.resources[0].split(".")[0])))

    coords, chips, classes = [], [], []
    for feature in data["features"]:
        properties = feature["properties"]
        b_id = properties["image_id"]
        # type_id 75 and 82 don't belong to any class
        # https://github.com/DIUx-xView/xView1_baseline/issues/3
        if properties["type_id"] in (75, 82) or b_id not in img_files:
            continue
        chips.append(b_id)
        classes.append(properties["type_id"])
        coords.append([int(num) for num in properties["bounds_imcoords"].split(",")])

    # convert to numpy arrays and save
    coords = np.array(coords)
    chips = np.array(chips)
    classes = np.array(classes)
    np.save(coords_path, coords)
    np.save(chips_path, chips)
    np.save(classes_path, classes)

    return coords, chips, classes