Module `earthvision.datasets`

Expand source code

from .drone_deploy import DroneDeploy
from .aerialcactus import AerialCactus
from .resisc45 import RESISC45
from .ucmercedland import UCMercedLand
from .eurosat import EuroSat
from .l8sparcs import L8SPARCS
from .deepsat import DeepSat
from .landcover import LandCover
from .cowc import COWC
from .l7irish import L7Irish
from .sentinel2cloud import Sentinel2Cloud
from .spacenet7 import SpaceNet7
from .xview import XView
from .cloud38 import Cloud38
from .so2sat import So2Sat
from .l8biome import L8Biome


# Public names of this package: one dataset class per sub-module imported above.
__all__ = [
    "DroneDeploy",
    "AerialCactus",
    "RESISC45",
    "UCMercedLand",
    "EuroSat",
    "L8SPARCS",
    "DeepSat",
    "LandCover",
    "COWC",
    "L7Irish",
    "Sentinel2Cloud",
    "SpaceNet7",
    "XView",
    "Cloud38",
    "So2Sat",
    "L8Biome",
]

Sub-modules

earthvision.datasets.aerialcactus: Aerial Cactus Dataset from Kaggle.
earthvision.datasets.cloud38: 38-Cloud: A Cloud Segmentation Dataset.
earthvision.datasets.cowc: Cars Overhead with Context Dataset.
earthvision.datasets.deepsat: Deepsat Dataset - Scene Classification.
earthvision.datasets.drone_deploy: Drone Deploy Dataset - Semantic Segmentation.
earthvision.datasets.eurosat: EuroSat Land Cover Categories Dataset.
earthvision.datasets.l7irish: Landsat 7 Irish Cloud Dataset.
earthvision.datasets.l8biome: L8 Biome Cloud Cover Dataset.
earthvision.datasets.l8sparcs: Landsat 8 SPARCS Cloud Dataset.
earthvision.datasets.landcover: The LandCover.ai (Land Cover from Aerial Imagery) Dataset.
earthvision.datasets.resisc45: RESISC45 Dataset.
earthvision.datasets.sentinel2cloud: Sentinel-2 Cloud Mask Catalogue Dataset.
earthvision.datasets.so2sat: So2Sat Dataset to Predict Local Climate Zone (LCZ).
earthvision.datasets.spacenet7: SpaceNet 7 Dataset: Multi-Temporal Urban Development Challenge - Instance Segmentation.
earthvision.datasets.spacenet7_utils: Script from: - https://github.com/CosmiQ/solaris - https://github.com/avanetten/CosmiQ_SN7_Baseline/blob/master/src/sn7_baseline_prep_funcs.py
earthvision.datasets.ucmercedland: UC Merced Land Use Dataset.
earthvision.datasets.utils: Utility functions.
earthvision.datasets.vision: Vision Dataset from torchvision/datasets/vision.py
earthvision.datasets.xview: Dataset from DIUx xView 2018 Detection Challenge.

Classes

class AerialCactus (root: str, train: bool = True, transform=Compose( Resize(size=(32, 32), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), target_transform: Optional[Callable] = None, download: bool = False)

Aerial Cactus Dataset.

https://www.kaggle.com/c/aerial-cactus-identification

Args

root : string: Root directory of dataset.
train : bool, optional: If True, creates dataset from training set, otherwise creates from validation set.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class AerialCactus(VisionDataset):
    """Aerial Cactus Dataset.

    <https://www.kaggle.com/c/aerial-cactus-identification>

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from validation set.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://storage.googleapis.com/ossjr"
    resources = "cactus-aerial-photos.zip"

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform=Compose([Resize((32, 32)), ToTensor()]),
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super(AerialCactus, self).__init__(
            root, transform=transform, target_transform=target_transform
        )

        self.root = root
        self.data_mode = "training_set" if train else "validation_set"

        if download:
            # Check the on-disk layout once and branch on the result.
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        img = np.array(_load_img(img_path))
        target = self.img_labels.iloc[idx, 1]

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            # The target is a scalar class index (0 or 1), not image data, so it
            # is handed to the transform as-is instead of going through
            # Image.fromarray, which cannot build an image from a scalar.
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        """Return the number of (image, label) rows in the selected split."""
        return len(self.img_labels)

    def get_path_and_label(self):
        """Return a DataFrame with columns "image" (file path) and "label" (1 = cactus, 0 = no_cactus)."""
        classes = {"cactus": 1, "no_cactus": 0}
        image_path, label = [], []

        for cat, enc in classes.items():
            # The extracted archive nests the split directory twice.
            cat_path = os.path.join(
                self.root, "cactus-aerial-photos", self.data_mode, self.data_mode, cat
            )
            cat_image = [os.path.join(cat_path, path) for path in os.listdir(cat_path)]
            image_path += cat_image
            label += [enc] * len(cat_image)

        return pd.DataFrame({"image": image_path, "label": label})

    def _check_exists(self):
        """Return True when both class folders exist for the training and validation splits."""
        self.train_path = os.path.join(
            self.root, "cactus-aerial-photos", "training_set", "training_set"
        )
        self.valid_path = os.path.join(
            self.root, "cactus-aerial-photos", "validation_set", "validation_set"
        )

        return all(
            os.path.exists(os.path.join(path, target))
            for path in [self.train_path, self.valid_path]
            for target in ["cactus", "no_cactus"]
        )

    def download(self) -> None:
        """Download the dataset archive into ``root`` (extraction is done by ``extract_file``)."""
        os.makedirs(self.root, exist_ok=True)

        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Unpack the downloaded archive into ``<root>/cactus-aerial-photos`` and delete the archive."""
        archive_path = os.path.join(self.root, self.resources)
        path_destination = os.path.join(self.root, "cactus-aerial-photos")
        shutil.unpack_archive(archive_path, path_destination)
        os.remove(archive_path)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def download(self) ‑> None

Download the dataset archive into the root directory (extraction is performed by extract_file).

Expand source code

def download(self) -> None:
    """Fetch the archive named by ``self.resources`` from ``self.mirrors`` into ``self.root``.

    The root directory is created first if it does not exist. This method only
    downloads; it does not unpack the archive.
    """
    os.makedirs(self.root, exist_ok=True)

    destination = os.path.join(self.root, self.resources)
    source_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(source_url, destination)

def extract_file(self) ‑> None

Extract file from compressed.

Expand source code

def extract_file(self) -> None:
    """Unpack the downloaded archive into ``<root>/cactus-aerial-photos``, then delete the archive."""
    archive_path = os.path.join(self.root, self.resources)
    target_dir = os.path.join(self.root, "cactus-aerial-photos")

    shutil.unpack_archive(archive_path, target_dir)
    # The archive is no longer needed once its content is on disk.
    os.remove(archive_path)

def get_path_and_label(self)

Return a DataFrame consisting of image paths and their corresponding labels.

Expand source code

def get_path_and_label(self):
    """Build a DataFrame listing every image of the selected split.

    Returns:
        pandas.DataFrame: column "image" holds the file path, column "label"
        holds 1 for the "cactus" folder and 0 for the "no_cactus" folder.
    """
    label_of = {"cactus": 1, "no_cactus": 0}
    # The split directory appears twice in the extracted layout.
    split_dir = os.path.join(
        self.root, "cactus-aerial-photos", self.data_mode, self.data_mode
    )

    paths, targets = [], []
    for class_name, class_id in label_of.items():
        class_dir = os.path.join(split_dir, class_name)
        file_names = os.listdir(class_dir)
        paths.extend(os.path.join(class_dir, name) for name in file_names)
        targets.extend([class_id] * len(file_names))

    return pd.DataFrame({"image": paths, "label": targets})

class COWC (root: str, train: bool = True, task_mode: str = 'counting', transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

Cars Overhead with Context.

https://gdo152.llnl.gov/cowc/

Args

root : string: Root directory of dataset.
train : bool, optional: If True, creates dataset from training set, otherwise creates from test set.
task_mode : string: There are two task modes, i.e. 'counting' and 'detection'. Default value is 'counting'.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class COWC(VisionDataset):
    """Cars Overhead with Context.

    https://gdo152.llnl.gov/cowc/

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        task_mode (string): There are two task modes, i.e. 'counting' and 'detection'.
            Default value is 'counting'.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    Raises:
        ValueError: If ``task_mode`` is neither 'counting' nor 'detection'.
    """

    mirrors = "https://gdo152.llnl.gov/cowc/download"
    resources = "cowc-everything.txz"

    def __init__(
        self,
        root: str,
        train: bool = True,
        task_mode: str = "counting",
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super(COWC, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.train = train
        self.task_mode = task_mode

        if download:
            # Check the on-disk state once and branch on the result.
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        if self.task_mode == "counting":
            self.task_path = os.path.join(self.root, "cowc/datasets/patch_sets/counting")
            self.file_mapping = file_mapping_counting
        elif self.task_mode == "detection":
            self.task_path = os.path.join(self.root, "cowc/datasets/patch_sets/detection")
            self.file_mapping = file_mapping_detection
        else:
            raise ValueError("task_mode not recognized.")

        # Unpack every nested archive of the task that is not on disk yet.
        for filename, compressed in self.file_mapping.items():
            if not self._check_exists_subfile(filename):
                self.extract_subfile(filename, compressed)

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        target = self.img_labels.iloc[idx, 1]
        # extract_subfile unpacks each archive into a directory named after it,
        # so the leading folder of the listed path appears twice on disk.
        folder = img_path.split("/", 1)[0]
        img_path = os.path.join(self.task_path, folder, img_path)
        img = np.array(_load_img(img_path))

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            # The target is a scalar read from the label list, not image data,
            # so it is passed to the transform directly; Image.fromarray cannot
            # build an image from a scalar.
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        """Return the number of rows in the loaded label list."""
        return len(self.img_labels)

    def get_path_and_label(self):
        """Return a DataFrame read from the label list of the selected task and split.

        Raises:
            ValueError: If ``task_mode`` is not recognized.
        """

        if self.task_mode == "counting":
            if self.train:
                label_name = "COWC_train_list_64_class.txt.bz2"
            else:
                label_name = "COWC_test_list_64_class.txt.bz2"

        elif self.task_mode == "detection":
            if self.train:
                label_name = "COWC_train_list_detection.txt.bz2"
            else:
                label_name = "COWC_test_list_detection.txt.bz2"

        else:
            raise ValueError("task_mode not recognized.")

        label_path = os.path.join(self.task_path, label_name)
        df = pd.read_csv(label_path, sep=" ", header=None)

        return df

    def _check_exists_subfile(self, filename):
        """Return True when ``filename`` already exists inside the task directory."""
        path_to_check = os.path.join(self.task_path, filename)
        return os.path.exists(path_to_check)

    def extract_subfile(self, filename, compressed):
        """Extract the tar archive ``compressed`` into the directory ``filename``.

        Both names are relative to ``self.task_path``.
        """
        comp_path = os.path.join(self.task_path, compressed)
        file_path = os.path.join(self.task_path, filename)
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(comp_path) as tar:
            tar.extractall(file_path)

    def _check_exists(self):
        """Return True when the extracted ``cowc`` directory exists under ``root``."""
        return os.path.exists(os.path.join(self.root, "cowc"))

    def download(self) -> None:
        """Download the dataset archive into ``root`` (extraction is done by ``extract_file``)."""
        # The target directory must exist before the archive can be written.
        os.makedirs(self.root, exist_ok=True)

        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Unpack the downloaded archive into ``root`` and delete the archive."""
        archive_path = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive_path, self.root)
        os.remove(archive_path)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def download(self) ‑> None

Download the dataset archive into the root directory.

Expand source code

def download(self) -> None:
    """Download the dataset archive into ``self.root``.

    The archive ``self.resources`` is fetched from ``self.mirrors``. Extraction
    is a separate step (``extract_file``).
    """
    # The target directory must exist before the archive can be written there.
    os.makedirs(self.root, exist_ok=True)

    file_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(file_url, os.path.join(self.root, self.resources))

def extract_file(self) ‑> None

Extract file from compressed.

Expand source code

def extract_file(self) -> None:
    """Unpack the downloaded archive directly into ``self.root``, then delete the archive."""
    archive_path = os.path.join(self.root, self.resources)

    shutil.unpack_archive(archive_path, self.root)
    # Remove the archive once its content has been written.
    os.remove(archive_path)

def extract_subfile(self, filename, compressed)

Expand source code

def extract_subfile(self, filename, compressed):
    """Extract a nested tar archive of the current task.

    Args:
        filename: Name of the destination directory, relative to ``self.task_path``.
        compressed: Name of the tar archive, relative to ``self.task_path``.
    """
    comp_path = os.path.join(self.task_path, compressed)
    file_path = os.path.join(self.task_path, filename)
    # The context manager closes the archive even when extraction raises.
    with tarfile.open(comp_path) as tar:
        tar.extractall(file_path)

def get_path_and_label(self)

Return a DataFrame consisting of image paths and their corresponding labels.

Expand source code

def get_path_and_label(self):
    """Read the label list for the current task mode and split.

    Returns:
        pandas.DataFrame: the space-separated label file parsed without a header row.

    Raises:
        ValueError: If ``self.task_mode`` is neither "counting" nor "detection".
    """
    # task mode -> (training list, test list)
    label_files = {
        "counting": (
            "COWC_train_list_64_class.txt.bz2",
            "COWC_test_list_64_class.txt.bz2",
        ),
        "detection": (
            "COWC_train_list_detection.txt.bz2",
            "COWC_test_list_detection.txt.bz2",
        ),
    }
    if self.task_mode not in label_files:
        raise ValueError("task_mode not recognized.")

    train_list, test_list = label_files[self.task_mode]
    chosen = train_list if self.train else test_list

    return pd.read_csv(os.path.join(self.task_path, chosen), sep=" ", header=None)

class Cloud38 (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

Cloud 38 Dataset.

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class Cloud38(VisionDataset):
    """Cloud 38 Dataset.

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "http://vault.sfu.ca/index.php/s/pymNqYF09JkM8Bp/download"
    resources = "38cloud.zip"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super(Cloud38, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.data_path = os.path.join(self.root, "38cloud")
        self.base_path = Path(os.path.join(self.data_path, "38-Cloud_training"))

        os.makedirs(self.root, exist_ok=True)

        if download:
            # Check the on-disk layout once and branch on the result.
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.file_validator()
        self.labels = self.get_path()
        print("Done.")

    def file_validator(self):
        """Create the ``train_rgb`` and ``labels`` folders and (re)generate their content.

        Every file in ``train_red`` is combined into an RGB PNG and every file in
        ``train_gt`` is converted into a PNG mask.
        """
        for folder in ("train_rgb", "labels"):
            if not (self.base_path / folder).exists():
                (self.base_path / folder).mkdir()

        for red_patch in (self.base_path / "train_red").iterdir():
            self.create_rgb_pil(red_patch)

        for gt_patch in (self.base_path / "train_gt").iterdir():
            self.convert_tif_png(gt_patch, self.base_path / "labels")

    def get_path(self):
        """Return a DataFrame pairing ground-truth files ("GT") with label PNGs ("Label").

        Both listings are sorted so that row i refers to the same patch in both
        columns; ``os.listdir`` gives no ordering guarantee.
        """
        path_label = os.path.join(self.base_path, "labels")
        path_gt = os.path.join(self.base_path, "train_gt")
        label_listing = [os.path.join(path_label, i) for i in sorted(os.listdir(path_label))]
        gt_listing = [os.path.join(path_gt, i) for i in sorted(os.listdir(path_gt))]
        return pd.DataFrame({"GT": gt_listing, "Label": label_listing})

    def create_rgb_pil(self, red_filename: Path):
        """Combine the red, green and blue band files of one patch into an RGB PNG.

        The green/blue/rgb file names are derived from the red file name by
        replacing "red" in the path. The PNG is saved and also returned.
        """
        self.red_filename = str(red_filename)
        green_fn = self.red_filename.replace("red", "green")
        blue_fn = self.red_filename.replace("red", "blue")
        rgb_fn = self.red_filename.replace("red", "rgb").replace(".TIF", ".png")

        array_red = np.array(Image.open(self.red_filename))
        array_green = np.array(Image.open(green_fn))
        array_blue = np.array(Image.open(blue_fn))

        array_rgb = np.stack([array_red, array_green, array_blue], axis=2)
        # Normalise to [0, 1] using the maximum of the band dtype.
        array_rgb = array_rgb / np.iinfo(array_rgb.dtype).max

        # A fully saturated pixel scales to 256, which does not fit in uint8;
        # clip to 255 so it stays white instead of wrapping around.
        rgb = Image.fromarray(np.clip(256 * array_rgb, 0, 255).astype(np.uint8), "RGB")
        rgb.save(rgb_fn)
        return rgb

    def convert_tif_png(self, tif_file: Path, out_folder: Path):
        """Convert a ground-truth TIF into a PNG mask (1 where the TIF is 255, else 0).

        The PNG is written to ``out_folder`` under the TIF's stem and also returned.
        """
        self.tif_file = tif_file
        self.out_folder = out_folder
        array_tif = np.array(Image.open(self.tif_file))
        # np.where with Python ints yields a platform-int array, which Pillow
        # cannot convert when that is 64-bit; uint8 is sufficient for a 0/1 mask.
        mask = np.where(array_tif == 255, 1, 0).astype(np.uint8)
        im = Image.fromarray(mask)
        im.save(self.out_folder / self.tif_file.with_suffix(".png").name)
        return im

    def __len__(self) -> int:
        """Return the number of (GT, Label) pairs."""
        # The pairs are stored in ``self.labels``; ``img_labels`` is never set here.
        return len(self.labels)

    def download(self) -> None:
        """Download the dataset archive into ``root`` (extraction is done by ``extract_file``)."""
        _urlretrieve(self.mirrors, os.path.join(self.root, self.resources))

    def _check_exists(self):
        """Return True when all expected top-level dataset folders exist."""
        folders = [
            "38-Cloud_95-Cloud_Test_Metadata_Files",
            "38-Cloud_test",
            "38-Cloud_training",
            "38-Cloud_Training_Metadata_Files",
        ]

        return all(
            os.path.exists(os.path.join(self.data_path, folder_pth)) for folder_pth in folders
        )

    def extract_file(self):
        """Unpack the downloaded archive into ``root`` and delete the archive."""
        print("Extracting...")
        archive_path = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive_path, self.root)
        os.remove(archive_path)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def convert_tif_png(self, tif_file: pathlib.Path, out_folder: pathlib.Path)

Converting TIF file to PNG format

Expand source code

def convert_tif_png(self, tif_file: Path, out_folder: Path):
    """Convert a ground-truth TIF into a PNG mask.

    Args:
        tif_file: Path of the TIF file to read.
        out_folder: Folder that receives the PNG, named after the TIF's stem.

    Returns:
        The PIL image that was saved: 1 where the TIF equals 255, 0 elsewhere.
    """
    self.tif_file = tif_file
    self.out_folder = out_folder
    array_tif = np.array(Image.open(self.tif_file))
    # np.where with Python ints yields a platform-int array, which Pillow
    # cannot convert when that is 64-bit; uint8 is sufficient for a 0/1 mask.
    mask = np.where(array_tif == 255, 1, 0).astype(np.uint8)
    im = Image.fromarray(mask)
    im.save(self.out_folder / self.tif_file.with_suffix(".png").name)
    return im

def create_rgb_pil(self, red_filename: pathlib.Path)

Combining three bands to RGB format

Expand source code

def create_rgb_pil(self, red_filename: Path):
    """Combine the red, green and blue band files of one patch into an RGB PNG.

    Args:
        red_filename: Path of the red-band file. The green, blue and output
            paths are derived from it by replacing "red" in the path string.

    Returns:
        The PIL RGB image that was saved.
    """
    self.red_filename = str(red_filename)
    green_fn = self.red_filename.replace("red", "green")
    blue_fn = self.red_filename.replace("red", "blue")
    rgb_fn = self.red_filename.replace("red", "rgb").replace(".TIF", ".png")

    array_red = np.array(Image.open(self.red_filename))
    array_green = np.array(Image.open(green_fn))
    array_blue = np.array(Image.open(blue_fn))

    array_rgb = np.stack([array_red, array_green, array_blue], axis=2)
    # Normalise to [0, 1] using the maximum of the band dtype.
    array_rgb = array_rgb / np.iinfo(array_rgb.dtype).max

    # A fully saturated pixel scales to 256, which does not fit in uint8;
    # clip to 255 so it stays white instead of wrapping around.
    rgb = Image.fromarray(np.clip(256 * array_rgb, 0, 255).astype(np.uint8), "RGB")
    rgb.save(rgb_fn)
    return rgb

def download(self) ‑> None

Download the dataset archive into the root directory (extraction is performed by extract_file).

Expand source code

def download(self) -> None:
    """Fetch the archive at ``self.mirrors`` and store it as ``self.resources`` under ``self.root``.

    This method only downloads; unpacking is done by ``extract_file``.
    """
    archive_path = os.path.join(self.root, self.resources)
    _urlretrieve(self.mirrors, archive_path)

def extract_file(self)

Extract file from the compressed

Expand source code

def extract_file(self):
    """Unpack the downloaded archive into ``self.root``, then delete the archive."""
    archive_path = os.path.join(self.root, self.resources)

    print("Extracting...")
    shutil.unpack_archive(archive_path, self.root)
    # The archive is no longer needed once its content is on disk.
    os.remove(archive_path)

def file_validator(self)

Expand source code

def file_validator(self):
    """Prepare the derived folders of the training set.

    Creates ``train_rgb`` and ``labels`` under ``self.base_path`` when missing,
    then calls ``create_rgb_pil`` for every entry of ``train_red`` and
    ``convert_tif_png`` for every entry of ``train_gt``.
    """
    labels_dir = self.base_path / "labels"

    for derived_dir in (self.base_path / "train_rgb", labels_dir):
        if not derived_dir.exists():
            derived_dir.mkdir()

    for red_band in (self.base_path / "train_red").iterdir():
        self.create_rgb_pil(red_band)

    for ground_truth in (self.base_path / "train_gt").iterdir():
        self.convert_tif_png(ground_truth, labels_dir)

def get_path(self)

Expand source code

def get_path(self):
    """Return a DataFrame pairing ground-truth files with their label PNGs.

    Returns:
        pandas.DataFrame: column "GT" lists the files of ``train_gt`` and column
        "Label" the files of ``labels``, both under ``self.base_path``.
    """
    path_label = os.path.join(self.base_path, "labels")
    path_gt = os.path.join(self.base_path, "train_gt")
    # os.listdir gives no ordering guarantee; sort both listings so that row i
    # refers to the same patch in both columns.
    label_listing = [os.path.join(path_label, i) for i in sorted(os.listdir(path_label))]
    gt_listing = [os.path.join(path_gt, i) for i in sorted(os.listdir(path_gt))]
    return pd.DataFrame({"GT": gt_listing, "Label": label_listing})

class DeepSat (root: str, dataset_type='SAT-4', train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

DeepSat Dataset.

Args

root : string: Root directory of dataset.
dataset_type : string, optional: Choose dataset type ['SAT-4', 'SAT-6'].
train : bool, optional: If True, creates dataset from training set, otherwise creates from test set.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class DeepSat(VisionDataset):
    """DeepSat Dataset.

    Args:
        root (string): Root directory of dataset.
        dataset_type (string, optional): Choose dataset type ['SAT-4', 'SAT-6'].
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    resources = {
        "SAT-4_and_SAT-6_datasets": "https://drive.google.com/uc?id=0B0Fef71_vt3PUkZ4YVZ5WWNvZWs&export=download"
    }
    dataset_types = ["SAT-4", "SAT-6"]

    def __init__(
        self,
        root: str,
        dataset_type="SAT-4",
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super(DeepSat, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.dataset_type = dataset_type
        self.train = train
        self.folder_pth = os.path.join(self.root, list(self.resources.keys())[0])
        self.filename = list(self.resources.keys())[0] + ".tar.gz"

        if download:
            # Check the on-disk state once and branch on the result.
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()

        dataset = self.load_dataset()
        self.choose_data_mode(dataset)

    def download(self) -> None:
        """Download the dataset archive into ``root`` and extract it into ``folder_pth``."""
        import tarfile

        self.root = os.path.expanduser(self.root)
        # Keep the extraction folder in sync with the expanded root.
        self.folder_pth = os.path.join(self.root, list(self.resources.keys())[0])
        os.makedirs(self.root, exist_ok=True)
        archive_path = os.path.join(self.root, self.filename)
        print("Download dataset...")

        gdown.download(
            self.resources["SAT-4_and_SAT-6_datasets"],
            archive_path,
            quiet=False,
        )

        if os.path.exists(self.folder_pth):
            print(f"file {self.folder_pth} already exists")
        else:
            os.mkdir(self.folder_pth)
            print(f"Extracting file {self.filename}")
            # Extract in-process rather than through a shell command string, so
            # paths containing spaces or shell metacharacters are handled safely.
            with tarfile.open(archive_path) as archive:
                archive.extractall(self.folder_pth)
            print("Extracting file success !")

    def _check_exists(self) -> bool:
        """Return True when the extracted dataset folder exists under ``root``.

        Raises:
            ValueError: If ``dataset_type`` is not one of ``dataset_types``.
        """
        if self.dataset_type not in self.dataset_types:
            # Raise instead of terminating the interpreter with a success code.
            raise ValueError(
                f"Unknown dataset {self.dataset_type}. "
                f"Available dataset : {self.dataset_types}"
            )

        # load_dataset reads from folder_pth, so that is what must be present.
        return os.path.exists(self.folder_pth)

    def load_dataset(self):
        """Load the ``.mat`` file of the selected dataset type from ``folder_pth``."""
        filename = {"SAT-4": "sat-4-full.mat", "SAT-6": "sat-6-full.mat"}
        dataset = sio.loadmat(os.path.join(self.folder_pth, filename[self.dataset_type]))
        return dataset

    def choose_data_mode(self, dataset):
        """Store the train or test arrays (and the annotations) of ``dataset`` on the instance."""
        if self.train:
            x_type, y_type = "train_x", "train_y"
        else:
            x_type, y_type = "test_x", "test_y"

        self.x, self.y = dataset[x_type], dataset[y_type]
        self.annot = dataset["annotations"]

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        # Samples are indexed along the last axis of both arrays.
        img = self.x[:, :, :, idx]
        target = self.y[:, idx]

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            target = Image.fromarray(target)
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        """Return the number of samples (size of the last axis of ``x``)."""
        return self.x.shape[3]

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var dataset_types
var functions : Dict[str, Callable]
var resources

Methods

def choose_data_mode(self, dataset)

Expand source code

def choose_data_mode(self, dataset):
    """Pick the train or test arrays out of ``dataset`` and store them on the instance.

    Sets ``self.x`` and ``self.y`` from the "train_*" keys when ``self.train``
    is truthy, otherwise from the "test_*" keys, and ``self.annot`` from
    "annotations".
    """
    key_pairs = {True: ("train_x", "train_y"), False: ("test_x", "test_y")}
    x_key, y_key = key_pairs[bool(self.train)]

    self.x, self.y = dataset[x_key], dataset[y_key]
    self.annot = dataset["annotations"]

def download(self) ‑> None

Download dataset and extract it

Expand source code

def download(self) -> None:
    """Download the dataset archive into ``self.root`` and extract it.

    The archive is extracted into ``self.folder_pth`` unless that folder
    already exists.
    """
    import tarfile

    self.root = os.path.expanduser(self.root)
    # Keep the extraction folder in sync with the expanded root.
    self.folder_pth = os.path.join(self.root, list(self.resources.keys())[0])
    os.makedirs(self.root, exist_ok=True)
    archive_path = os.path.join(self.root, self.filename)
    print("Download dataset...")

    gdown.download(
        self.resources["SAT-4_and_SAT-6_datasets"],
        archive_path,
        quiet=False,
    )

    if os.path.exists(self.folder_pth):
        print(f"file {self.folder_pth} already exists")
    else:
        os.mkdir(self.folder_pth)
        print(f"Extracting file {self.filename}")
        # Extract in-process rather than through a shell command string, so
        # paths containing spaces or shell metacharacters are handled safely.
        with tarfile.open(archive_path) as archive:
            archive.extractall(self.folder_pth)
        print("Extracting file success !")

def load_dataset(self)

Expand source code

def load_dataset(self):
    """Load the ``.mat`` file matching ``self.dataset_type`` from ``self.folder_pth``.

    Returns:
        The object returned by ``sio.loadmat`` for that file.
    """
    mat_files = {"SAT-4": "sat-4-full.mat", "SAT-6": "sat-6-full.mat"}
    mat_path = os.path.join(self.folder_pth, mat_files[self.dataset_type])
    return sio.loadmat(mat_path)

class DroneDeploy (root: str, dataset_type='dataset-sample', data_mode: int = 0, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

Drone Deploy Semantic Dataset.

Args

root : string: Root directory of dataset.
dataset_type : string, optional: Choose dataset type.
data_mode : int: 0 for train data, 1 for validation data, and 2 for testing data
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class DroneDeploy(VisionDataset):
    """Drone Deploy Semantic Dataset.

    Args:
        root (string): Root directory of dataset.
        dataset_type (string, optional): Choose dataset type. Must be one of the
            keys of ``resources``.
        data_mode (int): 0 for train data, 1 for validation data, and 2 for testing data
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    """

    resources = {
        "dataset-sample": "https://dl.dropboxusercontent.com/s/h8a8kev0rktf4kq/dataset-sample.tar.gz?dl=0",
        "dataset-medium": "https://dl.dropboxusercontent.com/s/r0dj9mhyv4bgbme/dataset-medium.tar.gz?dl=0",
    }

    def __init__(
        self,
        root: str,
        dataset_type="dataset-sample",
        data_mode: int = 0,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super().__init__(root, transform=transform, target_transform=target_transform)

        # Expand "~" once so every path derived from the root (archive, chip
        # folders, split lists) points at the same location.
        self.root = os.path.expanduser(root)
        self.dataset_type = dataset_type
        self.filename = f"{dataset_type}.tar.gz"
        self.filepath = os.path.join(self.root, self.filename)
        self.data_mode = data_mode
        self.label_path = f"{dataset_type}/label-chips"
        self.image_path = f"{dataset_type}/image-chips"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()

        self.load_dataset()

    def download(self) -> None:
        """Download a dataset, extract it and create the tiles."""
        # Function-scope import: the file's import block is not touched.
        import shutil

        print(f'Downloading "{self.dataset_type}"')
        self.root = os.path.expanduser(self.root)
        os.makedirs(self.root, exist_ok=True)
        fpath = os.path.join(self.root, self.filename)
        _urlretrieve(self.resources[self.dataset_type], fpath)

        dataset_dir = os.path.join(self.root, self.dataset_type)
        if not os.path.exists(dataset_dir):
            print(f'Extracting "{self.filepath}"')
            # Unpack straight into the root instead of extracting into the
            # current working directory and moving the result via the shell.
            shutil.unpack_archive(fpath, self.root)
        else:
            print(f'Folder "{self.dataset_type}" already exists.')

        # Create the chip folders under the root; an already existing folder
        # is not an error.
        for chips in ("image-chips", "label-chips"):
            os.makedirs(os.path.join(dataset_dir, chips), exist_ok=True)

        run(dataset_dir)

    def _check_exists(self) -> bool:
        """Return True when the dataset archive is present at ``self.filepath``.

        Raises:
            ValueError: if ``self.dataset_type`` is not a key of ``resources``.
        """
        if self.dataset_type not in self.resources:
            raise ValueError(
                f"Unknown dataset {self.dataset_type}. "
                f"Available dataset : {list(self.resources)}"
            )

        return os.path.exists(self.filepath)

    def load_dataset(self):
        """Build ``self.image_files`` from the split list selected by ``self.data_mode``.

        Raises:
            ValueError: if ``self.data_mode`` is not 0, 1 or 2.
        """
        split_lists = {0: "train.txt", 1: "valid.txt", 2: "test.txt"}
        try:
            list_chip = split_lists[self.data_mode]
        except (KeyError, TypeError):
            raise ValueError(
                f"data_mode must be 0 (train), 1 (valid) or 2 (test), got {self.data_mode!r}"
            ) from None

        dataset_dir = os.path.join(self.root, self.dataset_type)
        self.image_files = [
            f"{dataset_dir}/image-chips/{fname}"
            for fname in load_lines(os.path.join(dataset_dir, list_chip))
        ]

    def __getitem__(self, idx) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, target) where target is the label chip converted
            with ``mask_to_classes``.
        """
        image_file = self.image_files[idx]
        # The label chip has the same file name, in the label-chips folder.
        label_file = image_file.replace(self.image_path, self.label_path)

        img = np.array(load_img(image_file))
        target = mask_to_classes(load_img(label_file))
        target = np.array(target)

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            target = Image.fromarray(target)
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        return len(self.image_files)

    def on_epoch_end(self):
        """Shuffle ``self.image_files`` in place."""
        random.shuffle(self.image_files)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var resources

Methods

def download(self) ‑> None

Download a dataset, extract it and create the tiles.

Expand source code

def download(self) -> None:
    """Download a dataset, extract it and create the tiles."""
    # Function-scope import: the file's import block is not touched.
    import shutil

    print(f'Downloading "{self.dataset_type}"')
    self.root = os.path.expanduser(self.root)
    os.makedirs(self.root, exist_ok=True)
    fpath = os.path.join(self.root, self.filename)
    _urlretrieve(self.resources[self.dataset_type], fpath)

    dataset_dir = os.path.join(self.root, self.dataset_type)
    if not os.path.exists(dataset_dir):
        print(f'Extracting "{self.filepath}"')
        # Unpack the file that was just downloaded straight into the root,
        # instead of extracting into the current working directory and
        # moving the result via the shell.
        shutil.unpack_archive(fpath, self.root)
    else:
        print(f'Folder "{self.dataset_type}" already exists.')

    # Create the chip folders under the root; an already existing folder is
    # not an error.
    for chips in ("image-chips", "label-chips"):
        os.makedirs(os.path.join(dataset_dir, chips), exist_ok=True)

    run(dataset_dir)

def load_dataset(self)

Expand source code

def load_dataset(self):
    """Build ``self.image_files`` from the split list selected by ``self.data_mode``.

    ``train.txt``, ``valid.txt`` or ``test.txt`` (for ``data_mode`` 0, 1, 2)
    is read from the dataset folder and every listed name is turned into a
    path inside its ``image-chips`` folder.

    Raises:
        ValueError: if ``self.data_mode`` is not 0, 1 or 2.
    """
    split_lists = {0: "train.txt", 1: "valid.txt", 2: "test.txt"}
    try:
        list_chip = split_lists[self.data_mode]
    except (KeyError, TypeError):
        raise ValueError(
            f"data_mode must be 0 (train), 1 (valid) or 2 (test), got {self.data_mode!r}"
        ) from None

    dataset_dir = os.path.join(self.root, self.dataset_type)
    self.image_files = [
        f"{dataset_dir}/image-chips/{fname}"
        for fname in load_lines(os.path.join(dataset_dir, list_chip))
    ]

def on_epoch_end(self)

Expand source code

def on_epoch_end(self):
    """Shuffle ``self.image_files`` in place."""
    chip_paths = self.image_files
    random.shuffle(chip_paths)

class EuroSat (root: str, transform=Compose( Resize(size=(64, 64), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), target_transform: Optional[Callable] = None, download: bool = False)

EuroSat Land Cover Categories.

http://madm.dfki.de/files/sentinel

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class EuroSat(VisionDataset):
    """EuroSat Land Cover Categories.

    <http://madm.dfki.de/files/sentinel>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "http://madm.dfki.de/files/sentinel"
    resources = "EuroSAT.zip"
    # Class folder name -> integer label.
    classes = {
        "AnnualCrop": 0,
        "Forest": 1,
        "HerbaceousVegetation": 2,
        "Highway": 3,
        "Industrial": 4,
        "Pasture": 5,
        "PermanentCrop": 6,
        "Residential": 7,
        "River": 8,
        "SeaLake": 9,
    }

    def __init__(
        self,
        root: str,
        transform=Compose([Resize((64, 64)), ToTensor()]),
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super().__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.data_mode = "2750"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        img = np.array(_load_img(img_path))
        target = self.img_labels.iloc[idx, 1]

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            # The target is an integer class index, not an image, so the
            # transform receives the raw label.
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        return len(self.img_labels)

    def _check_exists(self) -> bool:
        """Return True when every class folder exists under ``root/data_mode``.

        Also stores ``self.data_path`` and ``self.dir_classes``.
        """
        self.data_path = os.path.join(self.root, self.data_mode)
        self.dir_classes = list(self.classes.keys())

        return all(os.path.exists(os.path.join(self.data_path, i)) for i in self.dir_classes)

    def download(self) -> None:
        """Download file"""
        # Make sure the destination directory exists before writing into it.
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Extract the .zip file and delete the archive afterwards."""
        archive_path = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive_path, self.root)
        os.remove(archive_path)

    def get_path_and_label(self):
        """Return a DataFrame with an ``image`` (path) and a ``label`` column."""
        image_path = []
        label = []
        for cat, enc in self.classes.items():
            cat_path = os.path.join(self.root, self.data_mode, cat)
            # ``os.listdir`` order is arbitrary; sort so that an index always
            # refers to the same file.
            cat_image = [os.path.join(cat_path, path) for path in sorted(os.listdir(cat_path))]
            image_path += cat_image
            label += [enc] * len(cat_image)

        return pd.DataFrame({"image": image_path, "label": label})

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var classes
var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def download(self) ‑> None

Download file

Expand source code

def download(self) -> None:
    """Download the archive ``self.resources`` from ``self.mirrors`` into ``self.root``."""
    # Make sure the destination directory exists before writing into it.
    os.makedirs(self.root, exist_ok=True)
    file_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(file_url, os.path.join(self.root, self.resources))

def extract_file(self) ‑> None

Extract the .zip file

Expand source code

def extract_file(self) -> None:
    """Unpack the downloaded archive into ``self.root``, then delete the archive."""
    archive_path = os.path.join(self.root, self.resources)
    shutil.unpack_archive(archive_path, self.root)
    os.remove(archive_path)

def get_path_and_label(self)

Return a DataFrame containing each image path and its corresponding label.

Expand source code

def get_path_and_label(self):
    """Return a DataFrame with an ``image`` (path) and a ``label`` column.

    Every entry of each class folder under ``root/data_mode`` is listed and
    tagged with that class's integer label from ``self.classes``.
    """
    image_path = []
    label = []
    for cat, enc in self.classes.items():
        cat_path = os.path.join(self.root, self.data_mode, cat)
        # ``os.listdir`` order is arbitrary; sort so that an index always
        # refers to the same file.
        cat_image = [os.path.join(cat_path, path) for path in sorted(os.listdir(cat_path))]
        image_path += cat_image
        label += [enc] * len(cat_image)

    return pd.DataFrame({"image": image_path, "label": label})

class L7Irish (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

Landsat 7 Irish Cloud.

https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class L7Irish(VisionDataset):
    """Landsat 7 Irish Cloud.

    <https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "http://landsat.usgs.gov/cloud-validation/cca_irish_2015/"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super().__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.download_urls = self.get_download_url()
        # Archive file names, and the same names without the ".tar.gz" suffix.
        self.resources = [url.split("/")[-1] for url in self.download_urls]
        self.data_modes = [filename.split(".tar.gz")[0] for filename in self.resources]

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def get_download_url(self):
        """Get the urls to download the files.

        Returns:
            list: every non-empty ``href`` on the download page that ends
            with ``.gz``.
        """
        page = requests.get(
            "https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data"
        )
        soup = BeautifulSoup(page.content, "html.parser")

        hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
        # A list rather than a one-shot ``filter`` iterator, so that
        # ``self.download_urls`` can be iterated more than once.
        return [href for href in hrefs if href and href.endswith(".gz")]

    def download(self):
        """Download file"""
        for resource in self.resources:
            file_url = posixpath.join(self.mirrors, resource)
            _urlretrieve(file_url, os.path.join(self.root, resource))

    def extract_file(self):
        """Extract every downloaded archive into the root and delete it afterwards."""
        for resource in self.resources:
            archive_path = os.path.join(self.root, resource)
            shutil.unpack_archive(archive_path, self.root)
            os.remove(archive_path)

    def _check_exists(self):
        """Return True when a folder exists under the root for every data mode.

        The root directory is created if it is missing.
        """
        # ``makedirs`` also creates missing parent directories.
        os.makedirs(self.root, exist_ok=True)

        return all(
            os.path.exists(os.path.join(self.root, data_mode)) for data_mode in self.data_modes
        )

    def get_path_and_label(self):
        """Get the path of the images and labels (masks) in a dataframe"""
        image_path, label = [], []

        for data_mode in self.data_modes:
            scene_dir = os.path.join(self.root, data_mode)
            images = glob.glob(os.path.join(scene_dir, "L7*.TIF"))
            if not images:
                continue

            # The mask listing does not depend on the image, so it is globbed
            # once per folder. Every image is paired with the folder's mask
            # file(s); the DataFrame below needs equally long columns, i.e.
            # exactly one "*mask*" match per folder.
            masks = glob.glob(os.path.join(scene_dir, "*mask*"))
            image_path.extend(images)
            label.extend(masks * len(images))

        return pd.DataFrame({"image": image_path, "label": label})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, mask)
        """
        img_path = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        img = np.array(_load_img(img_path))
        mask = np.array(_load_img(mask_path))

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            mask = Image.fromarray(mask)
            mask = self.target_transform(mask)
        return img, mask

    def __len__(self) -> int:
        return len(self.img_labels)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors

Methods

def download(self)

Download file

Expand source code

def download(self):
    """Fetch every archive named in ``self.resources`` from ``self.mirrors`` into ``self.root``."""
    for archive_name in self.resources:
        source_url = posixpath.join(self.mirrors, archive_name)
        destination = os.path.join(self.root, archive_name)
        _urlretrieve(source_url, destination)

def extract_file(self)

Extract the downloaded archives (.tar.gz) and delete them afterwards

Expand source code

def extract_file(self):
    """Unpack each archive in ``self.resources`` into ``self.root`` and delete it."""
    for archive_name in self.resources:
        archive_path = os.path.join(self.root, archive_name)
        shutil.unpack_archive(archive_path, self.root)
        os.remove(archive_path)

def get_download_url(self)

Get the urls to download the files.

Expand source code

def get_download_url(self):
    """Get the urls to download the files.

    The download page is fetched and parsed, and the ``href`` of every
    anchor is collected.

    Returns:
        list: every non-empty ``href`` that ends with ``.gz``.
    """
    page = requests.get(
        "https://landsat.usgs.gov/landsat-7-cloud-cover-assessment-validation-data"
    )
    soup = BeautifulSoup(page.content, "html.parser")

    hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
    # A list rather than a one-shot ``filter`` iterator, so that the result
    # can be stored and iterated more than once.
    return [href for href in hrefs if href and href.endswith(".gz")]

def get_path_and_label(self)

Get the path of the images and labels (masks) in a dataframe

Expand source code

def get_path_and_label(self):
    """Get the path of the images and labels (masks) in a dataframe.

    For every folder in ``self.data_modes`` each ``L7*.TIF`` file becomes an
    ``image`` entry and is paired with the folder's ``*mask*`` file(s).
    """
    image_path, label = [], []

    for data_mode in self.data_modes:
        scene_dir = os.path.join(self.root, data_mode)
        images = glob.glob(os.path.join(scene_dir, "L7*.TIF"))
        if not images:
            continue

        # The mask listing does not depend on the image, so it is globbed
        # once per folder. The DataFrame below needs equally long columns,
        # i.e. exactly one "*mask*" match per folder.
        masks = glob.glob(os.path.join(scene_dir, "*mask*"))
        image_path.extend(images)
        label.extend(masks * len(images))

    return pd.DataFrame({"image": image_path, "label": label})

class L8Biome (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

L8 Biome Cloud Cover.

Download page https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class L8Biome(VisionDataset):
    """L8 Biome Cloud Cover.

    Download page https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super().__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.download_urls = self.get_download_url()
        # Archive file names (still carrying the ".tar.gz" suffix).
        self.data_modes = [url.split("/")[-1] for url in self.download_urls]

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def get_download_url(self):
        """Get the urls to download the files.

        Returns:
            list: every non-empty ``href`` on the ``mirrors`` page that ends
            with ``.tar.gz``.
        """
        page = requests.get(self.mirrors)
        soup = BeautifulSoup(page.content, "html.parser")

        hrefs = (anchor.get("href") for anchor in soup.find_all("a"))
        return [href for href in hrefs if href and href.endswith(".tar.gz")]

    def download(self):
        """Download file"""
        for resource in self.download_urls:
            filename = resource.split("/")[-1]
            _urlretrieve(resource, os.path.join(self.root, filename))

    def extract_file(self):
        """Extract every downloaded archive into the root and delete it afterwards."""
        for resource in self.data_modes:
            archive_path = os.path.join(self.root, resource)
            shutil.unpack_archive(archive_path, self.root)
            os.remove(archive_path)

    def _check_exists(self):
        """Return True when ``root/BC/<name>`` exists for every archive name.

        The root directory is created if it is missing.
        """
        # ``makedirs`` also creates missing parent directories.
        os.makedirs(self.root, exist_ok=True)

        return all(
            os.path.exists(os.path.join(self.root, "BC", data_mode.replace(".tar.gz", "")))
            for data_mode in self.data_modes
        )

    def get_path_and_label(self):
        """Get the path of the images and labels (masks) in a dataframe"""
        image_directory, label = [], []

        for data_mode in self.data_modes:
            image_dir = os.path.join(self.root, "BC", data_mode.replace(".tar.gz", ""))

            image_directory.append(image_dir)
            label.extend(glob.glob(os.path.join(image_dir, "*mask.hdr")))

        return pd.DataFrame({"image": image_directory, "label": label})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, mask)
        """
        img_directory = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        # The band files are named after their folder. ``basename`` works
        # with whichever separator ``os.path.join`` produced, and the loop
        # variable no longer shadows ``idx``.
        observation = os.path.basename(img_directory)
        ls_stack_path = [
            f"{img_directory}/{observation}_B{band}.TIF" for band in range(1, 12)
        ]

        img = _load_stack_img(ls_stack_path)
        mask = _load_img_hdr(mask_path)

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            mask = Image.fromarray(mask)
            mask = self.target_transform(mask)

        return img, mask

    def __len__(self) -> int:
        return len(self.img_labels)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors

Methods

def download(self)

Download file

Expand source code

def download(self):
    """Download every URL in ``self.download_urls`` into ``self.root``."""
    for url in self.download_urls:
        archive_name = url.split("/")[-1]
        destination = os.path.join(self.root, archive_name)
        _urlretrieve(url, destination)

def extract_file(self)

Extract the downloaded archives (.tar.gz) and delete them afterwards

Expand source code

def extract_file(self):
    """Unpack each archive in ``self.data_modes`` into ``self.root`` and delete it."""
    for archive_name in self.data_modes:
        archive_path = os.path.join(self.root, archive_name)
        shutil.unpack_archive(archive_path, self.root)
        os.remove(archive_path)

def get_download_url(self)

Get the urls to download the files.

Expand source code

def get_download_url(self):
    """Collect the ``.tar.gz`` links found on the ``self.mirrors`` page.

    Returns:
        list: the ``href`` of every anchor whose value is non-empty and ends
        with ``.tar.gz``.
    """
    response = requests.get(self.mirrors)
    parsed = BeautifulSoup(response.content, "html.parser")

    hrefs = [anchor.get("href") for anchor in parsed.find_all("a")]

    tar_links = []
    for href in hrefs:
        # Anchors without an href (or with an empty one) are skipped.
        if href and href.endswith(".tar.gz"):
            tar_links.append(href)
    return tar_links

def get_path_and_label(self)

Get the path of the images and labels (masks) in a dataframe

Expand source code

def get_path_and_label(self):
    """Build a DataFrame of scene folders (``image``) and mask headers (``label``).

    For every entry of ``self.data_modes`` the ``.tar.gz`` suffix is removed
    to obtain the folder ``root/BC/<name>``; the ``*mask.hdr`` files found
    in that folder are collected as labels.
    """
    scene_dirs = []
    mask_files = []

    for archive_name in self.data_modes:
        scene_name = archive_name.replace(".tar.gz", "")
        scene_dirs.append(os.path.join(self.root, "BC", scene_name))
        mask_pattern = os.path.join(self.root, "BC", scene_name, "*mask.hdr")
        mask_files += glob.glob(mask_pattern)

    return pd.DataFrame({"image": scene_dirs, "label": mask_files})

class L8SPARCS (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

Landsat 8 SPARCS Cloud.

https://www.usgs.gov/core-science-systems/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs

Download: https://landsat.usgs.gov/cloud-validation/sparcs/l8cloudmasks.zip

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class L8SPARCS(VisionDataset):
    """Landsat 8 SPARCS Cloud.

    <https://www.usgs.gov/core-science-systems/nli/landsat/spatial-procedures-automated-removal-cloud-and-shadow-sparcs>

    Download: <https://landsat.usgs.gov/cloud-validation/sparcs/l8cloudmasks.zip>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g, transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://landsat.usgs.gov/cloud-validation/sparcs/"
    resources = "l8cloudmasks.zip"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super().__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        # Name of the folder, under the root, that holds the images and masks.
        self.data_mode = "sending"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def _check_exists(self) -> bool:
        """Return True when ``root/data_mode`` exists; also stores ``self.data_path``."""
        self.data_path = os.path.join(self.root, self.data_mode)
        return os.path.exists(self.data_path)

    def download(self) -> None:
        """Download file"""
        # Make sure the destination directory exists before writing into it.
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Extract the .zip file and delete the archive afterwards."""
        archive_path = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive_path, self.root)
        os.remove(archive_path)

    def get_path_and_label(self):
        """Get the path of the images and labels (masks) in a dataframe"""
        data_dir = os.path.join(self.root, self.data_mode)
        # Both listings are sorted so that row i pairs the i-th photo with
        # the i-th mask.
        image_path = sorted(glob.glob(os.path.join(data_dir, "*_photo.png")))
        label = sorted(glob.glob(os.path.join(data_dir, "*_mask.png")))

        return pd.DataFrame({"image": image_path, "label": label})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, mask)
        """
        img_path = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        img = np.array(_load_img(img_path))
        mask = np.array(_load_img(mask_path))

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            mask = Image.fromarray(mask)
            mask = self.target_transform(mask)
        return img, mask

    def __len__(self) -> int:
        return len(self.img_labels)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def download(self) ‑> None

Download file

Expand source code

def download(self) -> None:
    """Download the archive ``self.resources`` from ``self.mirrors`` into ``self.root``."""
    # Make sure the destination directory exists before writing into it.
    os.makedirs(self.root, exist_ok=True)
    file_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(file_url, os.path.join(self.root, self.resources))

def extract_file(self) ‑> None

Extract the .zip file

Expand source code

def extract_file(self) -> None:
    """Unpack the downloaded archive into ``self.root``, then delete the archive."""
    archive_path = os.path.join(self.root, self.resources)
    shutil.unpack_archive(archive_path, self.root)
    os.remove(archive_path)

def get_path_and_label(self)

Get the path of the images and labels (masks) in a dataframe

Expand source code

def get_path_and_label(self):
    """Build a DataFrame pairing photo paths (``image``) with mask paths (``label``).

    Both columns are globbed from ``root/data_mode`` (``*_photo.png`` and
    ``*_mask.png``) and sorted independently, so row i holds the i-th photo
    and the i-th mask in sort order.
    """
    data_dir = os.path.join(self.root, self.data_mode)
    photos = glob.glob(os.path.join(data_dir, "*_photo.png"))
    masks = glob.glob(os.path.join(data_dir, "*_mask.png"))

    return pd.DataFrame({"image": sorted(photos), "label": sorted(masks)})

class LandCover (root: str, transform=Compose( Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), target_transform=Compose( Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), download: bool = False)

The LandCover.ai (Land Cover from Aerial Imagery) dataset.

https://landcover.ai/download/landcover.ai.v1.zip

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class LandCover(VisionDataset):
    """The LandCover.ai (Land Cover from Aerial Imagery) dataset.

    <https://landcover.ai/download/landcover.ai.v1.zip>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://landcover.ai/download"
    resources = "landcover.ai.v1.zip"

    def __init__(
        self,
        root: str,
        transform=Compose([Resize((256, 256)), ToTensor()]),
        target_transform=Compose([Resize((256, 256)), ToTensor()]),
        download: bool = False,
    ) -> None:

        super(LandCover, self).__init__(
            root, transform=transform, target_transform=target_transform
        )

        self.root = root

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()
                # The archive is unpacked under <root>/landcover, so the tiles must be
                # cut from there and not from "./landcover" in the working directory.
                self.to_chip_img_mask(os.path.join(self.root, "landcover"))

        self.img_labels = self.get_image_path_and_mask_path()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, mask)
        """
        img_path = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]
        img = np.array(_load_img(img_path))
        mask = np.array(_load_img(mask_path))

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            mask = Image.fromarray(mask)
            mask = self.target_transform(mask)
        return img, mask

    def __len__(self) -> int:
        """Return the number of (image, mask) pairs."""
        return len(self.img_labels)

    def get_image_path_and_mask_path(self):
        """Return a DataFrame with columns ``image`` and ``mask`` holding file paths.

        Every entry of ``<root>/landcover/images`` and ``<root>/landcover/masks`` is
        listed; rows are paired by sorted position.
        """
        # NOTE(review): this lists the original files, not the tiles that
        # to_chip_img_mask writes under landcover/output -- confirm that is intended.
        img_dir = os.path.join(self.root, "landcover", "images")
        msk_dir = os.path.join(self.root, "landcover", "masks")

        images_path = sorted(os.path.join(img_dir, name) for name in os.listdir(img_dir))
        # Mask names must be joined onto the mask directory (previously the image
        # directory was used, so every "mask" path pointed at an image).
        masks_path = sorted(os.path.join(msk_dir, name) for name in os.listdir(msk_dir))

        return pd.DataFrame({"image": images_path, "mask": masks_path})

    def to_chip_img_mask(self, base):
        """Cut every image/mask ``.tif`` pair under ``base`` into 512x512 tiles.

        Reads ``<base>/images/*.tif`` and ``<base>/masks/*.tif`` and writes
        ``<base>/output/images/<name>_<k>.jpg`` and ``<base>/output/masks/<name>_<k>.png``.
        Partial tiles at the right/bottom border are skipped but still consume an
        index ``k``.

        Args:
            base (str): Directory containing the ``images`` and ``masks`` folders.
                A relative path is resolved against the current working directory.

        Raises:
            ValueError: If an image or mask file cannot be read.
        """
        imgs_dir = os.path.join(base, "images")
        masks_dir = os.path.join(base, "masks")
        output_dir = os.path.join(base, "output")
        output_imgs_dir = os.path.join(output_dir, "images")
        output_masks_dir = os.path.join(output_dir, "masks")

        target_size = 512

        img_paths = sorted(glob.glob(os.path.join(imgs_dir, "*.tif")))
        mask_paths = sorted(glob.glob(os.path.join(masks_dir, "*.tif")))

        # exist_ok so an interrupted earlier run does not block a retry.
        os.makedirs(output_imgs_dir, exist_ok=True)
        os.makedirs(output_masks_dir, exist_ok=True)

        for i, (img_path, mask_path) in enumerate(zip(img_paths, mask_paths)):
            img_filename = os.path.splitext(os.path.basename(img_path))[0]
            mask_filename = os.path.splitext(os.path.basename(mask_path))[0]
            img = cv2.imread(img_path)
            mask = cv2.imread(mask_path)

            # cv2.imread signals failure by returning None instead of raising.
            if img is None or mask is None:
                raise ValueError("Could not read {} or {}".format(img_path, mask_path))

            assert img_filename == mask_filename and img.shape[:2] == mask.shape[:2]

            k = 0
            for y in range(0, img.shape[0], target_size):
                for x in range(0, img.shape[1], target_size):
                    img_tile = img[y : y + target_size, x : x + target_size]
                    mask_tile = mask[y : y + target_size, x : x + target_size]

                    if img_tile.shape[0] == target_size and img_tile.shape[1] == target_size:
                        out_img_path = os.path.join(
                            output_imgs_dir, "{}_{}.jpg".format(img_filename, k)
                        )
                        cv2.imwrite(out_img_path, img_tile)

                        out_mask_path = os.path.join(
                            output_masks_dir, "{}_{}.png".format(mask_filename, k)
                        )
                        cv2.imwrite(out_mask_path, mask_tile)

                    k += 1

            print("Processed {} {}/{}".format(img_filename, i + 1, len(img_paths)))

    def download(self) -> None:
        """Download the dataset archive into ``root`` (extraction is done by ``extract_file``)."""
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def _check_exists(self):
        """Return True when both ``<root>/landcover/images`` and ``.../masks`` exist.

        Also stores ``<root>/landcover`` in ``self.data_path``.
        """
        self.data_path = os.path.join(
            self.root,
            "landcover",
        )

        return os.path.exists(os.path.join(self.data_path, "images")) and os.path.exists(
            os.path.join(self.data_path, "masks")
        )

    def extract_file(self):
        """Unpack the downloaded archive into ``<root>/landcover`` and delete the archive."""
        archive_path = os.path.join(self.root, self.resources)
        target_dir = os.path.join(self.root, "landcover")
        # exist_ok: a partially populated directory from an earlier attempt must not
        # abort the extraction.
        os.makedirs(target_dir, exist_ok=True)
        shutil.unpack_archive(archive_path, target_dir)
        os.remove(archive_path)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def download(self) ‑> None

Download the dataset archive into the root directory (extraction is done by extract_file).

Expand source code

def download(self) -> None:
    """Download the dataset archive into ``root`` (extraction is done by ``extract_file``)."""
    # Create the destination first so the download does not fail on a fresh root.
    os.makedirs(self.root, exist_ok=True)
    file_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(file_url, os.path.join(self.root, self.resources))

def extract_file(self)

Extract file from compressed.

Expand source code

def extract_file(self):
    """Unpack the downloaded archive into ``<root>/landcover`` and delete the archive."""
    archive_path = os.path.join(self.root, self.resources)
    target_dir = os.path.join(self.root, "landcover")
    # exist_ok: a left-over (possibly incomplete) directory from an earlier attempt
    # must not abort the extraction with FileExistsError.
    os.makedirs(target_dir, exist_ok=True)
    shutil.unpack_archive(archive_path, target_dir)
    os.remove(archive_path)

def get_image_path_and_mask_path(self)

Return a DataFrame consisting of image paths and mask paths.

Expand source code

def get_image_path_and_mask_path(self):
    """Return a DataFrame with columns ``image`` and ``mask`` holding file paths.

    Every entry of ``<root>/landcover/images`` and ``<root>/landcover/masks`` is
    listed; rows are paired by sorted position.
    """
    img_dir = os.path.join(self.root, "landcover", "images")
    msk_dir = os.path.join(self.root, "landcover", "masks")

    images_path = sorted(os.path.join(img_dir, name) for name in os.listdir(img_dir))
    # Mask names must be joined onto the mask directory (previously the image
    # directory was used, so every "mask" path pointed at an image).
    masks_path = sorted(os.path.join(msk_dir, name) for name in os.listdir(msk_dir))

    return pd.DataFrame({"image": images_path, "mask": masks_path})

def to_chip_img_mask(self, base)

Expand source code

def to_chip_img_mask(self, base):
    """Cut every image/mask ``.tif`` pair under ``base`` into 512x512 tiles.

    Reads ``<base>/images/*.tif`` and ``<base>/masks/*.tif`` and writes
    ``<base>/output/images/<name>_<k>.jpg`` and ``<base>/output/masks/<name>_<k>.png``.
    Partial tiles at the right/bottom border are skipped but still consume an
    index ``k``.

    Args:
        base (str): Directory containing the ``images`` and ``masks`` folders.
            A relative path is resolved against the current working directory;
            absolute paths are supported as well.

    Raises:
        ValueError: If an image or mask file cannot be read.
    """
    # os.path.join instead of "./{}".format(base) so an absolute base keeps working.
    imgs_dir = os.path.join(base, "images")
    masks_dir = os.path.join(base, "masks")
    output_dir = os.path.join(base, "output")
    output_imgs_dir = os.path.join(output_dir, "images")
    output_masks_dir = os.path.join(output_dir, "masks")

    target_size = 512

    img_paths = sorted(glob.glob(os.path.join(imgs_dir, "*.tif")))
    mask_paths = sorted(glob.glob(os.path.join(masks_dir, "*.tif")))

    # exist_ok so an interrupted earlier run does not block a retry.
    os.makedirs(output_imgs_dir, exist_ok=True)
    os.makedirs(output_masks_dir, exist_ok=True)

    for i, (img_path, mask_path) in enumerate(zip(img_paths, mask_paths)):
        img_filename = os.path.splitext(os.path.basename(img_path))[0]
        mask_filename = os.path.splitext(os.path.basename(mask_path))[0]
        img = cv2.imread(img_path)
        mask = cv2.imread(mask_path)

        # cv2.imread signals failure by returning None instead of raising.
        if img is None or mask is None:
            raise ValueError("Could not read {} or {}".format(img_path, mask_path))

        assert img_filename == mask_filename and img.shape[:2] == mask.shape[:2]

        k = 0
        for y in range(0, img.shape[0], target_size):
            for x in range(0, img.shape[1], target_size):
                img_tile = img[y : y + target_size, x : x + target_size]
                mask_tile = mask[y : y + target_size, x : x + target_size]

                if img_tile.shape[0] == target_size and img_tile.shape[1] == target_size:
                    out_img_path = os.path.join(
                        output_imgs_dir, "{}_{}.jpg".format(img_filename, k)
                    )
                    cv2.imwrite(out_img_path, img_tile)

                    out_mask_path = os.path.join(
                        output_masks_dir, "{}_{}.png".format(mask_filename, k)
                    )
                    cv2.imwrite(out_mask_path, mask_tile)

                k += 1

        print("Processed {} {}/{}".format(img_filename, i + 1, len(img_paths)))

class RESISC45 (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

RESISC45 Dataset.

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in an PIL image and returns a transformed version. E.g, transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class RESISC45(VisionDataset):
    """RESISC45 Dataset.

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://storage.googleapis.com/ossjr"
    resources = "NWPU-RESISC45.zip"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super(RESISC45, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.class_enc = CLASS_ENC
        self.class_dec = CLASS_DEC

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        img = np.array(_load_img(img_path))
        target = self.img_labels.iloc[idx, 1]

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            # The target is an encoded class label, not an image, so it is handed to
            # the transform directly (wrapping it in Image.fromarray fails on a scalar).
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        """Return the number of (image, label) rows."""
        return len(self.img_labels)

    def get_path_and_label(self):
        """Return a DataFrame with columns ``image`` (file path) and ``label`` (encoded class).

        Each class folder under ``<root>/NWPU-RESISC45`` contributes 700 paths named
        ``<class>_001.jpg`` ... ``<class>_700.jpg``; the paths are generated, not read
        from disk.
        """
        DATA_SIZE = 700
        data_dir = os.path.join(self.root, "NWPU-RESISC45")
        # Sorted for a reproducible sample order; only directories are class folders,
        # so stray files (e.g. OS metadata) no longer trigger a KeyError.
        category = sorted(
            name for name in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, name))
        )
        image_path = []
        label = []
        for cat in category:
            cat_enc = self.class_enc[cat]
            label += [cat_enc] * DATA_SIZE
            for num in range(1, DATA_SIZE + 1):
                filename = cat + "_" + str(num).zfill(3) + ".jpg"
                image_path.append(os.path.join(data_dir, cat, filename))

        return pd.DataFrame({"image": image_path, "label": label})

    def _check_exists(self):
        """Return True when ``<root>/NWPU-RESISC45`` exists."""
        return os.path.exists(os.path.join(self.root, "NWPU-RESISC45"))

    def download(self) -> None:
        """Download the dataset archive into ``root`` (extraction is done by ``extract_file``)."""
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Unpack the downloaded archive into ``root`` and delete the archive."""
        archive_path = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive_path, f"{self.root}")
        os.remove(archive_path)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def download(self) ‑> None

Download and extract file.

Expand source code

def download(self) -> None:
    """Download the dataset archive into ``root`` (extraction is done by ``extract_file``)."""
    # Create the destination first so the download does not fail on a fresh root.
    os.makedirs(self.root, exist_ok=True)
    file_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(file_url, os.path.join(self.root, self.resources))

def extract_file(self) ‑> None

Extract file from compressed.

Expand source code

def extract_file(self) -> None:
    """Unpack the downloaded archive into ``root`` and delete the archive."""
    archive_path = os.path.join(self.root, self.resources)
    destination = f"{self.root}"
    shutil.unpack_archive(archive_path, destination)
    # The archive is no longer needed once its contents are on disk.
    os.remove(archive_path)

def get_path_and_label(self)

Return a DataFrame consisting of image paths and their corresponding labels.

Expand source code

def get_path_and_label(self):
    """Return a DataFrame with columns ``image`` (file path) and ``label`` (encoded class).

    Each class folder under ``<root>/NWPU-RESISC45`` contributes 700 paths named
    ``<class>_001.jpg`` ... ``<class>_700.jpg``; the paths are generated, not read
    from disk.
    """
    DATA_SIZE = 700
    data_dir = os.path.join(self.root, "NWPU-RESISC45")
    # Sorted for a reproducible sample order; only directories are class folders,
    # so stray files (e.g. OS metadata) no longer trigger a KeyError.
    category = sorted(
        name for name in os.listdir(data_dir) if os.path.isdir(os.path.join(data_dir, name))
    )
    image_path = []
    label = []
    for cat in category:
        cat_enc = self.class_enc[cat]
        label += [cat_enc] * DATA_SIZE
        for num in range(1, DATA_SIZE + 1):
            filename = cat + "_" + str(num).zfill(3) + ".jpg"
            image_path.append(os.path.join(data_dir, cat, filename))

    return pd.DataFrame({"image": image_path, "label": label})

class Sentinel2Cloud (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

Sentinel-2 Cloud Mask Catalogue dataset.

classification_tags: https://zenodo.org/record/4172871/files/classification_tags.csv?download=1; subscenes: https://zenodo.org/record/4172871/files/subscenes.zip?download=1; masks: https://zenodo.org/record/4172871/files/masks.zip?download=1

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in an PIL image and returns a transformed version. E.g, transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class Sentinel2Cloud(VisionDataset):
    """Sentinel-2 Cloud Mask Catalogue dataset.

    classification_tags: <https://zenodo.org/record/4172871/files/classification_tags.csv?download=1>
    subscenes: <https://zenodo.org/record/4172871/files/subscenes.zip?download=1>
    masks: <https://zenodo.org/record/4172871/files/masks.zip?download=1>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://zenodo.org/record/4172871/files/"
    resources = "subscenes.zip"
    mask_resources = "masks.zip"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super(Sentinel2Cloud, self).__init__(
            root, transform=transform, target_transform=target_transform
        )

        self.root = root

        os.makedirs(self.root, exist_ok=True)

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_image_path_and_mask_path()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, mask)
        """
        img_path = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]

        img = _load_npy(img_path)
        mask = _load_npy(mask_path)

        # NOTE(review): Image.fromarray only accepts array layouts PIL knows; confirm
        # the stored arrays qualify before passing a transform.
        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.target_transform is not None:
            mask = Image.fromarray(mask)
            mask = self.target_transform(mask)
        return img, mask

    def __len__(self) -> int:
        """Return the number of (image, mask) pairs."""
        return len(self.img_labels)

    def get_image_path_and_mask_path(self):
        """Return a DataFrame with columns ``image`` and ``mask`` holding ``.npy`` paths.

        Files are taken from ``<root>/sentinel2cloud/subscenes`` and
        ``<root>/sentinel2cloud/masks``; rows are paired by sorted position.
        """
        img_dir = os.path.join(self.root, "sentinel2cloud", "subscenes")
        msk_dir = os.path.join(self.root, "sentinel2cloud", "masks")

        images_path = sorted(glob.glob(os.path.join(img_dir, "*.npy")))
        masks_path = sorted(glob.glob(os.path.join(msk_dir, "*.npy")))

        return pd.DataFrame({"image": images_path, "mask": masks_path})

    def download(self) -> None:
        """Download the subscene and mask archives into ``root``."""
        for resource in (self.resources, self.mask_resources):
            file_url = posixpath.join(self.mirrors, resource)
            _urlretrieve(file_url, os.path.join(self.root, resource))

    def _check_exists(self):
        """Return True when both ``subscenes`` and ``masks`` exist under ``<root>/sentinel2cloud``.

        Also stores ``<root>/sentinel2cloud`` in ``self.data_path``.
        """
        self.data_path = os.path.join(self.root, "sentinel2cloud")

        return os.path.exists(os.path.join(self.data_path, "subscenes")) and os.path.exists(
            os.path.join(self.data_path, "masks")
        )

    def extract_file(self):
        """Unpack both archives into ``<root>/sentinel2cloud`` and delete them."""
        target_dir = os.path.join(self.root, "sentinel2cloud")
        # exist_ok: a half-extracted directory from an earlier attempt must not abort
        # the extraction with FileExistsError.
        os.makedirs(target_dir, exist_ok=True)

        for resource in (self.resources, self.mask_resources):
            archive_path = os.path.join(self.root, resource)
            shutil.unpack_archive(archive_path, target_dir)
            os.remove(archive_path)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mask_resources
var mirrors
var resources

Methods

def download(self) ‑> None

Download the subscene and mask archives into the root directory (extraction is done by extract_file).

Expand source code

def download(self) -> None:
    """Download the subscene archive and then the mask archive into ``root``."""
    # Same order as before: subscenes first, masks second.
    for archive_name in (self.resources, self.mask_resources):
        remote = posixpath.join(self.mirrors, archive_name)
        local = os.path.join(self.root, archive_name)
        _urlretrieve(remote, local)

def extract_file(self)

Extract file from compressed.

Expand source code

def extract_file(self):
    """Unpack both archives into ``<root>/sentinel2cloud`` and delete them."""
    target_dir = os.path.join(self.root, "sentinel2cloud")
    # exist_ok: a half-extracted directory from an earlier attempt must not abort
    # the extraction with FileExistsError.
    os.makedirs(target_dir, exist_ok=True)

    # Subscenes first, masks second; each archive is removed right after unpacking.
    for resource in (self.resources, self.mask_resources):
        archive_path = os.path.join(self.root, resource)
        shutil.unpack_archive(archive_path, target_dir)
        os.remove(archive_path)

def get_image_path_and_mask_path(self)

Return a DataFrame consisting of image paths and mask paths.

Expand source code

def get_image_path_and_mask_path(self):
    """Build a DataFrame with columns ``image`` and ``mask`` of ``.npy`` file paths.

    Files are taken from ``<root>/sentinel2cloud/subscenes`` and
    ``<root>/sentinel2cloud/masks``; rows are paired by sorted position.
    """
    dataset_dir = os.path.join(self.root, "sentinel2cloud")

    subscene_files = sorted(glob.glob(os.path.join(dataset_dir, "subscenes", "*.npy")))
    mask_files = sorted(glob.glob(os.path.join(dataset_dir, "masks", "*.npy")))

    return pd.DataFrame({"image": subscene_files, "mask": mask_files})

class So2Sat (root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = True)

So2Sat Dataset to Predict Local Climate Zone (LCZ):

https://mediatum.ub.tum.de/1454690

Args

root : string: Root directory of dataset.
train : bool, optional: If True, creates dataset from training set, otherwise creates from validation set.
transform : callable, optional: A function/transform that takes in an PIL image and returns a transformed version. E.g, transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class So2Sat(VisionDataset):
    """So2Sat Dataset to Predict Local Climate Zone (LCZ):

    <https://mediatum.ub.tum.de/1454690>

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from validation set.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://dataserv.ub.tum.de/s/m1454690/download?path=/&files="
    resources = ["training.h5", "validation.h5"]

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = True,
    ) -> None:

        super(So2Sat, self).__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.data_mode = "training" if train else "validation"

        if download:
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()

        self.img_labels = self.get_path_and_label()

    def __len__(self) -> int:
        # NOTE(review): img_labels is a dict with three keys, so this returns 3 rather
        # than the number of samples -- confirm whether that is intended.
        return len(self.img_labels)

    def __getitem__(self, idx: int) -> Tuple[Any, Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (sen1, sen2, label)
        """
        sen1 = self.img_labels["sen1"][idx]
        sen2 = self.img_labels["sen2"][idx]
        label = self.img_labels["label"][idx]

        # NOTE(review): Image.fromarray only accepts array layouts PIL knows; confirm
        # the stored patches and labels qualify before passing a transform.
        if self.transform is not None:
            sen1 = Image.fromarray(sen1)
            sen1 = self.transform(sen1)

            sen2 = Image.fromarray(sen2)
            sen2 = self.transform(sen2)

        if self.target_transform is not None:
            label = Image.fromarray(label)
            label = self.target_transform(label)

        return (sen1, sen2, label)

    def get_path_and_label(self):
        """Load the ``sen1``, ``sen2`` and ``label`` datasets of the selected split.

        Returns:
            dict: keys ``"sen1"``, ``"sen2"`` and ``"label"``, each mapped to a NumPy
            array read in full from ``<root>/<data_mode>.h5``.
        """
        h5_path = os.path.join(self.root, f"{self.data_mode}.h5")

        # np.array copies the data into memory, so the file can be closed right away
        # instead of leaking the handle for the lifetime of the process.
        with h5py.File(h5_path, "r") as file:
            sen1 = np.array(file["sen1"])
            sen2 = np.array(file["sen2"])
            label = np.array(file["label"])

        return {"sen1": sen1, "sen2": sen2, "label": label}

    def _check_exists(self):
        """Return True when both the training and the validation file exist in ``root``."""
        return os.path.exists(os.path.join(self.root, self.resources[0])) and os.path.exists(
            os.path.join(self.root, self.resources[1])
        )

    def download(self):
        """Download the training and validation files into ``root``."""
        os.makedirs(self.root, exist_ok=True)

        for resource in self.resources:
            # NOTE(review): posixpath.join inserts "/" after "files=", giving
            # "...&files=/training.h5" -- confirm the server accepts that form.
            file_url = posixpath.join(self.mirrors, resource)
            _urlretrieve(file_url, os.path.join(self.root, resource))

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def download(self)

Download the training and validation files into the root directory.

Expand source code

def download(self):
    """Download every file listed in ``self.resources`` into ``root``.

    The root directory is created first when it does not exist yet.
    """
    root_missing = not os.path.exists(self.root)
    if root_missing:
        os.makedirs(self.root)

    for file_name in self.resources:
        remote = posixpath.join(self.mirrors, file_name)
        local = os.path.join(self.root, file_name)
        _urlretrieve(remote, local)

def get_path_and_label(self)

Return a dict with the sen1, sen2 and label arrays of the selected split.

Expand source code

def get_path_and_label(self):
    """Load the ``sen1``, ``sen2`` and ``label`` datasets of the selected split.

    Returns:
        dict: keys ``"sen1"``, ``"sen2"`` and ``"label"``, each mapped to a NumPy
        array read in full from ``<root>/<data_mode>.h5``.
    """
    h5_path = os.path.join(self.root, f"{self.data_mode}.h5")

    # np.array copies the data into memory, so the file can be closed right away
    # instead of leaking the handle for the lifetime of the process.
    with h5py.File(h5_path, "r") as file:
        sen1 = np.array(file["sen1"])
        sen2 = np.array(file["sen2"])
        label = np.array(file["label"])

    return {"sen1": sen1, "sen2": sen2, "label": label}

class SpaceNet7 (root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

SpaceNet7 (SN7): Multi-Temporal Urban Development Challenge

https://spacenet.ai/sn7-challenge/

Args

root : string: Root directory of dataset.
train : bool, optional: If True, creates dataset from training set, otherwise creates from test set.
transform : callable, optional: A function/transform that takes in an PIL image and returns a transformed version. E.g, transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class SpaceNet7(VisionDataset):
    """SpaceNet7 (SN7): Multi-Temporal Urban Development Challenge

    <https://spacenet.ai/sn7-challenge/>

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from test set.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    resources = {
        "train": "s3://spacenet-dataset/spacenet/SN7_buildings/tarballs/SN7_buildings_train.tar.gz",
        "test": "s3://spacenet-dataset/spacenet/SN7_buildings/tarballs/SN7_buildings_test_public.tar.gz",
    }

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super(SpaceNet7, self).__init__(
            root, transform=transform, target_transform=target_transform
        )

        self.root = root
        self.data_mode = "train" if train else "test"
        # Tarball file name (last URL component) and the folder it unpacks to.
        self.filename = self.resources.get(self.data_mode, "NULL").split("/")[-1]
        self.dataset_path = os.path.join(root, self.filename)
        data_mode_folder = {"train": "train", "test": "test_public"}
        self.folder_name = data_mode_folder.get(self.data_mode, "NULL")

        os.makedirs(self.root, exist_ok=True)

        if download and self._check_exists(self.dataset_path):
            print("file already exists.")

        if download and not self._check_exists(os.path.join(self.root, self.folder_name)):
            self.download()
            self.extract_file()

        if self.data_mode == "train":
            train_dir = os.path.join(self.root, "train")
            aois = sorted(
                f for f in os.listdir(train_dir) if os.path.isdir(os.path.join(train_dir, f))
            )

            # Masks are derived data: build them for every AOI that has no masks folder.
            aois_without_mask = [
                aoi
                for aoi in aois
                if not self._check_exists(os.path.join(train_dir, aoi, "masks/"))
            ]

            if aois_without_mask:
                print("Generating masks...")
                self.generate_mask(aois_without_mask)

        self.img_labels = self.get_path_and_label()

    def _check_exists(self, obj) -> bool:
        """Return True when the path ``obj`` exists."""
        return os.path.exists(obj)

    def download(self):
        """Download the tarball of the selected split into ``root``.

        Raises:
            ValueError: If ``data_mode`` has no entry in ``resources``.
        """
        if self.data_mode not in self.resources:
            raise ValueError("Unrecognized data_mode")

        downloader(self.resources[self.data_mode], self.root)

    def extract_file(self):
        """Unpack the downloaded tarball into ``root`` (the tarball is kept)."""
        shutil.unpack_archive(self.dataset_path, self.root)

    def generate_mask(self, aois):
        """
        Create training masks for the given AOI folders.

        The work is distributed over a multiprocessing pool to increase speed.
        We'll only make a 1-channel mask for now, but Solaris supports a multi-channel mask as well, see
            https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb

        Args:
            aois (list): Names of the AOI sub-directories of ``<root>/train`` to process.
        """
        make_fbc = False

        input_args = []
        for i, aoi in enumerate(aois):
            print(i, "aoi:", aoi)
            im_dir = os.path.join(self.root, "train", aoi, "images_masked/")
            json_dir = os.path.join(self.root, "train", aoi, "labels_match/")
            out_dir_mask = os.path.join(self.root, "train", aoi, "masks/")
            out_dir_mask_fbc = os.path.join(self.root, "train", aoi, "masks_fbc/")
            os.makedirs(out_dir_mask, exist_ok=True)
            if make_fbc:
                os.makedirs(out_dir_mask_fbc, exist_ok=True)

            json_files = sorted(
                f
                for f in os.listdir(json_dir)
                if f.endswith("Buildings.geojson") and os.path.exists(os.path.join(json_dir, f))
            )
            for f in json_files:
                name_root = f.split(".")[0]
                json_path = os.path.join(json_dir, f)
                image_path = (
                    os.path.join(im_dir, name_root + ".tif")
                    .replace("labels", "images")
                    .replace("_Buildings", "")
                )
                output_path_mask = os.path.join(out_dir_mask, name_root + ".tif")
                if make_fbc:
                    output_path_mask_fbc = os.path.join(out_dir_mask_fbc, name_root + ".tif")
                else:
                    output_path_mask_fbc = None

                # Masks that already exist are not regenerated.
                if os.path.exists(output_path_mask):
                    continue

                input_args.append(
                    [
                        make_geojsons_and_masks,
                        name_root,
                        image_path,
                        json_path,
                        output_path_mask,
                        output_path_mask_fbc,
                    ]
                )

        if not input_args:
            return

        # Leave one core free, but never ask for fewer than one worker
        # (Pool(0) raises ValueError on single-core machines).
        workers = max(1, multiprocessing.cpu_count() - 1)
        pool = multiprocessing.Pool(workers)
        try:
            pool.map(map_wrapper, input_args)
        finally:
            pool.close()
            pool.join()

    def get_path_and_label(self):
        """Return a DataFrame of file paths for the selected split.

        For train data the columns are ``image`` and ``label``; only images whose
        ``masks/<name>_Buildings.tif`` exists are included. For test data there is a
        single ``image`` column.
        """
        # Use the folder of the selected split. The previous loop always returned
        # after inspecting "train", so test mode listed training images.
        split_dir = os.path.join(self.root, self.folder_name)
        is_train = self.data_mode == "train"

        im_list, mask_list = [], []
        subdirs = sorted(
            f for f in os.listdir(split_dir) if os.path.isdir(os.path.join(split_dir, f))
        )
        for subdir in subdirs:
            image_dir = os.path.join(split_dir, subdir, "images_masked")
            for f in sorted(os.listdir(image_dir)):
                if not f.endswith(".tif"):
                    continue
                image_path = os.path.join(image_dir, f)

                if not is_train:
                    im_list.append(image_path)
                    continue

                mask_path = os.path.join(
                    split_dir, subdir, "masks", f.split(".")[0] + "_Buildings.tif"
                )
                if os.path.exists(mask_path):
                    im_list.append(image_path)
                    mask_list.append(mask_path)

        if is_train:
            return pd.DataFrame({"image": im_list, "label": mask_list})
        return pd.DataFrame({"image": im_list})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, mask) or (img)
        """
        img_path = self.img_labels.iloc[idx, 0]
        img = np.array(_load_img(img_path))

        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)

        if self.data_mode == "train":
            mask_path = self.img_labels.iloc[idx, 1]
            mask = np.array(_load_img(mask_path))

            if self.target_transform is not None:
                mask = Image.fromarray(mask)
                mask = self.target_transform(mask)
            sample = (img, mask)

        elif self.data_mode == "test":
            # Test data has no masks, so only the image is returned.
            sample = img

        return sample

    def __len__(self) -> int:
        """Return the number of rows in the path table."""
        return len(self.img_labels)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var resources

Methods

def download(self)

Download dataset and extract it

Expand source code

def download(self):
    """Fetch the resource registered for the current ``data_mode`` into ``self.root``.

    Raises:
        ValueError: If ``self.data_mode`` is not a key of ``self.resources``.
    """
    if self.data_mode in self.resources.keys():
        downloader(self.resources[self.data_mode], self.root)
        return

    raise ValueError("Unrecognized data_mode")

def extract_file(self)

Expand source code

def extract_file(self):
    """Unpack the archive at ``self.dataset_path`` into ``self.root``."""
    archive, destination = self.dataset_path, self.root
    shutil.unpack_archive(archive, destination)

def generate_mask(self, aois)

Create training masks, using multiple processes to increase speed. Only a 1-channel mask is made for now, but Solaris supports multi-channel masks as well; see https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb

Expand source code

def generate_mask(self, aois):
    """
    Create training masks for the given AOIs.

    Every ``*Buildings.geojson`` file found under ``<root>/train/<aoi>/labels_match/``
    whose mask is not yet present in ``<root>/train/<aoi>/masks/`` is queued, and the
    queued jobs are run on a ``multiprocessing.Pool`` to increase speed.

    We'll only make a 1-channel mask for now, but Solaris supports a multi-channel mask as well, see
        https://github.com/CosmiQ/solaris/blob/master/docs/tutorials/notebooks/api_masks_tutorial.ipynb

    Args:
        aois: Iterable of AOI directory names located under ``<root>/train``.
    """
    make_fbc = False

    input_args = []
    for i, aoi in enumerate(aois):
        print(i, "aoi:", aoi)
        im_dir = os.path.join(self.root, "train", aoi, "images_masked/")
        json_dir = os.path.join(self.root, "train", aoi, "labels_match/")
        out_dir_mask = os.path.join(self.root, "train", aoi, "masks/")
        out_dir_mask_fbc = os.path.join(self.root, "train", aoi, "masks_fbc/")
        os.makedirs(out_dir_mask, exist_ok=True)
        if make_fbc:
            os.makedirs(out_dir_mask_fbc, exist_ok=True)

        json_files = sorted(
            f
            for f in os.listdir(json_dir)
            if f.endswith("Buildings.geojson") and os.path.exists(os.path.join(json_dir, f))
        )
        for f in json_files:
            name_root = f.split(".")[0]
            json_path = os.path.join(json_dir, f)
            # Derive the image file name from the label file name only, so that a
            # root or AOI directory containing "labels" or "_Buildings" is left intact.
            image_name = (name_root + ".tif").replace("labels", "images").replace("_Buildings", "")
            image_path = os.path.join(im_dir, image_name)
            output_path_mask = os.path.join(out_dir_mask, name_root + ".tif")
            if make_fbc:
                output_path_mask_fbc = os.path.join(out_dir_mask_fbc, name_root + ".tif")
            else:
                output_path_mask_fbc = None

            # Masks that already exist are not regenerated.
            if os.path.exists(output_path_mask):
                continue

            input_args.append(
                [
                    make_geojsons_and_masks,
                    name_root,
                    image_path,
                    json_path,
                    output_path_mask,
                    output_path_mask_fbc,
                ]
            )

    # Nothing to generate: avoid spinning up worker processes for no work.
    if not input_args:
        return

    # Leave one core free, but never ask for zero workers (single-core hosts).
    n_workers = max(1, multiprocessing.cpu_count() - 1)
    pool = multiprocessing.Pool(n_workers)
    try:
        pool.map(map_wrapper, input_args)
    finally:
        pool.close()
        pool.join()

def get_path_and_label(self)

Return a dataframe consisting of image paths and their corresponding labels (for train data), or image paths only (for test data).

Expand source code

def get_path_and_label(self):
    """Return a dataframe of image paths and, for train data, their mask paths.

    In "train" mode ``<root>/train`` is scanned and only ``.tif`` images that have a
    matching ``masks/<name>_Buildings.tif`` file are kept; the dataframe has the
    columns "image" and "label". In "test" mode ``<root>/test_public`` is scanned
    and every ``.tif`` image is kept; the dataframe has the single column "image".

    Returns:
        pandas.DataFrame: One row per sample, sorted by sub-directory and file name.

    Raises:
        ValueError: If ``self.data_mode`` is neither "train" nor "test".
    """
    # Each data mode reads exactly one population directory.
    split_dirs = {"train": "train", "test": "test_public"}
    if self.data_mode not in split_dirs:
        raise ValueError("Unrecognized data_mode")

    is_train = self.data_mode == "train"
    d = os.path.join(self.root, split_dirs[self.data_mode])
    im_list, mask_list = [], []
    subdirs = sorted(f for f in os.listdir(d) if os.path.isdir(os.path.join(d, f)))
    for subdir in subdirs:
        im_dir = os.path.join(d, subdir, "images_masked")
        for f in sorted(os.listdir(im_dir)):
            if not f.endswith(".tif"):
                continue
            if not is_train:
                im_list.append(os.path.join(im_dir, f))
                continue
            # Training images without a generated mask are skipped so that the
            # image and label columns stay aligned.
            mask_path = os.path.join(d, subdir, "masks", f.split(".")[0] + "_Buildings.tif")
            if os.path.exists(mask_path):
                im_list.append(os.path.join(im_dir, f))
                mask_list.append(mask_path)

    if is_train:
        return pd.DataFrame({"image": im_list, "label": mask_list})
    return pd.DataFrame({"image": im_list})

class UCMercedLand (root: str, transform=Compose( Resize(size=(256, 256), interpolation=bilinear, max_size=None, antialias=None) ToTensor() ), target_transform: Optional[Callable] = None, download: bool = False)

UC Merced Land Use Dataset.

http://weegee.vision.ucmerced.edu/datasets/UCMerced_LandUse.zip

Args

root : string: Root directory of dataset.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Expand source code

class UCMercedLand(VisionDataset):
    """UC Merced Land Use Dataset.

    <http://weegee.vision.ucmerced.edu/datasets/UCMerced_LandUse.zip>

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    # Base URL of the archive and the archive file name.
    mirrors = "http://weegee.vision.ucmerced.edu/datasets/"
    resources = "UCMerced_LandUse.zip"
    # Category directory name -> integer class index.
    classes = {
        "agricultural": 0,
        "airplane": 1,
        "baseballdiamond": 2,
        "beach": 3,
        "buildings": 4,
        "chaparral": 5,
        "denseresidential": 6,
        "forest": 7,
        "freeway": 8,
        "golfcourse": 9,
        "harbor": 10,
        "intersection": 11,
        "mediumresidential": 12,
        "mobilehomepark": 13,
        "overpass": 14,
        "parkinglot": 15,
        "river": 16,
        "runway": 17,
        "sparseresidential": 18,
        "storagetanks": 19,
        "tenniscourt": 20,
    }

    def __init__(
        self,
        root: str,
        transform=Compose([Resize((256, 256)), ToTensor()]),
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super().__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.data_mode = "Images"

        if download:
            # Check the disk once and branch on the result.
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        self.img_labels = self.get_path_and_label()

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, target) where target is index of the target class.
        """
        img_path = self.img_labels.iloc[idx, 0]
        target = self.img_labels.iloc[idx, 1]
        img = np.array(_load_img(img_path))

        if self.transform is not None:
            img = self.transform(Image.fromarray(img))

        if self.target_transform is not None:
            # The target is an integer class index, not an image, so it is handed
            # to the transform as-is instead of being wrapped in a PIL image.
            target = self.target_transform(target)
        return img, target

    def __len__(self) -> int:
        """Return the number of samples listed in ``self.img_labels``."""
        return len(self.img_labels)

    def get_path_and_label(self):
        """Return a dataframe of image paths ("image") and class indices ("label")."""
        image_path = []
        label = []
        for cat, enc in self.classes.items():
            cat_path = os.path.join(self.root, "UCMerced_LandUse", self.data_mode, cat)
            # os.listdir returns entries in arbitrary order; sort them so that the
            # index -> sample mapping is the same on every machine.
            cat_image = [os.path.join(cat_path, path) for path in sorted(os.listdir(cat_path))]
            image_path += cat_image
            label += [enc] * len(cat_image)
        df = pd.DataFrame({"image": image_path, "label": label})

        return df

    def _check_exists(self):
        """Return True when every class directory exists under ``UCMerced_LandUse/Images``."""
        self.data_path = os.path.join(self.root, "UCMerced_LandUse", "Images")
        self.dir_classes = list(self.classes.keys())
        return all(os.path.exists(os.path.join(self.data_path, i)) for i in self.dir_classes)

    def download(self) -> None:
        """Download the dataset archive into ``self.root``."""
        # Make sure the destination directory exists before writing the archive into it.
        os.makedirs(self.root, exist_ok=True)
        file_url = posixpath.join(self.mirrors, self.resources)
        _urlretrieve(file_url, os.path.join(self.root, self.resources))

    def extract_file(self) -> None:
        """Extract the downloaded archive into ``self.root`` and delete the archive."""
        archive_path = os.path.join(self.root, self.resources)
        shutil.unpack_archive(archive_path, self.root)
        os.remove(archive_path)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var classes
var functions : Dict[str, Callable]
var mirrors
var resources

Methods

def download(self) ‑> None

Download the dataset archive into the root directory (extraction is done separately by extract_file).

Expand source code

def download(self) -> None:
    """Retrieve the dataset archive from the mirror and store it under ``self.root``."""
    destination = os.path.join(self.root, self.resources)
    source_url = posixpath.join(self.mirrors, self.resources)
    _urlretrieve(source_url, destination)

def extract_file(self) ‑> None

Extract file from compressed.

Expand source code

def extract_file(self) -> None:
    """Unpack the downloaded archive into ``self.root``, then delete the archive."""
    archive_path = os.path.join(self.root, self.resources)
    shutil.unpack_archive(archive_path, self.root)
    os.remove(archive_path)

def get_path_and_label(self)

Return a dataframe consisting of image paths and their corresponding labels.

Expand source code

def get_path_and_label(self):
    """Return a dataframe of image paths ("image") and class indices ("label").

    Classes are visited in the order of ``self.classes``; within a class the files
    are listed in sorted order so the result does not depend on the file system.
    """
    image_path = []
    label = []
    for cat, enc in self.classes.items():
        cat_path = os.path.join(self.root, "UCMerced_LandUse", self.data_mode, cat)
        # os.listdir returns entries in arbitrary order; sort them so that the
        # index -> sample mapping is the same on every machine.
        cat_image = [os.path.join(cat_path, path) for path in sorted(os.listdir(cat_path))]
        image_path += cat_image
        label += [enc] * len(cat_image)
    df = pd.DataFrame({"image": image_path, "label": label})

    return df

class XView (root: str, train: bool = True, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)

Dataset from DIUx xView 2018 Detection Challenge.

Source: https://challenge.xviewdataset.org/data-download (must login)

Args

root : string: Root directory of dataset.
train : bool, optional: If True, creates dataset from training set, otherwise creates from validation set.
transform : callable, optional: A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform : callable, optional: A function/transform that takes in the target and transforms it.
download : bool, optional: If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.

Samples at: - https://storage.googleapis.com/ossjr/xview/train_images.tgz - https://storage.googleapis.com/ossjr/xview/train_labels.tgz - https://storage.googleapis.com/ossjr/xview/validation_images.tgz

Expand source code

class XView(VisionDataset):
    """Dataset from DIUx xView 2018 Detection Challenge.

    Source: https://challenge.xviewdataset.org/data-download (must login)

    Args:
        root (string): Root directory of dataset.
        train (bool, optional): If True, creates dataset from training set, otherwise
            creates from validation set.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.

    Samples at:
    - https://storage.googleapis.com/ossjr/xview/train_images.tgz
    - https://storage.googleapis.com/ossjr/xview/train_labels.tgz
    - https://storage.googleapis.com/ossjr/xview/validation_images.tgz
    """

    # Download links are supplied interactively by the user in download().
    urls = []
    resources = ["train_images.tgz", "train_labels.tgz", "validation_images.tgz"]

    def __init__(
        self,
        root: str,
        train: bool = True,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:

        super().__init__(root, transform=transform, target_transform=target_transform)

        self.root = root
        self.data_mode = "train" if train else "validation"
        self.class_enc = CLASS_ENC
        self.class_dec = CLASS_DEC
        self.coords, self.chips, self.classes = None, None, None

        if download:
            # Check the disk once and branch on the result.
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()

        # Image file names are sorted so that the index -> sample mapping does not
        # depend on the arbitrary order returned by os.listdir.
        if self.data_mode == "train":
            self.coords, self.chips, self.classes = self.get_path_and_label()
            self.imgs = sorted(os.listdir(os.path.join(self.root, "train_images")))

        elif self.data_mode == "validation":
            self.imgs = sorted(os.listdir(os.path.join(self.root, "val_images")))

    def _check_exists(self) -> bool:
        """Return True when the extracted files for the current mode are present.

        The root directory is created as a side effect when it is missing.
        """
        os.makedirs(self.root, exist_ok=True)

        if self.data_mode == "train":
            train_dir = os.path.join(self.root, self.resources[0].split(".")[0])
            labels_file = os.path.join(self.root, "xView_train.geojson")
            return os.path.exists(train_dir) and os.path.exists(labels_file)
        return os.path.exists(os.path.join(self.root, "val_images"))

    def download(self):
        """Download file by asking users to input the link"""
        train_images = input(
            "Please follow the following steps to download the required dataset\n"
            "1. Visit https://challenge.xviewdataset.org/login\n"
            "2. Sign up for an account\n"
            "3. Verify your account\n"
            "4. Follow this link: https://challenge.xviewdataset.org/download-links\n"
            "5. Copy the link for 'Download Training Images (tgz)' and paste it: "
        )

        train_labels = input("\n6. Copy and paste the link for 'Download Training Labels (tgz)': ")

        val_images = input("\n7. Copy and paste the link for 'Download Validation Images (tgz)': ")

        self.urls = [train_images, train_labels, val_images]

        # urls and resources are parallel: the i-th link is saved under the i-th name.
        for idx, url in enumerate(self.urls):
            _urlretrieve(url, os.path.join(self.root, self.resources[idx]))

    def extract_file(self):
        """Extract the .tgz file"""
        for resource in self.resources:
            archive_path = os.path.join(self.root, resource)
            shutil.unpack_archive(archive_path, self.root)
            os.remove(archive_path)

    def _check_exists_label(self, filename):
        """Check whether bounding boxes, image filenames, and labels
        are already extracted from xView_train.geojson

        Returns:
            tuple: (path of ``filename`` under root, whether that path exists)
        """
        path_to_check = os.path.join(self.root, filename)
        return path_to_check, os.path.exists(path_to_check)

    def get_path_and_label(self):
        """Gets bounding boxes, image filenames, and labels
        from xView_train.geojson

        The three arrays are cached as coords.npy, chips.npy and classes.npy in the
        root directory and are loaded from there when all three files exist.

        Returns:
            coords: coordinates of the bounding boxes
            chips: image file names
            classes: classes for each ground truth
        """
        # check existence
        coords_path, coords_exists = self._check_exists_label("coords.npy")
        chips_path, chips_exists = self._check_exists_label("chips.npy")
        classes_path, classes_exists = self._check_exists_label("classes.npy")

        # if exist, load and return
        if coords_exists and chips_exists and classes_exists:
            coords = np.load(coords_path)
            chips = np.load(chips_path)
            classes = np.load(classes_path)
            return coords, chips, classes

        # read xView_train.geojson
        fname = os.path.join(self.root, "xView_train.geojson")
        with open(fname, encoding="utf-8") as f:
            data = json.load(f)

        # initialize
        coords, chips, classes = [], [], []

        # A set gives constant-time membership tests; the list was scanned once
        # per annotation.
        img_files = set(os.listdir(os.path.join(self.root, self.resources[0].split(".")[0])))

        # extract
        for feature in data["features"]:
            properties = feature["properties"]
            b_id = properties["image_id"]

            # type_id 75 and 82 don't belong to any class
            # https://github.com/DIUx-xView/xView1_baseline/issues/3
            if properties["type_id"] not in [75, 82] and b_id in img_files:
                chips.append(b_id)
                classes.append(properties["type_id"])
                coords.append([int(num) for num in properties["bounds_imcoords"].split(",")])

        # convert to numpy arrays and save
        coords = np.array(coords)
        chips = np.array(chips)
        classes = np.array(classes)
        np.save(coords_path, coords)
        np.save(chips_path, chips)
        np.save(classes_path, classes)
        return coords, chips, classes

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index
        Returns:
            tuple: (img, target) where target is a dictionary of target
        consists of bounding boxes and labels. In validation mode only
        img is returned.
        """
        if self.data_mode == "train":
            # image: idx addresses an image file, matching __len__, which counts
            # image files. self.chips holds one entry per annotation and must not
            # be indexed with idx directly.
            img_name = self.imgs[idx]
            img_path = os.path.join(self.root, "train_images", img_name)
            img = np.array(_load_img(img_path))

            if self.transform is not None:
                img = self.transform(Image.fromarray(img))

            # all annotations that belong to this image
            in_image = self.chips == img_name
            # bounding box
            bbox = self.coords[in_image]
            # label
            label = self.classes[in_image]
            if label.size:
                # np.vectorize cannot be called on empty input, which happens for
                # images that have no usable annotation.
                label = np.vectorize(index_mapping.get)(label)
            # combine bounding box and label
            target = {"boxes": bbox, "labels": label}

            if self.target_transform is not None:
                target = self.target_transform(target)
            sample = (img, target)

        elif self.data_mode == "validation":
            # image
            img_path = os.path.join(self.root, "val_images", self.imgs[idx])
            img = np.array(_load_img(img_path))

            if self.transform is not None:
                img = self.transform(Image.fromarray(img))

            sample = img
        return sample

    def __len__(self) -> int:
        """Return the number of image files of the current split."""
        return len(self.imgs)

Ancestors

VisionDataset
torch.utils.data.dataset.Dataset
typing.Generic

Class variables

var functions : Dict[str, Callable]
var resources
var urls

Methods

def download(self)

Download file by asking users to input the link

Expand source code

def download(self):
    """Prompt the user for the three xView download links and fetch each archive.

    The links are stored in ``self.urls``; the i-th link is saved into
    ``self.root`` under the i-th name of ``self.resources``.
    """
    first_prompt = (
        "Please follow the following steps to download the required dataset\n"
        "1. Visit https://challenge.xviewdataset.org/login\n"
        "2. Sign up for an account\n"
        "3. Verify your account\n"
        "4. Follow this link: https://challenge.xviewdataset.org/download-links\n"
        "5. Copy the link for 'Download Training Images (tgz)' and paste it: "
    )
    train_images = input(first_prompt)
    train_labels = input("\n6. Copy and paste the link for 'Download Training Labels (tgz)': ")
    val_images = input("\n7. Copy and paste the link for 'Download Validation Images (tgz)': ")

    self.urls = [train_images, train_labels, val_images]

    for position, link in enumerate(self.urls):
        target_file = os.path.join(self.root, self.resources[position])
        _urlretrieve(link, target_file)

def extract_file(self)

Extract the .tgz file

Expand source code

def extract_file(self):
    """Unpack each archive named in ``self.resources`` into ``self.root``.

    Every archive is deleted right after it has been unpacked.
    """
    archive_paths = (os.path.join(self.root, name) for name in self.resources)
    for archive_path in archive_paths:
        shutil.unpack_archive(archive_path, self.root)
        os.remove(archive_path)

def get_path_and_label(self)

Gets bounding boxes, image filenames, and labels from xView_train.geojson

Returns

coords: coordinates of the bounding boxes
chips: image file names
classes: classes for each ground truth

Expand source code

def get_path_and_label(self):
    """Gets bounding boxes, image filenames, and labels
    from xView_train.geojson

    The three arrays are cached as coords.npy, chips.npy and classes.npy at the
    paths reported by ``self._check_exists_label`` and are loaded from there when
    all three files exist.

    Returns:
        coords: coordinates of the bounding boxes
        chips: image file names
        classes: classes for each ground truth
    """
    # check existence
    coords_path, coords_exists = self._check_exists_label("coords.npy")
    chips_path, chips_exists = self._check_exists_label("chips.npy")
    classes_path, classes_exists = self._check_exists_label("classes.npy")

    # if exist, load and return
    if coords_exists and chips_exists and classes_exists:
        coords = np.load(coords_path)
        chips = np.load(chips_path)
        classes = np.load(classes_path)
        return coords, chips, classes

    # read xView_train.geojson
    fname = os.path.join(self.root, "xView_train.geojson")
    with open(fname, encoding="utf-8") as f:
        data = json.load(f)

    # initialize
    coords, chips, classes = [], [], []

    # A set gives constant-time membership tests; the list was scanned once per
    # annotation.
    img_files = set(os.listdir(os.path.join(self.root, self.resources[0].split(".")[0])))

    # extract
    for feature in data["features"]:
        properties = feature["properties"]
        b_id = properties["image_id"]

        # type_id 75 and 82 don't belong to any class
        # https://github.com/DIUx-xView/xView1_baseline/issues/3
        if properties["type_id"] not in [75, 82] and b_id in img_files:
            chips.append(b_id)
            classes.append(properties["type_id"])
            # Bounds are parsed only for annotations that are kept.
            coords.append([int(num) for num in properties["bounds_imcoords"].split(",")])

    # convert to numpy arrays and save
    coords = np.array(coords)
    chips = np.array(chips)
    classes = np.array(classes)
    np.save(coords_path, coords)
    np.save(chips_path, chips)
    np.save(classes_path, classes)
    return coords, chips, classes