Module earthvision.datasets.l8biome
L8 Biome Cloud Cover Dataset.
Expand source code
"""L8 Biome Cloud Cover Dataset."""
from PIL import Image
import os
import shutil
import pandas as pd
import glob
import requests
from bs4 import BeautifulSoup
from typing import Any, Callable, Optional, Tuple
from .vision import VisionDataset
from .utils import _urlretrieve, _load_img_hdr, _load_stack_img
class L8Biome(VisionDataset):
    """L8 Biome Cloud Cover.

    Download page https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data

    Args:
        root (string): Root directory of dataset.
        transform (callable, optional): A function/transform that takes in a PIL image and
            returns a transformed version. E.g., transforms.RandomCrop
        target_transform (callable, optional): A function/transform that takes in the
            target and transforms it.
        download (bool, optional): If true, downloads the dataset from the internet and
            puts it in root directory. If dataset is already downloaded, it is not
            downloaded again.
    """

    mirrors = "https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data"

    def __init__(
        self,
        root: str,
        transform: Optional[Callable] = None,
        target_transform: Optional[Callable] = None,
        download: bool = False,
    ) -> None:
        super(L8Biome, self).__init__(root, transform=transform, target_transform=target_transform)
        self.root = root
        # NOTE(review): data_modes is derived by scraping the download page, so
        # constructing this dataset needs network access even when the files are
        # already on disk.
        self.download_urls = self.get_download_url()
        self.data_modes = [url.split("/")[-1] for url in self.download_urls]
        if download:
            # Check once instead of twice; _check_exists walks the filesystem.
            if self._check_exists():
                print("file already exists.")
            else:
                self.download()
                self.extract_file()
        self.img_labels = self.get_path_and_label()

    def get_download_url(self):
        """Scrape the download page and return the ``.tar.gz`` archive URLs."""
        page = requests.get(self.mirrors)
        soup = BeautifulSoup(page.content, "html.parser")
        hrefs = [anchor.get("href") for anchor in soup.find_all("a")]
        # ``href`` may be None for anchors without a link target, so guard first.
        return [url for url in hrefs if url and url.endswith(".tar.gz")]

    def download(self):
        """Download every archive listed in ``download_urls`` into ``root``."""
        for resource in self.download_urls:
            filename = resource.split("/")[-1]
            _urlretrieve(resource, os.path.join(self.root, filename))

    def extract_file(self):
        """Extract the downloaded ``.tar.gz`` archives into ``root`` and delete them."""
        for resource in self.data_modes:
            archive_path = os.path.join(self.root, resource)
            shutil.unpack_archive(archive_path, self.root)
            os.remove(archive_path)

    def _check_exists(self):
        """Return True when every extracted scene directory is present under ``root/BC``.

        As a side effect, creates ``root`` (including missing parents) if needed.
        """
        # makedirs handles nested roots; plain os.mkdir fails on missing parents.
        os.makedirs(self.root, exist_ok=True)
        is_exists = []
        for data_mode in self.data_modes:
            data_mode = data_mode.replace(".tar.gz", "")
            data_path = os.path.join(self.root, "BC", data_mode)
            is_exists.append(os.path.exists(data_path))
        return all(is_exists)

    def get_path_and_label(self):
        """Get the path of the images and labels (masks) in a dataframe.

        Returns:
            pandas.DataFrame with columns ``image`` (scene directory) and
            ``label`` (path to that scene's ``*mask.hdr`` file).
        """
        image_directory, label = [], []
        for data_mode in self.data_modes:
            data_mode = data_mode.replace(".tar.gz", "")
            image_dir = os.path.join(self.root, "BC", data_mode)
            image_directory.append(image_dir)
            # NOTE(review): assumes exactly one "*mask.hdr" per scene directory;
            # zero or multiple matches would misalign the two columns — verify.
            label.extend(glob.glob(os.path.join(self.root, "BC", data_mode, "*mask.hdr")))
        return pd.DataFrame({"image": image_directory, "label": label})

    def __getitem__(self, idx: int) -> Tuple[Any, Any]:
        """
        Args:
            idx (int): Index

        Returns:
            tuple: (img, mask)
        """
        img_directory = self.img_labels.iloc[idx, 0]
        mask_path = self.img_labels.iloc[idx, 1]
        # Hoisted out of the loop: the scene name does not change per band.
        observation = img_directory.split("/")[-1]
        # Stack Landsat 8 bands B1..B11 for this scene.
        # BUGFIX: the band loop variable previously shadowed the ``idx`` parameter.
        ls_stack_path = [
            f"{img_directory}/{observation}_B{band}.TIF" for band in range(1, 12)
        ]
        img = _load_stack_img(ls_stack_path)
        mask = _load_img_hdr(mask_path)
        if self.transform is not None:
            img = Image.fromarray(img)
            img = self.transform(img)
        if self.target_transform is not None:
            mask = Image.fromarray(mask)
            mask = self.target_transform(mask)
        return img, mask

    def __len__(self) -> int:
        return len(self.img_labels)
Classes
class L8Biome (root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False)
-
L8 Biome Cloud Cover.
Download page https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data
Args
root
:string
- Root directory of dataset.
transform
:callable
, optional- A function/transform that takes in a PIL image and returns a transformed version. E.g., transforms.RandomCrop
target_transform
:callable
, optional- A function/transform that takes in the target and transforms it.
download
:bool
, optional- If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again.
Expand source code
class L8Biome(VisionDataset): """L8 Biome Cloud Cover. Download page https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data Args: root (string): Root directory of dataset. transform (callable, optional): A function/transform that takes in an PIL image and returns a transformed version. E.g, transforms.RandomCrop target_transform (callable, optional): A function/transform that takes in the target and transforms it. download (bool, optional): If true, downloads the dataset from the internet and puts it in root directory. If dataset is already downloaded, it is not downloaded again. """ mirrors = "https://landsat.usgs.gov/landsat-8-cloud-cover-assessment-validation-data" def __init__( self, root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, download: bool = False, ) -> None: super(L8Biome, self).__init__(root, transform=transform, target_transform=target_transform) self.root = root self.download_urls = self.get_download_url() self.data_modes = [url.split("/")[-1] for url in self.download_urls] if download and self._check_exists(): print("file already exists.") if download and not self._check_exists(): self.download() self.extract_file() self.img_labels = self.get_path_and_label() def get_download_url(self): """Get the urls to download the files.""" page = requests.get(self.mirrors) soup = BeautifulSoup(page.content, "html.parser") urls = [url.get("href") for url in soup.find_all("a")] download_urls = list(filter(lambda url: url.endswith(".tar.gz") if url else None, urls)) return download_urls def download(self): """Download file""" for resource in self.download_urls: filename = resource.split("/")[-1] _urlretrieve(resource, os.path.join(self.root, filename)) def extract_file(self): """Extract the .zip file""" for resource in self.data_modes: shutil.unpack_archive(os.path.join(self.root, resource), self.root) os.remove(os.path.join(self.root, resource)) def _check_exists(self): is_exists = [] if not 
os.path.isdir(self.root): os.mkdir(self.root) for data_mode in self.data_modes: data_mode = data_mode.replace(".tar.gz", "") data_path = os.path.join(self.root, "BC", data_mode) is_exists.append(os.path.exists(data_path)) return all(is_exists) def get_path_and_label(self): """Get the path of the images and labels (masks) in a dataframe""" image_directory, label = [], [] for data_mode in self.data_modes: data_mode = data_mode.replace(".tar.gz", "") image_dir = os.path.join(self.root, "BC", data_mode) image_directory.append(image_dir) label.extend(glob.glob(os.path.join(self.root, "BC", data_mode, "*mask.hdr"))) df = pd.DataFrame({"image": image_directory, "label": label}) return df def __getitem__(self, idx: int) -> Tuple[Any, Any]: """ Args: idx (int): Index Returns: tuple: (img, mask) """ img_directory = self.img_labels.iloc[idx, 0] mask_path = self.img_labels.iloc[idx, 1] ls_stack_path = [] for idx in range(1, 12): observation = img_directory.split("/")[-1] name_file = f"{img_directory}/{observation}_B{idx}.TIF" ls_stack_path.append(name_file) img = _load_stack_img(ls_stack_path) mask = _load_img_hdr(mask_path) if self.transform is not None: img = Image.fromarray(img) img = self.transform(img) if self.target_transform is not None: mask = Image.fromarray(mask) mask = self.target_transform(mask) return img, mask def __len__(self) -> int: return len(self.img_labels)
Ancestors
- VisionDataset
- torch.utils.data.dataset.Dataset
- typing.Generic
Class variables
var functions : Dict[str, Callable]
var mirrors
Methods
def download(self)
-
Download file
Expand source code
def download(self): """Download file""" for resource in self.download_urls: filename = resource.split("/")[-1] _urlretrieve(resource, os.path.join(self.root, filename))
def extract_file(self)
-
Extract the downloaded .tar.gz archives
Expand source code
def extract_file(self): """Extract the .zip file""" for resource in self.data_modes: shutil.unpack_archive(os.path.join(self.root, resource), self.root) os.remove(os.path.join(self.root, resource))
def get_download_url(self)
-
Get the urls to download the files.
Expand source code
def get_download_url(self): """Get the urls to download the files.""" page = requests.get(self.mirrors) soup = BeautifulSoup(page.content, "html.parser") urls = [url.get("href") for url in soup.find_all("a")] download_urls = list(filter(lambda url: url.endswith(".tar.gz") if url else None, urls)) return download_urls
def get_path_and_label(self)
-
Get the path of the images and labels (masks) in a dataframe
Expand source code
def get_path_and_label(self): """Get the path of the images and labels (masks) in a dataframe""" image_directory, label = [], [] for data_mode in self.data_modes: data_mode = data_mode.replace(".tar.gz", "") image_dir = os.path.join(self.root, "BC", data_mode) image_directory.append(image_dir) label.extend(glob.glob(os.path.join(self.root, "BC", data_mode, "*mask.hdr"))) df = pd.DataFrame({"image": image_directory, "label": label}) return df