# Source code for deephyp.data

'''
    Description: high-level classes for using hyperspectral data with the deep learning modules.

    - File name: data.py
    - Author: Lloyd Windrim
    - Date created: June 2019
    - Python package: deephyp


'''


import numpy as np


class HypImg:
    """Class for handling data. Stores meta-data and contains attributes for pre-processing the data.

    If passed labels, samples with label zero are considered as a background class. This class is
    not included in numClasses and data samples with this label have a one-hot vector label of all
    zeros. Positive labels are used (minus one) as column indexes into the one-hot matrix, so they
    are expected to be the contiguous integers 1..numClasses.

    Args:
        spectralInput (np.array float): Spectral dataset. Shape can be [numRows x numCols x numBands]
            or [numSamples x numBands].
        labels (np.array int): Class labels for each spectral sample in spectralInput. Shape can be
            [numRows x numCols] or [numSamples].
        wavelengths (np.array float): Vector of wavelengths that spectralInput wavelengths lie within.
        bands (np.array int): Wavelength indexes for each band of spectralInput. Shape [numBands].

    Attributes:
        spectra (np.array float): Un-pre-processed spectral data with shape [numSamples x numBands].
        spectraCube (np.array float): If data passed as image - un-pre-processed spectral datacube
            with shape [numRows x numCols x numBands]. Else None.
        spectraPrep (np.array float): Pre-processed spectral data with shape
            [numSamples x numBands]. Only set once pre_process() has been called.
        numSamples (int): The number of spectra.
        numBands (int): The number of spectral bands per sample.
        numRows (int): If data passed as image - the number of image rows. Else None.
        numCols (int): If data passed as image - the number of image columns. Else None.
        numClasses (int): If labels provided - the number of distinct labels greater than zero.
            Else None.
        wavelengths (np.array float): If provided - vector of wavelengths that spectra wavelengths
            lie within. Else None.
        bands (np.array int): If provided - wavelength indexes for each band of spectra with shape
            [numBands]. Else None.
        labels (np.array int): If provided - class labels for each spectral sample with shape
            [numSamples x 1]. Else None.
        labelsOnehot (np.array): If labels provided - the one-hot label vector for each sample with
            shape [numSamples x numClasses]. Samples with label zero (background class) have a
            one-hot vector of all zeros. Else None.
    """

    def __init__(self, spectralInput, labels=None, wavelengths=None, bands=None):
        # NOTE: the builtin float is used instead of np.float; the np.float alias was removed
        # in NumPy 1.24 and always meant the builtin float (i.e. float64) anyway.
        if len(spectralInput.shape) == 3:
            # input is an image [numRows x numCols x numBands]: keep the cube and also
            # flatten it to [numSamples x numBands]
            self.numRows, self.numCols, self.numBands = spectralInput.shape
            self.numSamples = self.numRows * self.numCols
            self.spectra = np.reshape(spectralInput, (-1, self.numBands)).astype(float)
            self.spectraCube = spectralInput.astype(float)
        else:
            self.numSamples, self.numBands = spectralInput.shape
            self.numRows = None
            self.numCols = None
            self.spectra = spectralInput.astype(float)
            self.spectraCube = None

        # if labels provided, determine number of classes and one-hot labels
        if labels is not None:
            # image-shaped labels are flattened so they line up with the rows of self.spectra
            flatLabels = np.reshape(labels, -1) if len(labels.shape) == 2 else labels

            # label zero is the background class and is excluded from the class count
            isForeground = flatLabels > 0
            self.numClasses = len(np.unique(flatLabels[isForeground]))

            # one-hot labels for classes > 0; background rows stay all-zero.
            # Class k maps to column k-1, hence labels must be contiguous 1..numClasses.
            self.labelsOnehot = np.zeros((self.numSamples, self.numClasses))
            self.labelsOnehot[np.flatnonzero(isForeground), flatLabels[isForeground] - 1] = 1

            # stored as a column vector [numSamples x 1]
            self.labels = flatLabels[:, np.newaxis]
        else:
            self.labels = None
            self.labelsOnehot = None
            self.numClasses = None

        self.wavelengths = wavelengths
        self.bands = bands

    def pre_process(self, method='minmax'):
        """Pre-process data for input into the network. Stores in the spectraPrep attribute.

        Args:
            method (str): Method of pre-processing. Current options: 'minmax'

        Raises:
            ValueError: If method is not one of the supported options.
        """
        if method != 'minmax':
            raise ValueError(
                "unknown pre-processing method '%s' (current options: 'minmax')" % method)

        # scales each spectra to be between [0 1] (lower bound is actually a small non-zero number)
        sampleMin = np.min(self.spectra, axis=1, keepdims=True)
        shifted = self.spectra - (sampleMin - 1e-3)
        # Normalise by the max of the *shifted* spectra so every sample peaks at exactly 1.
        # The 1e-3 offset also keeps this divisor strictly positive, even for flat spectra.
        self.spectraPrep = shifted / np.max(shifted, axis=1, keepdims=True)
class Iterator:
    """Class for iterating through data, to train the network.

    Args:
        dataSamples (np.array float): Data to be input into the network. Shape [numSamples x numBands].
        targets (np.array int): Network output target of each dataSample. For classification, these
            are the class labels, and it could be the dataSamples for autoencoders. Shape
            [numSamples x arbitrary] (a 1-D array of shape [numSamples] is also accepted).
        batchSize (int): Number of dataSamples per batch

    Attributes:
        dataSamples (np.array float): Data to be input into the network. Shape [numSamples x numBands].
        targets (np.array int): Network output target of each dataSample. For classification, these
            are the class labels, and it could be the dataSamples for autoencoders. Shape
            [numSamples x arbitrary]
        batchSize (int): Number of dataSamples per batch. If None - set to numSamples (i.e. whole
            dataset).
        numSamples (int): The number of data samples.
        currentBatch (np.array int): Indexes specifying the data samples in the current batch.
            Shape [batchSize]
    """

    def __init__(self, dataSamples, targets, batchSize=None):
        self.dataSamples = dataSamples
        self.targets = targets
        self.numSamples = np.shape(dataSamples)[0]
        self.batchSize = self.numSamples if batchSize is None else batchSize
        self.reset_batch()

    def _wrap(self, indexes):
        """Wrap sample indexes around the end of the dataset."""
        # Modulo (rather than a single subtraction) keeps indexes valid even when
        # batchSize exceeds numSamples. Skipped for an empty dataset to avoid dividing by zero.
        if self.numSamples > 0:
            return indexes % self.numSamples
        return indexes

    def next_batch(self):
        """Return next batch of samples and targets (with batchSize number of samples).

        The currentBatch indexes are incremented. If end of dataset reached, the indexes wrap
        around to the beginning.

        Returns:
            (tuple): 2-element tuple containing:

            - (*np.array float*) - Batch of data samples at currentBatch indexes. Shape
              [batchSize x numBands].
            - (*np.array int*) - Batch of targets at currentBatch indexes. Shape
              [batchSize x arbitrary].
        """
        # Ellipsis indexing selects along the first axis for arrays of any rank (>= 1)
        batchData = self.dataSamples[self.currentBatch, ...]
        batchTargets = self.targets[self.currentBatch, ...]

        # update current batch
        self.currentBatch = self._wrap(self.currentBatch + self.batchSize)

        return batchData, batchTargets

    def get_batch(self, idx):
        """Returns a specified set of samples and targets.

        Args:
            idx (int list): Indexes of samples (and targets) to return.

        Returns:
            (tuple): 2-element tuple containing:

            - (*np.array float*) - Batch of data samples at [idx] indexes. Shape
              [length(idx) x numBands].
            - (*np.array int*) - Batch of targets at [idx] indexes. Shape
              [length(idx) x arbitrary].
        """
        batchData = self.dataSamples[idx, ...]
        batchTargets = self.targets[idx, ...]
        return batchData, batchTargets

    def reset_batch(self):
        """Resets the current batch to the beginning."""
        self.currentBatch = self._wrap(np.arange(self.batchSize))

    def shuffle(self):
        """Randomly permutes all dataSamples (and corresponding targets)."""
        # one shared permutation keeps every sample paired with its own target
        idx = np.random.permutation(np.shape(self.dataSamples)[0])
        self.dataSamples = self.dataSamples[idx, ...]
        self.targets = self.targets[idx, ...]