Source code for lsst.sims.maf.slicers.baseSlicer

from __future__ import print_function
from future import standard_library
standard_library.install_aliases()
from builtins import str
from builtins import zip
from builtins import range
from builtins import object
# Base class for all 'Slicer' objects.
#
import inspect
from io import StringIO
import json
import warnings
import numpy as np
import numpy.ma as ma
from lsst.sims.maf.utils import getDateVersion
from future.utils import with_metaclass

__all__ = ['SlicerRegistry', 'BaseSlicer']

class SlicerRegistry(type):
    """Metaclass for slicers, used to build a registry of slicer classes.

    Every class created with this metaclass is recorded in a ``registry``
    dictionary that is created on the first class built and then shared (via
    inheritance) by its subclasses. Classes whose module name starts with
    'lsst.sims.maf.slicers' are registered under their bare class name; all
    other classes are registered as '<module name>.<class name>'.
    The names 'BaseSlicer' and 'BaseSpatialSlicer' are never registered.
    """

    def __init__(cls, name, bases, dict):
        # NOTE: the third parameter shadows the builtin ``dict``; the name is
        # kept so the signature stays identical for any existing caller.
        super(SlicerRegistry, cls).__init__(name, bases, dict)
        if not hasattr(cls, 'registry'):
            cls.registry = {}
        # Read the module name straight from the class. inspect.getmodule()
        # returns None when the module is not present in sys.modules, which
        # would make the registration fail with an AttributeError.
        module = getattr(cls, '__module__', None)
        modname = module + '.' if module else ''
        if modname.startswith('lsst.sims.maf.slicers'):
            modname = ''
        slicername = modname + name
        if slicername in cls.registry:
            raise Exception('Redefining slicer %s! (there are >1 slicers with the same name)'
                            % (slicername))
        if slicername not in ['BaseSlicer', 'BaseSpatialSlicer']:
            cls.registry[slicername] = cls

    def getClass(cls, slicername):
        """Return the slicer class registered under ``slicername``.

        Raises
        ------
        KeyError
            If no slicer with that name has been registered.
        """
        return cls.registry[slicername]

    def help(cls, doc=False):
        """Print the names of all registered slicers, in sorted order.

        Parameters
        ----------
        doc : boolean, optional
            If True, also print the docstring of each slicer. Default False.
        """
        for slicername in sorted(cls.registry):
            if doc:
                print('---- ', slicername, ' ----')
                print(inspect.getdoc(cls.registry[slicername]))
            else:
                print(slicername)
class BaseSlicer(with_metaclass(SlicerRegistry, object)):
    """
    Base class for all slicers: sets required methods and implements common functionality.

    After first construction, the slicer should be ready for setupSlicer to define slicePoints,
    which will let the slicer 'slice' data and generate plots.
    After init after a restore: everything necessary for using slicer for plotting or
    saving/restoring metric data should be present (although slicer does not need to be able to
    slice data again and generally will not be able to).

    Parameters
    ----------
    verbose: boolean, optional
        True/False flag to send extra output to screen.
        Default True.
    badval: int or float, optional
        The value the Slicer uses to fill masked metric data values
        Default -666.
    """

    def __init__(self, verbose=True, badval=-666):
        self.verbose = verbose
        self.badval = badval
        # Set cacheSize : each slicer will be able to override if appropriate.
        # Currently only the healpixSlice actually uses the cache: this is set in 'useCache' flag.
        # If other slicers have the ability to use the cache, they should add this flag and set the
        # cacheSize in their __init__ methods.
        self.cacheSize = 0
        # Set length of Slicer.
        self.nslice = None
        self.shape = self.nslice
        self.slicePoints = {}
        self.slicerName = self.__class__.__name__
        self.columnsNeeded = []
        # Create a dict that saves how to re-init the slicer.
        # This may not be the whole set of args/kwargs, but those which carry useful metadata or
        # are absolutely necessary for init.
        # Will often be overwritten by individual slicer slicer_init dictionaries.
        self.slicer_init = {'badval': badval}
        self.plotFuncs = []
        # Note if the slicer needs OpSim field ID info
        self.needsFields = False
        # Set the y-axis range to be used on the two-d plot.
        # (nslice was set to None just above, so this branch does not run here.)
        if self.nslice is not None:
            self.spatialExtent = [0, self.nslice - 1]

    def _runMaps(self, maps):
        """Add map metadata to slicePoints.

        Each map's ``run`` method is called with the current slicePoints and its
        return value replaces them. Nothing happens when ``maps`` is None.
        """
        if maps is not None:
            for m in maps:
                self.slicePoints = m.run(self.slicePoints)

    def setupSlicer(self, simData, maps=None):
        """Set up Slicer for data slicing.

        Set up internal parameters necessary for slicer to slice data and generates indexes on simData.
        Also sets _sliceSimData for a particular slicer.

        Parameters
        -----------
        simData : np.recarray
            The simulated data to be sliced.
        maps : list of lsst.sims.maf.maps objects, optional.
            Maps to apply at each slicePoint, to add to the slicePoint metadata. Default None.

        Raises
        ------
        NotImplementedError
            Always, in this base class.
        """
        # Typically args will be simData, but opsimFieldSlicer also uses fieldData.
        raise NotImplementedError()

    def getSlicePoints(self):
        """Return the slicePoint metadata, for all slice points.
        """
        return self.slicePoints

    def __len__(self):
        """Return nslice, the number of slicePoints in the slicer.
        """
        return self.nslice

    def __iter__(self):
        """Iterate over the slices.

        Resets the internal slice counter, so only one iteration at a time is supported.
        """
        self.islice = 0
        return self

    def __next__(self):
        """Returns results of self._sliceSimData when iterating over slicer.

        Results of self._sliceSimData should be dictionary of
        {'idxs': the data indexes relevant for this slice of the slicer,
        'slicePoint': the metadata for the slicePoint, which always includes 'sid' key for ID of slicePoint.}
        """
        if self.islice >= self.nslice:
            raise StopIteration
        islice = self.islice
        self.islice += 1
        return self._sliceSimData(islice)

    def __getitem__(self, islice):
        """Return the result of self._sliceSimData for slice number ``islice``.
        """
        return self._sliceSimData(islice)

    def __eq__(self, otherSlicer):
        """
        Evaluate if two slicers are equivalent.

        Raises NotImplementedError in this base class.
        """
        raise NotImplementedError()

    def __ne__(self, otherSlicer):
        """
        Evaluate if two slicers are not equivalent (the negation of ``==``).
        """
        return not (self == otherSlicer)

    def _sliceSimData(self, slicePoint):
        """
        Slice the simulation data appropriately for the slicer.

        Given the identifying slicePoint metadata
        The slice of data returned will be the indices of the numpy rec array (the simData)
        which are appropriate for the metric to be working on, for that slicePoint.
        """
        raise NotImplementedError('This method is set up by "setupSlicer" - run that first.')

    def writeData(self, outfilename, metricValues, metricName='',
                  simDataName ='', constraint=None, metadata='', plotDict=None, displayDict=None):
        """
        Save metric values along with the information required to re-build the slicer.

        Parameters
        -----------
        outfilename : str
            The output file name.
        metricValues : np.ma.MaskedArray or np.ndarray
            The metric values to save to disk.
        metricName : str, optional
            Stored in the file header. Default ''.
        simDataName : str, optional
            Stored in the file header. Default ''.
        constraint : optional
            Stored in the file header. Default None.
        metadata : str, optional
            Stored in the file header. Default ''.
        plotDict : dict, optional
            Stored in the file header. Default None.
        displayDict : dict, optional
            Stored in the file header. Default None, which is saved as {'group': 'Ungrouped'}.
        """
        header = {}
        header['metricName'] = metricName
        header['constraint'] = constraint
        header['metadata'] = metadata
        header['simDataName'] = simDataName
        date, versionInfo = getDateVersion()
        header['dateRan'] = date
        if displayDict is None:
            displayDict = {'group': 'Ungrouped'}
        header['displayDict'] = displayDict
        header['plotDict'] = plotDict
        for key in versionInfo:
            header[key] = versionInfo[key]
        if hasattr(metricValues, 'mask'):  # If it is a masked array
            data = metricValues.data
            mask = metricValues.mask
            fill = metricValues.fill_value
        else:
            data = metricValues
            mask = None
            fill = None
        # npz file acts like dictionary: each keyword/value pair below acts as a dictionary in loaded NPZ file.
        np.savez(outfilename,
                 header=header,  # header saved as dictionary
                 metricValues=data,  # metric data values
                 mask=mask,  # metric mask values
                 fill=fill,  # metric badval/fill val
                 slicer_init=self.slicer_init,  # dictionary of instantiation parameters
                 slicerName=self.slicerName,  # class name
                 slicePoints=self.slicePoints,  # slicePoint metadata saved (is a dictionary)
                 slicerNSlice=self.nslice,
                 slicerShape=self.shape)

    def outputJSON(self, metricValues, metricName='',
                   simDataName ='', metadata='', plotDict=None):
        """
        Send metric data to JSON streaming API, along with a little bit of metadata.

        This method will only work for metrics where the metricDtype is float or int,
        as JSON will not interpret more complex data properly. These values can't be plotted anyway though.

        Parameters
        -----------
        metricValues : np.ma.MaskedArray or np.ndarray
            The metric values.
        metricName : str, optional
            The name of the metric. Default ''.
        simDataName : str, optional
            The name of the simulated data source. Default ''.
        metadata : str, optional
            The metadata about this metric. Default ''.
        plotDict : dict, optional.
            The plotDict for this metric bundle. Default None.

        Returns
        --------
        StringIO or None
            StringIO object containing a header dictionary with metricName/metadata/simDataName/slicerName,
            and plot labels from plotDict, and metric values/data for plot.
            if oneDSlicer, the data is [ [bin_left_edge, value], [bin_left_edge, value]..].
            if a spatial slicer, the data is [ [lon, lat, value], [lon, lat, value] ..].
            None is returned (after a warning) when the metric dtype is not float or int.
        """
        # Bail if this is not a good data type for JSON.
        # The dtype 'kind' is tested so that both floats and (signed/unsigned) ints are accepted;
        # the previous test `not (float) or (int)` rejected every integer metric.
        if metricValues.dtype.kind not in 'fiu':
            warnings.warn('Cannot generate JSON.')
            return None
        # Else put everything together for JSON output.
        if plotDict is None:
            plotDict = {}
            plotDict['units'] = ''
        # Preserve some of the metadata for the plot.
        header = {}
        header['metricName'] = metricName
        header['metadata'] = metadata
        header['simDataName'] = simDataName
        header['slicerName'] = self.slicerName
        header['slicerLen'] = int(self.nslice)
        # Set some default plot labels if appropriate.
        if 'title' in plotDict:
            header['title'] = plotDict['title']
        else:
            header['title'] = '%s %s: %s' % (simDataName, metadata, metricName)
        if 'xlabel' in plotDict:
            header['xlabel'] = plotDict['xlabel']
        else:
            if hasattr(self, 'sliceColName'):
                header['xlabel'] = '%s (%s)' % (self.sliceColName, self.sliceColUnits)
            else:
                header['xlabel'] = '%s' % (metricName)
                if 'units' in plotDict:
                    header['xlabel'] += ' (%s)' % (plotDict['units'])
        if 'ylabel' in plotDict:
            header['ylabel'] = plotDict['ylabel']
        else:
            if hasattr(self, 'sliceColName'):
                header['ylabel'] = '%s' % (metricName)
                if 'units' in plotDict:
                    header['ylabel'] += ' (%s)' % (plotDict['units'])
            else:
                # If it's not a oneDslicer and no ylabel given, don't need one.
                pass
        # Bundle up slicer and metric info.
        # getdata/getmaskarray work for masked and plain arrays alike (a plain array yields an
        # all-False mask, and a masked array without an explicit mask yields a full-size one),
        # so a single code path serves both. tolist() converts numpy scalars to native Python
        # numbers, which the json module can serialize (numpy integers are not serializable).
        values = np.asarray(ma.getdata(metricValues)).tolist()
        masked = ma.getmaskarray(metricValues).tolist()
        metric = []
        if 'ra' in self.slicePoints:
            # Spatial slicer. Translate ra/dec to lon/lat in degrees and output with metric value.
            lons = (np.asarray(self.slicePoints['ra']) * 180.0 / np.pi).tolist()
            lats = (np.asarray(self.slicePoints['dec']) * 180.0 / np.pi).tolist()
            for lon, lat, value, isMasked in zip(lons, lats, values, masked):
                if not isMasked:
                    metric.append([lon, lat, value])
        elif 'bins' in self.slicePoints:
            # OneD slicer. Translate bins into bin/left and output with metric value;
            # masked bins are reported with a value of 0.
            bins = np.asarray(self.slicePoints['bins']).tolist()
            for binleft, value, isMasked in zip(bins, values, masked):
                if isMasked:
                    metric.append([binleft, 0])
                else:
                    metric.append([binleft, value])
            # Close the last bin with its right-hand edge (when the slicer provides one).
            if len(bins) > len(values):
                metric.append([bins[len(values)], 0])
        elif self.slicerName == 'UniSlicer':
            # Single value; a masked value is skipped, as for the spatial slicers.
            if values and not masked[0]:
                metric.append([values[0]])
        # Write out JSON output.
        io = StringIO()
        json.dump([header, metric], io)
        return io

    @staticmethod
    def _bytesToStr(mapping, decodeValues=True):
        """Return a copy of ``mapping`` with bytes keys (and optionally bytes values) decoded as utf-8.
        """
        decoded = {}
        for k, v in mapping.items():
            newkey = str(k, 'utf-8') if isinstance(k, bytes) else k
            if decodeValues and isinstance(v, bytes):
                v = str(v, 'utf-8')
            decoded[newkey] = v
        return decoded

    def readData(self, infilename):
        """
        Read metric data from disk, along with the info to rebuild the slicer (minus new slicing capability).

        Parameters
        -----------
        infilename: str
            The filename containing the metric data.

        Returns
        -------
        np.ma.MaskedArray, lsst.sims.maf.slicer, dict
            MetricValues stored in data file, the slicer basis for those metric values, and a dictionary
            containing header information (runName, metadata, etc.).
        """
        import lsst.sims.maf.slicers as slicers
        # The header, slicer_init and slicePoints entries are dictionaries, which numpy stores as
        # pickled object arrays; recent numpy versions refuse to load these unless allow_pickle=True.
        # NOTE(security): unpickling can execute arbitrary code - only read files from trusted sources.
        # We have many old metric files saved with py2 and these need a bit of extra care to reload.
        # First determine if this is the case:
        restored = np.load(infilename, allow_pickle=True)
        py2_to_py3 = False
        try:
            restored['slicePoints']
        except UnicodeError:
            # Old metric data files saved by py2 stored the slicepoints with bytes.
            restored.close()
            restored = np.load(infilename, encoding='bytes', allow_pickle=True)
            py2_to_py3 = True
        # Pull everything needed out of the archive, then close it so the file handle is released.
        try:
            # Get metadata and other simData info.
            header = restored['header'][()]
            slicer_init = restored['slicer_init'][()]
            slicePoints = restored['slicePoints'][()]
            if py2_to_py3:
                slicerName = str(restored['slicerName'], 'utf-8')
            else:
                slicerName = str(restored['slicerName'])
            nslice = restored['slicerNSlice']
            shape = restored['slicerShape']
            data = restored['metricValues']
            mask = restored['mask']
            fill = restored['fill']
        finally:
            restored.close()
        if py2_to_py3:
            header = self._bytesToStr(header)
            slicer_init = self._bytesToStr(slicer_init)
            # Only the keys of slicePoints are converted; the values are left untouched.
            slicePoints = self._bytesToStr(slicePoints, decodeValues=False)
        # Backwards compatibility issue - map 'spatialkey1/spatialkey2' to 'lonCol/latCol'.
        if 'spatialkey1' in slicer_init:
            slicer_init['lonCol'] = slicer_init['spatialkey1']
            del (slicer_init['spatialkey1'])
        if 'spatialkey2' in slicer_init:
            slicer_init['latCol'] = slicer_init['spatialkey2']
            del (slicer_init['spatialkey2'])
        try:
            slicer = getattr(slicers, slicerName)(**slicer_init)
        except TypeError:
            warnings.warn('Cannot use saved slicer init values; falling back to defaults')
            slicer = getattr(slicers, slicerName)()
        # Restore slicePoint metadata.
        slicer.nslice = nslice
        slicer.slicePoints = slicePoints
        slicer.shape = shape
        # Get metric data set
        if mask[()] is None:
            metricValues = ma.MaskedArray(data=data)
        else:
            metricValues = ma.MaskedArray(data=data,
                                          mask=mask,
                                          fill_value=fill)
        return metricValues, slicer, header