Source code for lsst.sims.maf.db.Table

from __future__ import print_function
from builtins import zip
import numpy as np
from sqlalchemy import func, text
from sqlalchemy.sql import expression
import warnings
with warnings.catch_warnings():
    warnings.simplefilter("ignore", UserWarning)
    from lsst.sims.catalogs.db import CatalogDBObject, ChunkIterator

__author__ = 'simon'


[docs]class Table(CatalogDBObject):
    skipRegistration = True
    objid = 'sims_maf'

    def __init__(self, tableName, idColKey, database, driver='sqlite', host=None, port=None,
                 typeOverRide=None, verbose=False):
        """
        Initialize an object for querying OpSim databases

        Keyword arguments:
        @param tableName:  Name of the table to query
        @param idColKey:   Primary key for table
        @param database: Name of database or sqlite filename
        @param driver:   Name of database driver for sqlalchemy (e.g. 'sqlite', 'pymssql+mssql')
        @param host:     Database hostname (optional)
        @param port:     Database port number (optional)
        """
        self.idColKey = idColKey
        self.driver = driver
        self.database = database
        self.host = host
        self.port = port
        self.tableid = tableName

        if typeOverRide is not None:
            self.dbTypeMap.update(typeOverRide)

        super(Table, self).__init__(database=database, driver=driver, host=host, port=port)

    def _get_column_query(self, doGroupBy, colnames=None, aggregate=func.min):
        # Build the sql query - including adding all column names, if columns were None.
        if colnames is None:
            colnames = [k for k in self.columnMap]
        try:
            vals = [self.columnMap[k] for k in colnames]
        except KeyError:
            for c in colnames:
                if c in self.columnMap:
                    continue
                else:
                    print("%s not in columnMap"%(c))
            raise ValueError('entries in colnames must be in self.columnMap', self.columnMap)

        # Get the first query
        idColName = self.columnMap[self.idColKey]
        if idColName in vals:
            idLabel = self.idColKey
        else:
            idLabel = idColName

        #SQL server requires an aggregate on all columns if a group by clause is used.
        #Modify the columnMap to take care of this.  The default is MIN, but it shouldn't
        #matter since the entries are almost identical (except for proposalId).
        #Added double-quotes to handle column names that start with a number.
        if doGroupBy:
            query = self.connection.session.query(aggregate(self.table.c[idColName]).label(idLabel))
        else:
            query = self.connection.session.query(self.table.c[idColName].label(idLabel))
        for col, val in zip(colnames, vals):
            if val is idColName:
                continue
            #Check if this is a default column.
            if val == col:
                #If so, use the column in the table to take care of DB specific column
                #naming conventions (capitalization, spaces, etc.)
                if doGroupBy:
                    query = query.add_column(aggregate(self.table.c[col]).label(col))
                else:
                    query = query.add_column(self.table.c[col].label(col))
            else:
                #If not, assume that the user has specified the column correctly
                if doGroupBy:
                    query = query.add_column(aggregate(expression.literal_column(val)).label(col))
                else:
                    query = query.add_column(expression.literal_column(val).label(col))
        return query

[docs]    def query_columns_Iterator(self, colnames=None, chunk_size=None, constraint=None, groupByCol=None, numLimit=None):
        doGroupBy = not groupByCol is None
        query = self._get_column_query(doGroupBy, colnames=colnames)
        if constraint is not None:
            query = query.filter(text(constraint))

        if doGroupBy:
            #Either group by a column that gives unique visits
            query = query.group_by(self.table.c[groupByCol])
        if numLimit:
            query = query.limit(numLimit)
        return ChunkIterator(self, query, chunk_size)


[docs]    def query_columns_Array(self, colnames=None, chunk_size=1000000, constraint=None,
                            groupByCol=None, numLimit=None):
        """Same as query_columns, but returns a numpy rec array instead. """
        # Query the database, chunk by chunk (to reduce memory footprint).
        # If colnames == None, then will retrieve all columns in table.
        results = self.query_columns_Iterator(colnames=colnames, chunk_size=chunk_size,
                                              constraint=constraint, groupByCol=groupByCol,
                                              numLimit=numLimit)
        rescount = 0
        chunkList = []
        for result in results:
            chunkList.append(result)
            rescount += 1
        # Merge results of chunked queries.
        if rescount > 0:
            simdata = np.hstack(chunkList)
        else: # If there were no results from query, return an empty array
            dataType = [(str(name), float) for name in colnames]
            simdata = np.zeros(0, dtype=dataType)
        return simdata