Source code for toasty.collection

# -*- mode: python; coding: utf-8 -*-
# Copyright 2021 the AAS WorldWide Telescope project
# Licensed under the MIT License.

"""
Collections of related input images.

Some Toasty processing tasks operate over collections of input images that are
related in some way. This module provides standardized mechanisms for
manipulating such collections. In particular, it provides a framework for
scanning over image "descriptions" without reading in the complete image data.
This can be useful because many collection-related operations want to do an
initial pass over the collection to gather some global information, then a
second pass with the actual data processing.

For interactive usage or in basic scripts, use :func:`load` for maximum
convenience. For command-line interfaces, use the :class:`CollectionLoader`
helper to provide a uniform collection-loading interface.
"""

__all__ = """
load
CollectionLoader
ImageCollection
RubinDirectoryCollection
SimpleFitsCollection
""".split()

from abc import ABC
from glob import glob
import numpy as np
from os.path import join
import warnings

from .image import Image, ImageDescription, ImageMode

MATCH_HEADERS = [
    "CTYPE1",
    "CTYPE2",
    "CRVAL1",
    "CRVAL2",
    "CDELT1",
    "CDELT2",
    "PC1_1",
    "PC1_2",
    "PC2_1",
    "PC2_2",
]



[docs]
class ImageCollection(ABC):

[docs]
    def descriptions(self):
        """
        Generate a sequence of :class:`toasty.image.ImageDescription` items
        associated with this collection.

        Each description will have an added string attribute ``collection_id``
        that gives a unique textual identifer for the item in the collection.

        Unlike :meth:`ImageCollection.images`, this function does cause the full
        data for each image to be loaded.
        """
        raise NotImplementedError()



[docs]
    def images(self):
        """
        Generate a sequence of :class:`toasty.image.Image` items associated with
        this collection.

        Each image will have an added string attribute ``collection_id`` that
        gives a unique textual identifer for the item in the collection.
        """
        raise NotImplementedError()



[docs]
    def export_simple(self):
        """
        Export information about the paths in this collection, if possible.

        Returns
        -------
        A list of tuples ``(fits_file_path, hdu_index)``.

        Raises
        ------
        Raises an exception if it is not possible to meaningfully export this
        particular collection in the "simple" format. This could be the case,
        for instance, if the inputs live in memory and do not have filesystem
        paths.

        Notes
        -----
        Sometimes, you need to expose information about a FITS collection to a
        tool outside of Toasty: for instance, to invoke ``hipsgen`` to generate
        a HiPS tiling. This function gives the information needed to invoke the
        tool, or raises an error if it can't be done correctly. This method
        makes it possible to handle image collections using Toasty's framework,
        but "drop down" to a simpler representation when needed.
        """
        raise Exception("this ImageCollection cannot be exported in the simple format")


    def _is_multi_tan(self):
        ref_headers = None

        for desc in self.descriptions():
            # For figuring out the tiling properties, we need to ensure that
            # we're working in top-down mode everywhere.
            desc.ensure_negative_parity()

            header = desc.wcs.to_header()

            if ref_headers is None:
                ref_headers = {}

                for h in MATCH_HEADERS:
                    value = header.get(h)
                    if value is not None:
                        ref_headers[h] = value

                if (
                    ref_headers["CTYPE1"] != "RA---TAN"
                    or ref_headers["CTYPE2"] != "DEC--TAN"
                ):
                    return False
            else:
                for h, expected in ref_headers.items():
                    observed = header[h]

                    if observed != expected:
                        return False

        return True




[docs]
class SimpleFitsCollection(ImageCollection):
    def __init__(self, paths, hdu_index=None, blankval=None, **kwargs):
        self._paths = list(paths)
        self._hdu_index = hdu_index
        self._blankval = blankval

    def _scan_hdus(self):
        from astropy.io import fits

        for path_index, fits_path in enumerate(self._paths):
            with fits.open(fits_path) as hdul:
                if isinstance(self._hdu_index, int):
                    hdu_index = self._hdu_index
                    hdu = hdul[self._hdu_index]
                elif self._hdu_index is not None:
                    hdu_index = self._hdu_index[path_index]
                    hdu = hdul[self._hdu_index]
                else:
                    for hdu_index, hdu in enumerate(hdul):
                        if (
                            hasattr(hdu, "shape")
                            and len(hdu.shape) > 1
                            and type(hdu) is not fits.hdu.table.BinTableHDU
                        ):
                            break

                if type(hdu) is fits.hdu.table.BinTableHDU:
                    raise Exception(
                        f"cannot process input `{fits_path}`: Did not find any HDU with image data"
                    )

                yield fits_path, hdu_index, hdu


[docs]
    def export_simple(self):
        # We're allowed to return a generator, but we listify this so that the
        # FITS files are all closed promptly.
        return [(t[0], t[1]) for t in self._scan_hdus()]


    def _load(self, actually_load_data):
        from astropy.wcs import WCS

        for fits_path, _hdu_index, hdu in self._scan_hdus():
            # Hack for DASCH files, and potentially others. These have TPV
            # polynomial distortions but do not use the magic projection
            # keyword that causes Astropy to accept them. We don't try to be
            # very flexible in how we activate the hack since we don't want
            # to futz with things unnecessarily.

            if (
                "PV1_5" in hdu.header
                and hdu.header["CTYPE1"] == "RA---TAN"
                and hdu.header["CTYPE2"] == "DEC--TAN"
            ):
                hdu.header["CTYPE1"] = "RA---TPV"
                hdu.header["CTYPE2"] = "DEC--TPV"

                # Extra hack (?) for `rh04475_00_01ww_tnx.fit` needed to fix 180
                # deg rotation. Based on the last paragraph of \S2.2 in
                # Calabretta & Greisen (2002), I think the headers in this file
                # are wrong. This presumably also applies to other DASCH files
                # that cross the north pole; possibly an issue in wcstools,
                # which is the WCS kit used for this project. I'm not currently
                # able to get my hands on other DASCH files to test out other
                # cases where this fix may be needed.
                if hdu.header["CRVAL2"] == 90:
                    hdu.header["LONPOLE"] = 180.0

            # End hack(s).

            wcs = WCS(hdu.header)
            shape = hdu.shape

            if wcs.naxis >= len(shape):
                # Sometimes the WCS defines more axes than are in the data cube;
                # this should generally mean that there are virtual coordinate
                # axes that we can think of as adding extra size-1 dimensions.
                shape = (1,) * (wcs.naxis - len(shape)) + shape

            # We need to make sure the data are 2D celestial, since that's
            # what our image code and `reproject` (if it's being used) expect.

            full_wcs = None
            keep_axes = None

            if wcs.naxis != 2:
                if not wcs.has_celestial:
                    raise Exception(
                        f"cannot process input `{fits_path}`: WCS cannot be reduced to 2D celestial"
                    )

                full_wcs = wcs
                wcs = full_wcs.celestial

                # note: get_axis_types returns axes in FITS order, innermost first
                keep_axes = [
                    t.get("coordinate_type") == "celestial"
                    for t in full_wcs.get_axis_types()[::-1]
                ]

                for axnum, (keep, axlen) in enumerate(zip(keep_axes, shape)):
                    if not keep and axlen != 1:
                        warnings.warn(
                            f"taking 0'th plane of non-celestial axis #{axnum} in input `{fits_path}`"
                        )

            # OK, almost there. Are we loading data or just the descriptor?

            if actually_load_data:
                # Reshape in case of extra WCS axes:
                data = hdu.data.reshape(shape)

                if full_wcs is not None:  # need to subset?
                    data = data[tuple(slice(None) if k else 0 for k in keep_axes)]

                if self._blankval is not None:
                    data[data == self._blankval] = np.nan

                result = Image.from_array(data, wcs=wcs, default_format="fits")
            else:
                if full_wcs is not None:  # need to subset?
                    shape = tuple(t[1] for t in zip(keep_axes, shape) if t[0])

                if hasattr(hdu, "dtype"):
                    mode = ImageMode.from_array_info(shape, hdu.dtype)
                else:
                    mode = None  # CompImageHDU doesn't have dtype

                result = ImageDescription(mode=mode, shape=shape, wcs=wcs)

            result.collection_id = fits_path
            yield result


[docs]
    def descriptions(self):
        return self._load(False)



[docs]
    def images(self):
        return self._load(True)





[docs]
class RubinDirectoryCollection(ImageCollection):
    """
    Load up imagery from a directory containing FITS files capturing a Rubin
    Observatory tract.

    The directory will be searched for files whose names end in ``.fits``. In
    each of those files, the HDUs beyond the first will be treated as separate
    science images that should be individually loaded. The images will be
    trimmed according to their ``DATASEC`` specification before being returned.

    This class requires the ``ccdproc``  package to trim FITS CCD datasets.
    """

    def __init__(self, dirname, unit=None):
        self._dirname = dirname
        self._unit = unit

    def _load(self, actually_load_data):
        from astropy.io import fits
        from astropy.nddata import ccddata
        import ccdproc

        for fits_path in glob(join(self._dirname, "*.fits")):
            # `astropy.nddata.ccddata.fits_ccddata_reader` only opens FITS from
            # filenames, not from an open HDUList, which means that creating
            # multiple CCDDatas from the same FITS file rapidly becomes
            # inefficient. So, we emulate its logic.

            with fits.open(fits_path) as hdu_list:
                for idx, hdu in enumerate(hdu_list):
                    if idx == 0:
                        header0 = hdu.header
                    else:
                        hdr = hdu.header
                        hdr.extend(header0, unique=True)

                        # This ccddata function often generates annoying warnings
                        with warnings.catch_warnings():
                            warnings.simplefilter("ignore")
                            hdr, wcs = ccddata._generate_wcs_and_update_header(hdr)

                        # We can't use `ccdproc.trim_image()` without having a
                        # CCDData in hand, so we have to create a fake empty
                        # array even when we're not actually loading any data.
                        #
                        # Note: we skip all the unit-handling logic from
                        # `fits_ccddata_reader` here basically since the LSST
                        # sim data I'm using don't have anything useful.

                        if actually_load_data:
                            data = hdu.data

                            if data.dtype.kind == "i":
                                data = data.astype(np.float32)
                        else:
                            data = np.empty(hdu.shape, dtype=np.void)

                        ccd = ccddata.CCDData(data, meta=hdr, unit=self._unit, wcs=wcs)
                        ccd = ccdproc.trim_image(
                            ccd, fits_section=ccd.header["DATASEC"]
                        )
                        data = ccd.data
                        shape = data.shape
                        wcs = ccd.wcs

                        if actually_load_data:
                            mode = ImageMode.from_array_info(shape, data.dtype)
                        elif hasattr(hdu, "dtype"):
                            mode = ImageMode.from_array_info(shape, hdu.dtype)
                        else:
                            mode = None  # CompImageHDU doesn't have dtype

                        if actually_load_data:
                            result = Image.from_array(
                                data, wcs=wcs, default_format="fits"
                            )
                        else:
                            result = ImageDescription(mode=mode, shape=shape, wcs=wcs)

                        result.collection_id = f"{fits_path}:{idx}"
                        yield result


[docs]
    def descriptions(self):
        return self._load(False)



[docs]
    def images(self):
        return self._load(True)





[docs]
class CollectionLoader(object):
    """
    A class defining how to load a collection of images.

    This is implemented as its own class since there can be some options
    involved, and we want to provide a centralized place for handling them all.

    For interactive usage, use :func:`load`, which is optimized for easy
    invocation. This class is best used in command-line interfaces that interact
    with collections of FITS files.
    """

    blankval = None
    hdu_index = None


[docs]
    @classmethod
    def add_arguments(cls, parser):
        """
        Add standard collection-loading options to an argparse parser object.

        Parameters
        ----------
        parser : :class:`argparse.ArgumentParser`
            The argument parser to modify

        Returns
        -------
        The :class:`CollectionLoader` class (for chainability).

        Notes
        -----
        If you are writing a command-line interface that takes an image
        collection as an input, use this function to wire in to standardized
        image-loading infrastructure and options.
        """

        parser.add_argument(
            "--hdu-index",
            metavar="INDEX[,INDEX...]",
            help="Which HDU to load in each input FITS file",
        )
        parser.add_argument(
            "--blankval",
            metavar="NUMBER",
            help="An image value to treat as undefined in all FITS inputs",
        )
        return cls



[docs]
    @classmethod
    def create_from_args(cls, settings):
        """
        Process standard collection-loading options to create a
        :class:`CollectionLoader`.

        Parameters
        ----------
        settings : :class:`argparse.Namespace`
            Settings from processing command-line arguments

        Returns
        -------
        A new :class:`CollectionLoader` initialized with the settings.
        """
        loader = cls()

        if settings.hdu_index is not None:
            # Note: semantically we treat `hdu_index = 2` differently than
            # `hdu_index = [2]`: the first means use HDU 2 in all files, the
            # second means use HDU 2 in the first file. But we don't have a way
            # to distinguish between the two when parsing the CLI argument.
            try:
                index = int(settings.hdu_index)
            except ValueError:
                try:
                    index = list(map(int, settings.hdu_index.split(",")))
                except Exception:
                    raise Exception(
                        "cannot parse `--hdu-index` setting `{settings.hdu_index!r}`: should "
                        "be a comma-separated list of one or more integers"
                    )

            loader.hdu_index = index

        if settings.blankval is not None:
            try:
                # If integer input image, want to avoid roundoff/precision issues
                v = int(settings.blankval)
            except ValueError:
                try:
                    v = float(settings.blankval)
                except ValueError:
                    raise Exception(
                        "cannot parse `--blankval` setting `{settings.blankval!r}`: should "
                        "be a number"
                    )

            loader.blankval = v

        return loader



[docs]
    def load_paths(self, paths):
        """
        Set up an `ImageCollection` based on a list of input filesystem paths.

        Parameters
        ----------
        paths : iterable of str
            The filesystem paths to load.

        Returns
        -------
        A new :class:`ImageCollection`.
        """
        paths = list(str(p) for p in paths)

        # This is where more cleverness will go if/when needed.

        return SimpleFitsCollection(
            paths,
            hdu_index=self.hdu_index,
            blankval=self.blankval,
        )





[docs]
def load(input, hdu_index=None, blankval=None):
    """
    Set up a collection of FITS files as sensibly as possible.

    Parameters
    ----------
    input : str or iterable of str
        The filesystem path(s) of the FITS file(s) to load.
    hdu_index : optional int or list of int, default None
        Which HDU to load. If a scalar, the same index will be
        used in every input file; if a list, corresponding values
        will be used for each input path. If unspecified, Toasty
        will guess.
    blankval : optional number, default None
        An image value to treat as undefined in all FITS inputs.

    Returns
    -------
    A new :class:`ImageCollection`.

    Notes
    -----
    This function is meant to be called interactively from environments like
    Jupyter. Its goal is to "do the right thing" as reliably as possible,
    requiring as little user guidance as needed.
    """
    # One day this function might add more custom logic? But right now we're just
    # providing a streamlined way to access the CollectionLoader.

    if isinstance(input, str):
        input = [input]

    input = list(input)
    loader = CollectionLoader()
    loader.hdu_index = hdu_index
    loader.blankval = blankval
    return loader.load_paths(input)
Navigation

Source code for toasty.collection