IRSA Cloud Access Introduction¶
This introductory tutorial demonstrates basic Python access to the IRSA-curated images and catalogs available in AWS S3 cloud storage buckets.
Learning Goals:
- Find datasets and connection information.
- Browse buckets.
- Find an image and retrieve a cutout.
- Navigate a catalog and perform a basic query.
1. Cloud basics¶
1.1 Terminology¶
AWS S3 is an object store where the fundamental entities are "buckets" and "objects". Buckets are containers for objects, and objects are blobs of data. Users may be more familiar with filesystem "files" and "directories". Files are analogous to objects, and the terms are often used interchangeably. While there is no directory-like structure in S3 itself, various layers on top of S3 have implemented functionality to mimic it. As a result, users can generally interact using familiar filesystem logic, but may notice subtle differences.
The following S3 terms are also used in this notebook:
- Key: Unique ID for an object, relative to the bucket. Analogous to the full path to a file.
- Prefix: String of characters at the beginning of the object key. Analogous to the full path to a directory.
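As a minimal sketch of these two terms (the key below is a hypothetical placeholder, not a real IRSA object), a prefix is simply any leading portion of a key that ends at a "/":

```python
# A hypothetical S3 object key (the path of an object relative to its bucket):
key = "survey/images/field_001/mosaic.fits"

# Every leading substring that ends at a "/" is a prefix. Prefixes play the
# role of directories, even though S3 stores no actual directory objects.
prefix = key.rsplit("/", 1)[0] + "/"
print(prefix)  # survey/images/field_001/
```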
1.2 General access¶
Most of the common Python methods used to read images and catalogs from a local disk can also be pointed at cloud storage buckets. This includes methods like Astropy fits.open and Pandas read_parquet.
The cloud connection is handled by a separate library, usually s3fs, fsspec, or pyarrow.fs.
The IRSA buckets are public and access is free.
Credentials are not required.
Anonymous connections can be made, often by setting a keyword argument like anon=True.
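As a sketch of this pattern (the bucket and key here are hypothetical placeholders, not real IRSA data), the same s3:// URI works across readers, with the anonymous-access keyword passed slightly differently per library:

```python
# Hypothetical bucket and key, for illustration only.
bucket = "example-public-bucket"
key = "images/field_001/mosaic.fits"
uri = f"s3://{bucket}/{key}"
print(uri)  # s3://example-public-bucket/images/field_001/mosaic.fits

# The anonymous-access option is spelled differently per library:
#   pandas:  pd.read_parquet(uri, storage_options={"anon": True})
#   astropy: fits.open(uri, fsspec_kwargs={"anon": True})
#   s3fs:    s3fs.S3FileSystem(anon=True)
```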
2. Find datasets and connection information¶
A listing of the available datasets is maintained on IRSA's Cloud Access page. It can be used to substitute values for the following variables, which are used throughout this notebook and identified by all caps:
| Cloud Access Variable | Definition |
|---|---|
| BUCKET_NAME | AWS S3 bucket name. |
| BUCKET_REGION | AWS region the bucket is in. |
| IMAGES_PREFIX | S3 prefix to the base of an image set. |
| CATALOG_PREFIX | S3 prefix to the base of a catalog. |
| PARQUET_NAME | Path to the base of the catalog's Parquet dataset, relative to the prefix. |
3. Imports¶
Libraries are imported at the top of each section when used. This cell will install them if needed:
try:
import astropy # perform image cutouts
import hpgeom # map sky location to catalog partition
import matplotlib # view results
import pandas # query Parquet datasets
import pyarrow # work with Parquet datasets
import pyvo # locate images in the cloud (pyvo>=1.5 required)
import s3fs # browse buckets
except ImportError:
%pip install astropy
%pip install hpgeom
%pip install pandas
%pip install pyarrow
%pip install 'pyvo>=1.5'
%pip install s3fs
%pip install -U matplotlib
# check for pyvo>=1.5 (required for SIA2Service)
try:
import pyvo
pyvo.dal.SIA2Service("https://irsa.ipac.caltech.edu/SIA")
except AttributeError:
%pip install --upgrade pyvo
# note that you may need to restart the kernel to use the updated package
4. Browse buckets¶
Generally, it is not necessary to look through the buckets. The Cloud Access page is the best way to understand what is available. However, browsing buckets can sometimes be useful.
The s3fs library offers a filesystem-like experience and is demonstrated below. Beware that these datasets can contain millions of files or more -- if calls are taking a long time to return, try narrowing the search by adding additional levels to the prefix.
Users who want to interact with the buckets and objects more directly may instead prefer the AWS Boto3 library.
import s3fs
# create an S3 client
s3 = s3fs.S3FileSystem(anon=True)
Use ls to list the root directory of the Spitzer SEIP Super Mosaics image set:
BUCKET_NAME = "nasa-irsa-spitzer"
IMAGES_PREFIX = "spitzer/seip/seip_science/images"
s3.ls(f"{BUCKET_NAME}/{IMAGES_PREFIX}/")
['nasa-irsa-spitzer/spitzer/seip/seip_science/images/2', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/3', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/5', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/6', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/7']
Use wildcard (glob) matching to find mosaics. This can be slow if looking through a large number of files, so we'll add a few levels to the base prefix to reduce the number of files in the search. The additional levels can be found using ls recursively or constructed using information on the Cloud Access page.
sub_prefix = "4/0019/40019821/9"
glob_pattern = "**/*.mosaic.fits"
s3.glob(f"{BUCKET_NAME}/{IMAGES_PREFIX}/{sub_prefix}/{glob_pattern}")
['nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-19/40019821.40019821-19.MIPS.1.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9-short.IRAC.1.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9-short.IRAC.2.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9-short.IRAC.3.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9-short.IRAC.4.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9.IRAC.1.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9.IRAC.2.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9.IRAC.3.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9.IRAC.4.mosaic.fits', 'nasa-irsa-spitzer/spitzer/seip/seip_science/images/4/0019/40019821/9/40019821-9/40019821.40019821-9.MIPS.1.mosaic.fits']
5. Find an image and retrieve a cutout¶
A basic image cutout using Astropy is demonstrated in section 5.2. It requires a bucket name and image key. Section 5.1 demonstrates how to query IRSA for this information using PyVO and a coordinate search.
Note: There may be a delay between the time when a new dataset is made available in S3 and when IRSA's image services are able to return the new cloud access information. In this case, the IRSA URL that is returned by the service can be mapped to the cloud information. This is explained further on the Cloud Access page, and also demonstrated in section 5.1 below.
import astropy.io.fits
import json
import pyvo
from astropy import units as u
from astropy.coordinates import SkyCoord
from astropy.nddata import Cutout2D
from astropy.wcs import WCS
from matplotlib import pyplot as plt
5.1 Find an image using PyVO and a coordinate search¶
coords = SkyCoord("150.01d 2.2d", frame="icrs")
size = 0.01 * u.deg
For the full list of datasets that can be used with the collection parameter in the SIA search below, uncomment the next cell. Note that only some of these are currently available in cloud storage. To request that additional datasets be made available in cloud storage, please contact IRSA's Help Desk.
# from astroquery.ipac.irsa import Irsa
# Irsa.list_collections()
# # If `list_collections` is not found,
# # uncomment the next line to update astroquery, then restart your kernel.
# # !pip install --upgrade --pre astroquery
Use PyVO to execute a search for Spitzer SEIP Super Mosaics and find cloud access information:
irsa_SIA = pyvo.dal.SIA2Service("https://irsa.ipac.caltech.edu/SIA")
seip_results = irsa_SIA.search((coords, size), collection="spitzer_seip")
# view cloud access info for the first few files overlapping the search area
seip_results["cloud_access"][:5]
masked_array(data=['{"aws": {"bucket_name": "nasa-irsa-spitzer", "key":"spitzer/seip/seip_science/images/6/0095/60095931/4/60095931-14/60095931.60095931-14.IRAC.4.brightmask.fits", "region": "us-west-2"}}', '{"aws": {"bucket_name": "nasa-irsa-spitzer", "key":"spitzer/seip/seip_science/images/6/0095/60095931/4/60095931-14/60095931.60095931-14.IRAC.4.extmask.fits", "region": "us-west-2"}}', '{"aws": {"bucket_name": "nasa-irsa-spitzer", "key":"spitzer/seip/seip_science/images/6/0095/60095931/4/60095931-14/60095931.60095931-14.IRAC.4.unc.fits", "region": "us-west-2"}}', '{"aws": {"bucket_name": "nasa-irsa-spitzer", "key":"spitzer/seip/seip_science/images/6/0095/60095931/4/60095931-14/60095931.60095931-14.IRAC.4.std.fits", "region": "us-west-2"}}', '{"aws": {"bucket_name": "nasa-irsa-spitzer", "key":"spitzer/seip/seip_science/images/6/0095/60095931/4/60095931-14/60095931.60095931-14.IRAC.4.mosaic.fits", "region": "us-west-2"}}'], mask=[False, False, False, False, False], fill_value=array('?', dtype='<U1'), dtype=object)
Option 1: Extract the cloud information from the "cloud_access" column for a single mosaic file:
# find the first mosaic file in the results
# use json to convert the string containing the cloud info to a dictionary
seip_mosaic_cloud_info = json.loads([i for i in seip_results["cloud_access"] if ".mosaic.fits" in i][0])
# extract
BUCKET_NAME = seip_mosaic_cloud_info["aws"]["bucket_name"]
image_key = seip_mosaic_cloud_info["aws"]["key"]
Option 2: Construct the cloud information for a single mosaic file from the IRSA URL (useful if you know the dataset is in S3, but results in the "cloud_access" column were empty):
BUCKET_NAME = "nasa-irsa-spitzer"
IMAGES_PREFIX = "spitzer/seip/seip_science/images"
# find the first mosaic URL in the results
image_url = [url for url in seip_results["access_url"] if ".mosaic.fits" in url][0]
# construct the key by replacing the first part of the URL with the IMAGES_PREFIX
# split the URL at the data product ID, the final level of the prefix
data_product_id = IMAGES_PREFIX.split("/")[-1]
key_suffix = image_url.split(f"/{data_product_id}/")[-1]
image_key = f"{IMAGES_PREFIX}/{key_suffix}"
5.2 Retrieve a cutout using Astropy¶
Lazy-load data for better performance. The relevant options are enabled by default when using astropy.io.fits.open with fsspec. In addition, use the HDU section method in place of the usual data to avoid downloading the full data block. (See Obtaining subsets from cloud-hosted FITS files.)
with astropy.io.fits.open(f"s3://{BUCKET_NAME}/{image_key}", fsspec_kwargs={"anon": True}) as hdul:
cutout = Cutout2D(hdul[0].section, position=coords, size=size, wcs=WCS(hdul[0].header))
Show the cutout:
plt.imshow(cutout.data, cmap="gray")
<matplotlib.image.AxesImage at 0x7f635c8178d0>
6. Navigate a catalog and perform a basic query¶
The catalogs are in Apache Parquet format and partitioned spatially using HEALPix order k=5. Parquet datasets can be queried directly using SQL-like logic by applying filters during the file reads. For queries that have a spatial constraint, including a filter on the partitioning column can greatly improve performance.
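To see why the partition filter helps: at order k=5 the sky is divided into a fixed number of HEALPix pixels, each stored as its own partition, so a filter on the partitioning column lets the reader skip every partition file outside the search region. The pixel count follows from the standard HEALPix relation npix = 12 * nside² with nside = 2^k:

```python
k = 5
nside = 2 ** k              # HEALPix NSIDE for order k
n_pixels = 12 * nside ** 2  # total pixels on the sky = number of partitions
print(n_pixels)  # 12288
```

A polygon a few degrees across overlaps only a handful of these 12,288 partitions, so the read touches a tiny fraction of the dataset.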
Section 6.1 demonstrates how to view the catalog schema using PyArrow to find the column names, etc. needed for constructing queries. A basic spatial query using Pandas is shown in section 6.2 and includes finding the relevant partitions using HPGeom.
import hpgeom
import pandas as pd
import pyarrow.dataset
import pyarrow.fs
from matplotlib import colors
BUCKET_NAME = "nasa-irsa-wise"
BUCKET_REGION = "us-west-2"
CATALOG_PREFIX = "wise/allwise/catalogs/p3as_psd/healpix_k5"
PARQUET_NAME = "wise-allwise.parquet"
parquet_root = f"{BUCKET_NAME}/{CATALOG_PREFIX}/{PARQUET_NAME}"
fs = pyarrow.fs.S3FileSystem(region=BUCKET_REGION, anonymous=True)
6.1 View the schema using PyArrow¶
# load the schema from the "_common_metadata" file
schema = pyarrow.dataset.parquet_dataset(f"{parquet_root}/_common_metadata", filesystem=fs).schema
# the full schema can be quite large since catalogs often have hundreds of columns
# but if you do want to look at the entire schema, uncomment the next line
# schema
Search through the column names to find the HEALPix partitioning columns:
[name for name in schema.names if "healpix" in name]
# the result shows that both orders k=0 and k=5 are available, but typical use cases only need k=5
['healpix_k0', 'healpix_k5']
Look at a specific column ("ext_flg" will be used below to query for likely stars):
# this will display basic information like name and type
schema.field("ext_flg")
pyarrow.Field<ext_flg: int64>
# units and descriptions are in the field's metadata attribute
schema.field("ext_flg").metadata
{b'units': b'', b'description': b'Extended source flag. This is an integer flag, the value of which indicates whether or not the morphology of a source is consistent with the WISE point spread function in any band, or whether the source is associated with or superimposed on a previously known extended object from the 2MASS Extended Source Catalog (XSC). The values of the ext_flg indicate the following conditions: 0 - The source shape is consistent with a point-source and the source is not associated with or superimposed on a 2MASS XSC source 1 - The profile-fit photometry goodness-of-fit, w?rchi2, is >3.0 in one or more bands. 2 - The source falls within the extrapolated isophotal footprint of a 2MASS XSC source. 3 - The profile-fit photometry goodness-of-fit, w?rchi2, is >3.0 in one or more bands, and The source falls within the extrapolated isophotal footprint of a 2MASS XSC source. 4 - The source position falls within 5" of a 2MASS XSC source. 5 - The profile-fit photometry goodness-of-fit, w?rchi2, is >3.0 in one or more bands, and the source position falls within 5" of a 2MASS XSC source. CAUTION: WISE profile-fit (w?mpro) and standard aperture (w?mag) measurements are optimized for point sources and will systematically underestimate the true flux of resolved objects. If a source entry has ext_flg>0, you may wish to examine the large aperture photometry, or the elliptical aperture photometry which are measured using areas that are scaled from 2MASS XSC morphologies.'}
6.2 Perform a basic spatial search with Pandas¶
Query the AllWISE catalog for likely stars within an RA/Dec polygon.
Spatial limits roughly covering the Taurus L1495 star-forming region:
ra_min, ra_max, dec_min, dec_max = 62, 66, 25, 29 # deg
polygon_corners = [(ra_min, dec_min), (ra_min, dec_max), (ra_max, dec_max), (ra_max, dec_min)]
corners = list(zip(*polygon_corners)) # [(ra values), (dec values)]
Find the partitions (HEALPix pixel indexes) that overlap the polygon:
k = 5
polygon_pixels = hpgeom.query_polygon(a=corners[0], b=corners[1], nside=hpgeom.order_to_nside(k), inclusive=True)
Query:
results_df = pd.read_parquet(
parquet_root,
filesystem=fs,
# columns to be returned. similar to a SQL SELECT clause.
columns=["designation", "w1mpro", "w2mpro", "w3mpro", "w4mpro", f"healpix_k{k}"],
# row filters. similar to a SQL WHERE clause.
# supported operators: ==, !=, <, >, <=, >=, in, not in
# tuple conditions are joined by AND (for OR, use a list of lists)
filters=[
("ext_flg", "==", 0),
("ra", ">", ra_min),
("ra", "<", ra_max),
("dec", ">", dec_min),
("dec", "<", dec_max),
# include filter on partition column for efficiency. similar to a database index.
(f"healpix_k{k}", "in", polygon_pixels),
],
)
View results on a color-color diagram:
results_df["W1-W2"] = results_df["w1mpro"] - results_df["w2mpro"]
results_df["W2-W3"] = results_df["w2mpro"] - results_df["w3mpro"]
results_df.plot.hexbin("W2-W3", "W1-W2", norm=colors.LogNorm(vmin=1, vmax=500))
<Axes: xlabel='W2-W3', ylabel='W1-W2'>
About this notebook¶
- Author: Troy Raen (IRSA Developer) in conjunction with Brigitta Sipőcz, Jessica Krick and the IPAC Science Platform team
- Updated: 2023-12-22
- Contact: https://irsa.ipac.caltech.edu/docs/help_desk.html