Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support HEALpix-indexed AGASC HDF5 files and more #155

Merged
merged 34 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from 32 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
a518a2b
Initial commit to support HEALpix-indexed AGASC HDF5 files
taldcroft Sep 12, 2023
799e19f
Fix typo
taldcroft Sep 12, 2023
f9e2d23
Minimal modernization of create_mini_agasc_h5.py
taldcroft Sep 12, 2023
0be1563
Refactor and modernize
taldcroft Sep 13, 2023
e9fef74
Apply black formatting
taldcroft Sep 13, 2023
f64aebd
Apply black to create_near_neighbor_ids
taldcroft Sep 13, 2023
45b1533
Improvements to miniagasc creation helper scripts
taldcroft Sep 13, 2023
cbccf43
Support for caching and columns in get_agasc_cone
taldcroft Sep 18, 2023
0805567
Working get_agasc_cone for healpix
taldcroft Sep 18, 2023
c90685b
Remove columns as an option since slows performance
taldcroft Sep 18, 2023
cad21fe
Fix a couple of issues in create_near_neighbor_ids
taldcroft Sep 18, 2023
07b069b
Change epoch for computing near-neighbor to 2024.0
taldcroft Sep 18, 2023
fe9b554
Some fixes / improvements in file creation scripts
taldcroft Sep 19, 2023
065ed43
Implement new convention for agasc_file kwarg
taldcroft Sep 19, 2023
601fafa
Doc fixes
taldcroft Sep 19, 2023
3d11988
Fix tests
taldcroft Sep 19, 2023
9744866
Rework the file selection code
taldcroft Sep 22, 2023
238454c
Update filename resolution for latest spec
taldcroft Sep 23, 2023
df155b2
Update conf.py for numpydoc and autodoc typehints
taldcroft Sep 23, 2023
145567c
Flake8
taldcroft Sep 23, 2023
6176521
Add test of get_agasc_filename and fix bug in that function
taldcroft Sep 25, 2023
be7145e
Require either .h5 or * at end of `agasc_file` + other fixes
taldcroft Sep 26, 2023
350d91f
Rename and overhaul create_mini_agasc_h5
taldcroft Sep 26, 2023
a5d38c5
Add option to allow RC files for testing
taldcroft Sep 27, 2023
8ea5a53
Add version kwarg and fix tests
taldcroft Sep 27, 2023
3bfa5a2
Test get_agasc_cone, get_star, get_stars for HEALpix
taldcroft Sep 27, 2023
e314a63
Flake8
taldcroft Sep 28, 2023
743285d
Add new dev script for profiling
taldcroft Sep 29, 2023
ca4613b
Reduce memory from supplement
taldcroft Sep 30, 2023
775e156
Add dev scripts for memory profiling
taldcroft Oct 1, 2023
4a5f473
Fix name
taldcroft Oct 1, 2023
0194306
Add documentation on HEALpix ordering
taldcroft Oct 1, 2023
476e183
Add agasc_file to stars meta
taldcroft Oct 3, 2023
c41fe20
Add dev script to profile caching performance
taldcroft Oct 3, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
322 changes: 284 additions & 38 deletions agasc/agasc.py

Large diffs are not rendered by default.

141 changes: 141 additions & 0 deletions agasc/healpix.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
Provide functions for working with HEALPix-indexed AGASC HDF5 files.

Functions
---------
is_healpix(agasc_file)
Return True if `agasc_file` is a HEALPix file (otherwise dec-sorted).
get_stars_from_healpix_h5(ra, dec, radius, agasc_file)
Return a table of stars within a given radius around a given sky position.
"""

import functools
from pathlib import Path

import astropy.units as u
import astropy_healpix as hpx
import numpy as np
import tables
from astropy.table import Table


__all__ = ["is_healpix", "get_stars_from_healpix_h5"]


@functools.lru_cache()
def is_healpix(agasc_file):
"""Return True if ``agasc_file`` is a healpix file (otherwise dec-sorted)"""
with tables.open_file(agasc_file, mode="r") as h5:
return "healpix_index" in h5.root


@functools.lru_cache(maxsize=12)
def get_healpix(nside):
"""
Returns a HEALPix object with the specified nside and nested order.

Parameters
-----------
nside : int
The nside parameter for the HEALPix object.

Returns:
--------
hpx : HEALPix object
A HEALPix object with the specified nside and order.
"""
return hpx.HEALPix(nside=nside, order="nested")


@functools.lru_cache(maxsize=8)
def get_healpix_info(agasc_file: str | Path) -> tuple[dict[int, tuple[int, int]], int]:
"""
Get the healpix index table for an AGASC file.

The healpix index table is a table with columns ``healpix``, ``idx0`` and ``idx1``.
This corresponds to row ranges in the main ``data`` table in the HDF5 file.

Parameters
----------
agasc_file : str or Path
Path to the AGASC HDF5 file.

Returns
-------
healpix_index : dict
Dictionary of healpix index to row range.
nside : int
HEALPix nside parameter.
"""
with tables.open_file(agasc_file, mode="r") as h5:
tbl = h5.root.healpix_index[:]
nside = h5.root.healpix_index.attrs["nside"]

out = {row["healpix"]: (row["row0"], row["row1"]) for row in tbl}

return out, nside


def get_stars_from_healpix_h5(
ra: float,
dec: float,
radius: float,
agasc_file: str | Path,
cache: bool = False,
) -> Table:
"""
Returns a table of stars within a given radius around a given sky position (RA, Dec),
using the AGASC data stored in a HDF5 file with a HEALPix index.

Parameters
----------
ra : float
Right ascension of the center of the search cone, in degrees.
dec : float
Declination of the center of the search cone, in degrees.
radius : float
Radius of the search cone, in degrees.
agasc_file : str or Path
Path to the HDF5 file containing the AGASC data with a HEALPix index.
cache : bool, optional
Whether to cache the AGASC data in memory. Default is False.

Returns
-------
stars : astropy.table.Table
Table of stars within the search cone, with columns from the AGASC data table.
"""
from agasc import sphere_dist, read_h5_table

# Table of healpix, idx0, idx1 where idx is the index into main AGASC data table
healpix_index_map, nside = get_healpix_info(agasc_file)
hp = get_healpix(nside)

# Get healpix index for ever pixel that intersects the cone.
healpix_indices = hp.cone_search_lonlat(
ra * u.deg, dec * u.deg, radius=radius * u.deg
)

stars_list = []

def make_stars_list(h5_file):
for healpix_index in healpix_indices:
idx0, idx1 = healpix_index_map[healpix_index]
stars = read_h5_table(h5_file, row0=idx0, row1=idx1, cache=cache)
stars_list.append(stars)

if cache:
make_stars_list(agasc_file)
else:
with tables.open_file(agasc_file) as h5:
make_stars_list(h5)

stars = Table(np.concatenate(stars_list))
dists = sphere_dist(ra, dec, stars["RA"], stars["DEC"])
stars = stars[dists <= radius]

# Sort in DEC order for back-compatibility with the AGASC ordering before v1.8.
stars.sort("DEC")

return stars
5 changes: 3 additions & 2 deletions agasc/paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,9 @@ def default_agasc_dir():


def default_agasc_file():
"""Default main AGASC file ``agasc_dir() / miniagasc.h5``.
"""Default main AGASC file.

:returns: str
"""
return str(default_agasc_dir() / 'miniagasc.h5')
from agasc import get_agasc_filename
return get_agasc_filename()
2 changes: 2 additions & 0 deletions agasc/supplement/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,8 @@ def get_supplement_table(name, agasc_dir=None, as_dict=False):
out[key] = {nm: row[nm].item() for nm in row.dtype.names if nm not in key_names}
else:
out = Table(dat)
index = {agasc_id: idx for idx, agasc_id in enumerate(out['agasc_id'])}
out.meta["index"] = index

return out

Expand Down
87 changes: 85 additions & 2 deletions agasc/tests/test_agasc_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import tempfile

import numpy as np
import pytest
import tables
from astropy.table import Table
from ska_helpers.utils import temp_env_var

from .. import agasc
import agasc
from agasc.agasc import update_color1_column


def test_multi_agasc():
Expand Down Expand Up @@ -61,7 +64,7 @@ def test_update_color1_func():
# Fifth is still 1.5 because RSV3=0 (no good mag available so still "bad mag")
# Sixth now gets COLOR1 = COLOR2 * 0.850 = 2.0
stars = Table([color1, color2, rsv3], names=['COLOR1', 'COLOR2', 'RSV3'])
agasc.update_color1_column(stars)
update_color1_column(stars)

assert np.allclose(stars['COLOR1'], [1.0, 1.0, 1.5, 1.499, 1.5, 2.0])
assert np.allclose(stars['COLOR2'], color2)
Expand Down Expand Up @@ -105,3 +108,83 @@ def test_update_color1_get_agasc_cone():
stars.add_index('AGASC_ID')
assert np.isclose(stars.loc[759960152]['COLOR1'], 1.5, rtol=0, atol=0.0005)
assert np.isclose(stars.loc[759439648]['COLOR1'], 1.5, rtol=0, atol=0.0005)


def test_get_agasc_filename(tmp_path, monkeypatch):
monkeypatch.setenv("AGASC_DIR", str(tmp_path))
names = [
"agasc1p6.h5",
"agasc1p7.h5",
"agasc1p8.h5",
"agasc1p8.hdf5",
"agasc1p8rc2.h5",
"proseco_agasc_1p6.h5",
"proseco_agasc_1p7.h5",
"proseco_agasc_1p8.h5",
"proseco_agasc_1p8rc2.h5",
"proseco_agasc_1p9rc1.h5",
"miniagasc_1p6.h5",
"miniagasc_1p7.h5",
"miniagasC_1p7.h5",
"miniagasc_1p10.h5",
"miniagasc_2p8.h5",
]
for name in names:
(tmp_path / name).touch()

def _check(filename, expected, allow_rc=False, version=None):
assert agasc.get_agasc_filename(filename, allow_rc, version) == str(expected)

# Default is latest proseco_agasc in AGASC_DIR
_check(None, tmp_path / "proseco_agasc_1p8.h5")

# Default is latest proseco_agasc in AGASC_DIR
_check(None, tmp_path / "proseco_agasc_1p9rc1.h5", allow_rc=True)

# With no wildcard just add .h5. File existence is not required by this function.
with pytest.raises(ValueError, match=r"agasc_file must end with '\*' or '.h5'"):
_check("agasc1p6", tmp_path / "agasc1p6.h5")

# Doesn't find the rc2 version regardless of allow_rc (agasc_1p8.h5 wins over
# agasc_1p8rc2.h5).
_check("agasc*", tmp_path / "agasc1p8.h5", allow_rc=False)
_check("agasc*", tmp_path / "agasc1p8.h5", allow_rc=True)

# 1p8rc2 is available but it takes the non-RC version 1p8
_check(
"proseco_agasc_*",
tmp_path / "proseco_agasc_1p8.h5",
allow_rc=True,
version="1p8",
)
# You can choose the RC version explicitly
_check(
"proseco_agasc_*",
tmp_path / "proseco_agasc_1p8rc2.h5",
allow_rc=True,
version="1p8rc2",
)
# For version="1p9" only the 1p9rc1 version is available
_check(
"proseco_agasc_*",
tmp_path / "proseco_agasc_1p9rc1.h5",
allow_rc=True,
version="1p9",
)

# Wildcard finds the right file (and double-digit version is OK)
_check("miniagasc_*", tmp_path / "miniagasc_1p10.h5")

# With .h5 supplied just return the file, again don't require existence.
_check("agasc1p7.h5", "agasc1p7.h5")
_check("doesnt-exist.h5", "doesnt-exist.h5")

# With AGASC_HDF5_FILE set, use that if agasc_file is None
with temp_env_var("AGASC_HDF5_FILE", "proseco_agasc_1p5.h5"):
_check(None, tmp_path / "proseco_agasc_1p5.h5")
# Explicit agasc_file overrides AGASC_HDF5_FILE
_check("agasc1p7.h5", "agasc1p7.h5")

# With a glob pattern existence of a matching file is required
with pytest.raises(FileNotFoundError, match="No AGASC files"):
agasc.get_agasc_filename("doesnt-exist*")
56 changes: 36 additions & 20 deletions agasc/tests/test_agasc_2.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,20 @@

TEST_RADIUS = 0.6 # standard testing radius
TEST_DIR = Path(__file__).parent
DATA_DIR = Path(os.environ['SKA'], 'data', 'agasc')
AGASC_FILE = {}
AGASC_FILE['1p6'] = DATA_DIR / 'miniagasc_1p6.h5'
AGASC_FILE['1p7'] = DATA_DIR / 'miniagasc.h5' # Latest release

# Whether to test DS AGASC vs. agasc package HDF5 files
TEST_ASCDS = DS_AGASC_VERSION is not None and AGASC_FILE[DS_AGASC_VERSION].exists()
if DS_AGASC_VERSION is None:
TEST_ASCDS = False
else:
try:
agasc.get_agasc_filename('miniagasc_*', version=DS_AGASC_VERSION)
except FileNotFoundError:
TEST_ASCDS = False
else:
TEST_ASCDS = True

# Latest full release of miniagasc
MINIAGASC = agasc.get_agasc_filename('miniagasc_*')


def get_ds_agasc_cone(ra, dec):
Expand Down Expand Up @@ -163,9 +170,14 @@ def test_agasc_conesearch(ra, dec, version):
ref_stars = get_reference_agasc_values(ra, dec, version=version)
except FileNotFoundError:
if os.environ.get('WRITE_AGASC_TEST_FILES'):
ref_stars = agasc.get_agasc_cone(ra, dec, radius=TEST_RADIUS,
agasc_file=AGASC_FILE[version],
date='2000:001', fix_color1=False)
ref_stars = agasc.get_agasc_cone(
ra,
dec,
radius=TEST_RADIUS,
agasc_file=agasc.get_agasc_filename('miniagasc_*', version=version),
date='2000:001',
fix_color1=False
)
test_file = get_test_file(ra, dec, version)
print(f'\nWriting {test_file} based on miniagasc\n')
ref_stars.write(test_file, format='fits')
Expand All @@ -186,8 +198,9 @@ def test_against_ds_agasc(ra, dec):


def _test_agasc(ra, dec, ref_stars, version='1p7'):
agasc_file = agasc.get_agasc_filename('miniagasc_*', version=version)
stars1 = agasc.get_agasc_cone(ra, dec, radius=TEST_RADIUS,
agasc_file=AGASC_FILE[version],
agasc_file=agasc_file,
date='2000:001', fix_color1=False)
stars1.sort('AGASC_ID')

Expand Down Expand Up @@ -336,7 +349,7 @@ def mp_get_agascid(agasc_id):
@pytest.mark.skipif('not TEST_ASCDS')
@pytest.mark.parametrize("ra,dec", list(zip(RAS[:2], DECS[:2])))
def test_agasc_id(ra, dec, radius=0.2, nstar_limit=5):
agasc_file = AGASC_FILE[DS_AGASC_VERSION]
agasc_file = agasc.get_agasc_filename("miniagasc_*", version=DS_AGASC_VERSION)

print('ra, dec =', ra, dec)
stars = agasc.get_agasc_cone(ra, dec, radius=radius, agasc_file=agasc_file,
Expand All @@ -355,15 +368,14 @@ def test_agasc_id(ra, dec, radius=0.2, nstar_limit=5):


def test_proseco_agasc_1p7():
proseco_file = DATA_DIR / 'proseco_agasc_1p7.h5'
if not proseco_file.exists():
pytest.skip(f'No proseco agasc file {proseco_file} found')
proseco_file = agasc.get_agasc_filename("proseco_agasc_*", version="1p7")
mini_file = agasc.get_agasc_filename("miniagasc_*", version="1p7")

# Stars looking toward galactic center (dense!)
p_stars = agasc.get_agasc_cone(-266, -29, 3,
agasc_file=proseco_file, date='2000:001')
m_stars = agasc.get_agasc_cone(-266, -29, 3,
agasc_file=AGASC_FILE['1p7'], date='2000:001')
agasc_file=mini_file, date='2000:001')

# Every miniagasc_1p7 star is in proseco_agasc_1p7
m_ids = m_stars['AGASC_ID']
Expand All @@ -382,8 +394,12 @@ def test_proseco_agasc_1p7():
@pytest.mark.skipif(NO_MAGS_IN_SUPPLEMENT, reason='no mags in supplement')
def test_supplement_get_agasc_cone():
ra, dec = 282.53, -0.38 # Obsid 22429 with a couple of color1=1.5 stars
stars1 = agasc.get_agasc_cone(ra, dec, date='2021:001', use_supplement=False)
stars2 = agasc.get_agasc_cone(ra, dec, date='2021:001', use_supplement=True)
stars1 = agasc.get_agasc_cone(
ra, dec, date='2021:001', agasc_file=MINIAGASC, use_supplement=False
)
stars2 = agasc.get_agasc_cone(
ra, dec, date='2021:001', agasc_file=MINIAGASC, use_supplement=True
)
ok = stars2['MAG_CATID'] == agasc.MAG_CATID_SUPPLEMENT

change_names = ['MAG_CATID', 'COLOR1', 'MAG_ACA', 'MAG_ACA_ERR']
Expand Down Expand Up @@ -420,8 +436,8 @@ def test_supplement_get_star():
agasc_id = 58720672
# Also checks that the default is False given the os.environ override for
# this test file.
star1 = agasc.get_star(agasc_id)
star2 = agasc.get_star(agasc_id, use_supplement=True)
star1 = agasc.get_star(agasc_id, agasc_file=MINIAGASC)
star2 = agasc.get_star(agasc_id, agasc_file=MINIAGASC, use_supplement=True)
assert star1['MAG_CATID'] != agasc.MAG_CATID_SUPPLEMENT
assert star2['MAG_CATID'] == agasc.MAG_CATID_SUPPLEMENT

Expand Down Expand Up @@ -463,8 +479,8 @@ def test_supplement_get_star_disable_decorator():
@pytest.mark.skipif(NO_MAGS_IN_SUPPLEMENT, reason='no mags in supplement')
def test_supplement_get_stars():
agasc_ids = [58720672, 670303120]
star1 = agasc.get_stars(agasc_ids)
star2 = agasc.get_stars(agasc_ids, use_supplement=True)
star1 = agasc.get_stars(agasc_ids, agasc_file=MINIAGASC)
star2 = agasc.get_stars(agasc_ids, agasc_file=MINIAGASC, use_supplement=True)
assert np.all(star1['MAG_CATID'] != agasc.MAG_CATID_SUPPLEMENT)
assert np.all(star2['MAG_CATID'] == agasc.MAG_CATID_SUPPLEMENT)

Expand Down
Loading
Loading