Skip to content

Commit

Permalink
Merge pull request #3175 from cds-astro/simbad-hierarchy
Browse files Browse the repository at this point in the history
feat: add query_hierarchy
  • Loading branch information
bsipocz authored Jan 14, 2025
2 parents 94dbe5c + db80653 commit e13430f
Show file tree
Hide file tree
Showing 6 changed files with 228 additions and 2 deletions.
3 changes: 3 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,9 @@ simbad
- fixed ``query_objects`` that would not work in combination with the additional field
``ident`` [#3149]

- added ``query_hierarchy``: a new method to get the parents, children, or
siblings of an object [#3175]

skyview
^^^^^^^

Expand Down
90 changes: 88 additions & 2 deletions astroquery/simbad/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,7 @@ def columns_in_output(self):
- `query_objects`,
- `query_region`,
- `query_catalog`,
- `query_hierarchy`,
- `query_bibobj`,
- `query_criteria`.
Expand Down Expand Up @@ -359,6 +360,7 @@ def add_votable_fields(self, *args):
- `query_objects`,
- `query_region`,
- `query_catalog`,
- `query_hierarchy`,
- `query_bibobj`,
- `query_criteria`.
Expand Down Expand Up @@ -487,6 +489,7 @@ def reset_votable_fields(self):
- `query_objects`,
- `query_region`,
- `query_catalog`,
- `query_hierarchy`,
- `query_bibobj`,
- `query_criteria`.
Expand Down Expand Up @@ -855,6 +858,86 @@ def query_catalog(self, catalog, *, criteria=None, get_query_payload=False,
return self._query(top, columns, joins, instance_criteria,
get_query_payload=get_query_payload)

def query_hierarchy(self, name, hierarchy, *,
                    detailed_hierarchy=False,
                    criteria=None, get_query_payload=False):
    """Query the parents, the children, or the siblings of an object.

    Parameters
    ----------
    name : str
        Name of the object. It is matched against the ``id`` column of the
        ``ident`` table. Apostrophes in the name (ex: "Barnard's star") are
        escaped before the name is inserted in the ADQL query.
    hierarchy : str
        Can take the values "parents" to return the parents of the object (ex: a
        galaxy cluster is a parent of a galaxy), the value "children" to return
        the children of an object (ex: stars can be children of a globular cluster),
        or the value "siblings" to return the objects that share a parent with the
        given one (ex: the stars of an open cluster are all siblings).
    detailed_hierarchy : bool
        Whether to add the two extra columns 'hierarchy_bibcode' that gives the
        article in which the hierarchy link is mentioned, and
        'membership_certainty'. membership_certainty is an integer that reflects the
        certainty of the hierarchy link according to the authors. Ranges between 0
        and 100 where 100 means that the authors were certain of the classification.
        Defaults to False.
    criteria : str
        Criteria to be applied to the query. These should be written in the ADQL
        syntax in a single string (ex: ``criteria="otype='G..'"``). The string is
        wrapped in parentheses and combined with the other criteria of the query.
    get_query_payload : bool, optional
        When set to `True` the method returns the HTTP request parameters without
        querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload.
        Defaults to `False`.

    Returns
    -------
    table : `~astropy.table.Table`
        Query results table

    Raises
    ------
    ValueError
        If ``hierarchy`` is not one of "parents", "children", or "siblings".

    Examples
    --------
    >>> from astroquery.simbad import Simbad
    >>> parent = Simbad.query_hierarchy("2MASS J18511048-0615470",
    ...                                 hierarchy="parents") # doctest: +REMOTE_DATA
    >>> parent[["main_id", "ra", "dec"]] # doctest: +REMOTE_DATA
    <Table length=1>
     main_id     ra     dec
                deg     deg
      object  float64 float64
    --------- ------- -------
     NGC 6705 282.766  -6.272
    """
    top, columns, joins, instance_criteria = self._get_query_parameters()

    # In an ADQL string literal an apostrophe is written as two apostrophes.
    # Without this, a name such as "Barnard's star" would close the literal
    # early and produce an invalid (or unintended) query.
    escaped_name = str(name).replace("'", "''")
    sub_query = ("(SELECT oidref FROM ident "
                 f"WHERE id = '{escaped_name}') AS name")

    if detailed_hierarchy:
        columns.append(_Column("h_link", "link_bibcode", "hierarchy_bibcode"))
        columns.append(_Column("h_link", "membership", "membership_certainty"))

    if hierarchy == "parents":
        # rows of 'basic' are the parents, the named object is the child
        joins += [_Join("h_link", _Column("basic", "oid"), _Column("h_link", "parent"))]
        instance_criteria.append("h_link.child = name.oidref")
    elif hierarchy == "children":
        # rows of 'basic' are the children, the named object is the parent
        joins += [_Join("h_link", _Column("basic", "oid"), _Column("h_link", "child"))]
        instance_criteria.append("h_link.parent = name.oidref")
    elif hierarchy == "siblings":
        # first collect the parents of the named object in a sub-query, then
        # select every child of any of these parents
        sub_query = ("(SELECT DISTINCT basic.oid FROM "
                     f"{sub_query}, basic JOIN h_link ON basic.oid = h_link.parent "
                     "WHERE h_link.child = name.oidref) AS parents")
        joins += [_Join("h_link", _Column("basic", "oid"), _Column("h_link", "child"))]
        instance_criteria.append("h_link.parent = parents.oid")
    else:
        raise ValueError("'hierarchy' can only take the values 'parents', "
                         f"'siblings', or 'children'. Got '{hierarchy}'.")

    if criteria:
        instance_criteria.append(f"({criteria})")

    # DISTINCT is requested so that identical output rows are returned only once
    return self._query(top, columns, joins, instance_criteria,
                       from_table=f"{sub_query}, basic", distinct=True,
                       get_query_payload=get_query_payload)

@deprecated_renamed_argument(["verbose"], new_name=[None],
since=['0.4.8'], relax=True)
def query_bibobj(self, bibcode, *, criteria=None,
Expand Down Expand Up @@ -1369,7 +1452,7 @@ def _get_query_parameters(self):
"""Get the current building blocks of an ADQL query."""
return tuple(map(copy.deepcopy, (self.ROW_LIMIT, self.columns_in_output, self.joins, self.criteria)))

def _query(self, top, columns, joins, criteria, from_table="basic",
def _query(self, top, columns, joins, criteria, from_table="basic", distinct=False,
get_query_payload=False, **uploads):
"""Generate an ADQL string from the given query parameters and executes the query.
Expand All @@ -1386,6 +1469,8 @@ def _query(self, top, columns, joins, criteria, from_table="basic",
with an AND clause.
from_table : str, optional
The table after 'FROM' in the ADQL string. Defaults to "basic".
distinct : bool, optional
Whether to add the DISTINCT instruction to the query.
get_query_payload : bool, optional
When set to `True` the method returns the HTTP request parameters without
querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload.
Expand All @@ -1400,6 +1485,7 @@ def _query(self, top, columns, joins, criteria, from_table="basic",
`~astropy.table.Table`
The result of the query to SIMBAD.
"""
distinct_results = " DISTINCT" if distinct else ""
top_part = f" TOP {top}" if top != -1 else ""

# columns
Expand Down Expand Up @@ -1433,7 +1519,7 @@ def _query(self, top, columns, joins, criteria, from_table="basic",
else:
criteria = ""

query = f"SELECT{top_part}{columns} FROM {from_table}{join}{criteria}"
query = f"SELECT{distinct_results}{top_part}{columns} FROM {from_table}{join}{criteria}"

response = self.query_tap(query, get_query_payload=get_query_payload,
maxrec=self.hardlimit,
Expand Down
31 changes: 31 additions & 0 deletions astroquery/simbad/tests/test_simbad.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,37 @@ def test_query_catalog():
assert adql.endswith(where_clause)


@pytest.mark.usefixtures("_mock_simbad_class")
def test_query_hierarchy():
    """Check the ADQL generated by ``query_hierarchy`` for every hierarchy keyword."""
    instance = simbad.Simbad()
    detailed_columns = ('h_link."link_bibcode" AS "hierarchy_bibcode", h_link."membership"'
                        ' AS "membership_certainty"')

    def build_adql(**options):
        # with get_query_payload=True the ADQL string is under the "QUERY" key
        payload = instance.query_hierarchy("test", get_query_payload=True, **options)
        return payload["QUERY"]

    # parents, with the two detailed columns requested
    parents_adql = build_adql(hierarchy="parents", detailed_hierarchy=True)
    assert "h_link.child = name.oidref" in parents_adql
    assert detailed_columns in parents_adql

    # children, with an extra criteria and without the detailed columns
    children_adql = build_adql(hierarchy="children", criteria="test=test")
    assert "h_link.parent = name.oidref" in children_adql
    assert "test=test" in children_adql
    assert detailed_columns not in children_adql

    # siblings
    siblings_adql = build_adql(hierarchy="siblings")
    assert "h_link.parent = parents.oid" in siblings_adql

    # an unsupported keyword is rejected
    unknown_keyword = ("'hierarchy' can only take the values "
                       "'parents', 'siblings', or 'children'. Got 'test'.")
    with pytest.raises(ValueError, match=unknown_keyword):
        instance.query_hierarchy("object", hierarchy="test",
                                 get_query_payload=True)

    # the former votable field points users to the new method
    removed_field = ("The hierarchy information is no longer an "
                     "additional field. *")
    with pytest.raises(ValueError, match=removed_field):
        instance.add_votable_fields("membership")


@pytest.mark.parametrize(('coordinates', 'radius', 'where'),
[(ICRS_COORDS, 2*u.arcmin,
r"WHERE CONTAINS\(POINT\('ICRS', basic\.ra, basic\.dec\), "
Expand Down
11 changes: 11 additions & 0 deletions astroquery/simbad/tests/test_simbad_remote.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,17 @@ def test_query_catalog(self):
result = self.simbad.query_catalog('M')
assert len(result) == 110

def test_query_hierarchy(self):
    """Query the parents, children, and siblings of NGC 4038 on the live service."""
    target = "NGC 4038"
    # -1 means that no TOP clause is added to the query
    self.simbad.ROW_LIMIT = -1

    n_parents = len(self.simbad.query_hierarchy(target, hierarchy="parents"))
    assert n_parents == 4

    n_children = len(self.simbad.query_hierarchy(target, hierarchy="children"))
    # count as of 2025, lower bound only since more could be added
    assert n_children >= 45

    galaxy_siblings = self.simbad.query_hierarchy(target, hierarchy="siblings",
                                                  criteria="otype='G..'")
    assert len(galaxy_siblings) >= 29

def test_query_region(self):
self.simbad.ROW_LIMIT = 10
result = self.simbad.query_region(ICRS_COORDS_M42, radius="1d")
Expand Down
3 changes: 3 additions & 0 deletions astroquery/simbad/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ def _catch_deprecated_fields_with_arguments(votable_field):
if votable_field.startswith("bibcodelist("):
raise ValueError("Selecting a range of years for bibcode is removed. You can still use "
"bibcodelist without parenthesis and get the full list of bibliographic references.")
if votable_field in ["membership", "link_bibcode"]:
raise ValueError("The hierarchy information is no longer an additional field. "
"It has been replaced by the 'query_hierarchy' method.")

# ----------------------------
# Support wildcard argument
Expand Down
92 changes: 92 additions & 0 deletions docs/simbad/simbad.rst
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,96 @@ associated with an object.
NAME North Star
WEB 2438

Query to get all parents (or children, or siblings) of an object
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Let's find the galaxies composing the galaxy pair ``Mrk 116``:

.. doctest-remote-data::

>>> from astroquery.simbad import Simbad
>>> galaxies = Simbad.query_hierarchy("Mrk 116",
... hierarchy="children", criteria="otype='G..'")
>>> galaxies[["main_id", "ra", "dec"]]
<Table length=2>
main_id ra dec
deg deg
object float64 float64
--------- --------------- --------------
Mrk 116A 143.50821525019 55.24105273196
Mrk 116B 143.509956 55.239762

Alternatively, if we know one member of a group, we can find the others by asking for
``siblings``:

.. doctest-remote-data::

>>> from astroquery.simbad import Simbad
>>> galaxies = Simbad.query_hierarchy("Mrk 116A",
... hierarchy="siblings", criteria="otype='G..'")
>>> galaxies[["main_id", "ra", "dec"]]
<Table length=2>
main_id ra dec
deg deg
object float64 float64
--------- --------------- --------------
Mrk 116A 143.50821525019 55.24105273196
Mrk 116B 143.509956 55.239762

Note that if we had not added the criterion on the object type, we would also get
some stars that are part of these galaxies in the result.

And the other way around, let's find which cluster of stars contains
``2MASS J18511048-0615470``:

.. doctest-remote-data::

>>> from astroquery.simbad import Simbad
>>> cluster = Simbad.query_hierarchy("2MASS J18511048-0615470", hierarchy="parents")
>>> cluster[["main_id", "ra", "dec"]]
<Table length=1>
main_id ra dec
deg deg
object float64 float64
--------- ------- -------
NGC 6705 282.766 -6.272

If needed, we can get a more detailed report with the two extra columns:

- ``hierarchy_bibcode``: the paper in which the hierarchy is established,
- ``membership_certainty``: if present in the paper, a certainty index (100 meaning
  100% sure).

.. doctest-remote-data::

>>> from astroquery.simbad import Simbad
>>> cluster = Simbad.query_hierarchy("2MASS J18511048-0615470",
... hierarchy="parents",
... detailed_hierarchy=True)
>>> cluster[["main_id", "ra", "dec", "hierarchy_bibcode", "membership_certainty"]]
<Table length=13>
main_id ra dec hierarchy_bibcode membership_certainty
deg deg percent
object float64 float64 object int16
--------- ------- ------- ------------------- --------------------
NGC 6705 282.766 -6.272 2014A&A...563A..44M 100
NGC 6705 282.766 -6.272 2015A&A...573A..55T 100
NGC 6705 282.766 -6.272 2016A&A...591A..37J 100
NGC 6705 282.766 -6.272 2018A&A...618A..93C 100
NGC 6705 282.766 -6.272 2020A&A...633A..99C 100
NGC 6705 282.766 -6.272 2020A&A...640A...1C 100
NGC 6705 282.766 -6.272 2020A&A...643A..71G 100
NGC 6705 282.766 -6.272 2020ApJ...903...55P 100
NGC 6705 282.766 -6.272 2020MNRAS.496.4701J 100
NGC 6705 282.766 -6.272 2021A&A...647A..19T 100
NGC 6705 282.766 -6.272 2021A&A...651A..84M 100
NGC 6705 282.766 -6.272 2021MNRAS.503.3279S 99
NGC 6705 282.766 -6.272 2022MNRAS.509.1664J 100

Here, we see that the Simbad team found 13 papers mentioning the fact that
``2MASS J18511048-0615470`` is a member of ``NGC 6705`` and that the authors of these
articles gave high confidence indices for this membership (``membership_certainty`` is
close to 100 for all bibcodes).


Query a region
^^^^^^^^^^^^^^
Expand Down Expand Up @@ -421,6 +511,7 @@ Some query methods outputs can be customized. This is the case for:
- `~astroquery.simbad.SimbadClass.query_objects`
- `~astroquery.simbad.SimbadClass.query_region`
- `~astroquery.simbad.SimbadClass.query_catalog`
- `~astroquery.simbad.SimbadClass.query_hierarchy`
- `~astroquery.simbad.SimbadClass.query_bibobj`

For these methods, the default columns in the output are:
Expand Down Expand Up @@ -523,6 +614,7 @@ Most query methods take a ``criteria`` argument. They are listed here:
- `~astroquery.simbad.SimbadClass.query_objects`
- `~astroquery.simbad.SimbadClass.query_region`
- `~astroquery.simbad.SimbadClass.query_catalog`
- `~astroquery.simbad.SimbadClass.query_hierarchy`
- `~astroquery.simbad.SimbadClass.query_bibobj`
- `~astroquery.simbad.SimbadClass.query_bibcode`
- `~astroquery.simbad.SimbadClass.query_objectids`
Expand Down

0 comments on commit e13430f

Please sign in to comment.