From db806536ffb62fdb355ebf9ad3ccb6b8a7c2485f Mon Sep 17 00:00:00 2001
From: MARCHAND MANON
Date: Tue, 14 Jan 2025 11:42:28 +0100
Subject: [PATCH] feat: add query_hierarchy to get children, parents, or siblings of an object

---
Note: indentation, blank lines, and the ``<Table length=N>`` lines of this copy
were reconstructed from a whitespace-collapsed source, so context lines may
need `git apply --ignore-whitespace`. The added lines differ slightly from the
original commit (single quotes in ``name`` are escaped, docstring wording); the
index hashes below still refer to the original blobs.

 CHANGES.rst                                   |  3 +
 astroquery/simbad/core.py                     | 90 +++++++++++++++++-
 astroquery/simbad/tests/test_simbad.py        | 31 +++++++
 astroquery/simbad/tests/test_simbad_remote.py | 11 +++
 astroquery/simbad/utils.py                    |  3 +
 docs/simbad/simbad.rst                        | 92 +++++++++++++++++++
 6 files changed, 228 insertions(+), 2 deletions(-)

diff --git a/CHANGES.rst b/CHANGES.rst
index b212e7d14a..683dbae938 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -176,6 +176,9 @@ simbad
 - fixed ``query_objects`` that would not work in combination with the additional field
   ``ident`` [#3149]
 
+- added ``query_hierarchy``: a new method to get the parents, children, or
+  siblings of an object [#3175]
+
 skyview
 ^^^^^^^
 
diff --git a/astroquery/simbad/core.py b/astroquery/simbad/core.py
index 28f37b6af1..4f801ffd96 100644
--- a/astroquery/simbad/core.py
+++ b/astroquery/simbad/core.py
@@ -176,6 +176,7 @@ def columns_in_output(self):
         - `query_objects`,
         - `query_region`,
         - `query_catalog`,
+        - `query_hierarchy`,
         - `query_bibobj`,
         - `query_criteria`.
 
@@ -359,6 +360,7 @@ def add_votable_fields(self, *args):
         - `query_objects`,
         - `query_region`,
         - `query_catalog`,
+        - `query_hierarchy`,
         - `query_bibobj`,
         - `query_criteria`.
 
@@ -487,6 +489,7 @@ def reset_votable_fields(self):
         - `query_objects`,
         - `query_region`,
         - `query_catalog`,
+        - `query_hierarchy`,
         - `query_bibobj`,
         - `query_criteria`.
 
@@ -855,6 +858,86 @@ def query_catalog(self, catalog, *, criteria=None, get_query_payload=False,
         return self._query(top, columns, joins, instance_criteria,
                            get_query_payload=get_query_payload)
 
+    def query_hierarchy(self, name, hierarchy, *,
+                        detailed_hierarchy=False,
+                        criteria=None, get_query_payload=False):
+        """Query the parents, children, or siblings of an object.
+
+        Parameters
+        ----------
+        name : str
+            Name of the object. Single quotes in the name are escaped automatically.
+        hierarchy : str
+            Can take the values "parents" to return the parents of the object (ex: a
+            galaxy cluster is a parent of a galaxy), the value "children" to return
+            the children of an object (ex: stars can be children of a globular cluster),
+            or the value "siblings" to return the objects that share a parent with the
+            given one (ex: the stars of an open cluster are all siblings).
+        detailed_hierarchy : bool, optional
+            Whether to add the two extra columns 'hierarchy_bibcode' that gives the
+            article in which the hierarchy link is mentioned, and
+            'membership_certainty'. membership_certainty is an integer that reflects the
+            certainty of the hierarchy link according to the authors. Ranges between 0
+            and 100 where 100 means that the authors were certain of the classification.
+            Defaults to False.
+        criteria : str, optional
+            Criteria to be applied to the query. These should be written in the ADQL
+            syntax in a single string, for example ``"otype='G..'"``.
+        get_query_payload : bool, optional
+            When set to `True` the method returns the HTTP request parameters without
+            querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload.
+            Defaults to `False`.
+
+        Returns
+        -------
+        table : `~astropy.table.Table`
+            Query results table
+
+        Examples
+        --------
+        >>> from astroquery.simbad import Simbad
+        >>> parent = Simbad.query_hierarchy("2MASS J18511048-0615470",
+        ...                                 hierarchy="parents") # doctest: +REMOTE_DATA
+        >>> parent[["main_id", "ra", "dec"]] # doctest: +REMOTE_DATA
+        <Table length=1>
+         main_id     ra     dec
+                    deg     deg
+          object  float64 float64
+        --------- ------- -------
+        NGC  6705 282.766  -6.272
+        """
+        top, columns, joins, instance_criteria = self._get_query_parameters()
+
+        sub_query = ("(SELECT oidref FROM ident WHERE id = '{}') AS name"
+                     .format(name.replace("'", "''")))  # ADQL escapes ' by doubling it
+
+        if detailed_hierarchy:
+            columns.append(_Column("h_link", "link_bibcode", "hierarchy_bibcode"))
+            columns.append(_Column("h_link", "membership", "membership_certainty"))
+
+        if hierarchy == "parents":
+            joins += [_Join("h_link", _Column("basic", "oid"), _Column("h_link", "parent"))]
+            instance_criteria.append("h_link.child = name.oidref")
+        elif hierarchy == "children":
+            joins += [_Join("h_link", _Column("basic", "oid"), _Column("h_link", "child"))]
+            instance_criteria.append("h_link.parent = name.oidref")
+        elif hierarchy == "siblings":
+            sub_query = ("(SELECT DISTINCT basic.oid FROM "
+                         f"{sub_query}, basic JOIN h_link ON basic.oid = h_link.parent "
+                         "WHERE h_link.child = name.oidref) AS parents")
+            joins += [_Join("h_link", _Column("basic", "oid"), _Column("h_link", "child"))]
+            instance_criteria.append("h_link.parent = parents.oid")
+        else:
+            raise ValueError("'hierarchy' can only take the values 'parents', "
+                             f"'siblings', or 'children'. Got '{hierarchy}'.")
+
+        if criteria:
+            instance_criteria.append(f"({criteria})")
+
+        return self._query(top, columns, joins, instance_criteria,
+                           from_table=f"{sub_query}, basic", distinct=True,
+                           get_query_payload=get_query_payload)
+
     @deprecated_renamed_argument(["verbose"], new_name=[None],
                                  since=['0.4.8'], relax=True)
     def query_bibobj(self, bibcode, *, criteria=None,
@@ -1369,7 +1452,7 @@ def _get_query_parameters(self):
         """Get the current building blocks of an ADQL query."""
         return tuple(map(copy.deepcopy, (self.ROW_LIMIT, self.columns_in_output, self.joins, self.criteria)))
 
-    def _query(self, top, columns, joins, criteria, from_table="basic",
+    def _query(self, top, columns, joins, criteria, from_table="basic", distinct=False,
                get_query_payload=False, **uploads):
         """Generate an ADQL string from the given query parameters and executes the query.
 
@@ -1386,6 +1469,8 @@ def _query(self, top, columns, joins, criteria, from_table="basic",
             with an AND clause.
         from_table : str, optional
             The table after 'FROM' in the ADQL string. Defaults to "basic".
+        distinct : bool, optional
+            Whether to add the DISTINCT instruction to the query.
         get_query_payload : bool, optional
             When set to `True` the method returns the HTTP request parameters without
             querying SIMBAD. The ADQL string is in the 'QUERY' key of the payload.
@@ -1400,6 +1485,7 @@ def _query(self, top, columns, joins, criteria, from_table="basic",
         `~astropy.table.Table`
             The result of the query to SIMBAD.
         """
+        distinct_results = " DISTINCT" if distinct else ""
         top_part = f" TOP {top}" if top != -1 else ""
 
         # columns
@@ -1433,7 +1519,7 @@ def _query(self, top, columns, joins, criteria, from_table="basic",
         else:
             criteria = ""
 
-        query = f"SELECT{top_part}{columns} FROM {from_table}{join}{criteria}"
+        query = f"SELECT{distinct_results}{top_part}{columns} FROM {from_table}{join}{criteria}"
 
         response = self.query_tap(query, get_query_payload=get_query_payload,
                                   maxrec=self.hardlimit,
diff --git a/astroquery/simbad/tests/test_simbad.py b/astroquery/simbad/tests/test_simbad.py
index ad59007919..4834090370 100644
--- a/astroquery/simbad/tests/test_simbad.py
+++ b/astroquery/simbad/tests/test_simbad.py
@@ -345,6 +345,37 @@ def test_query_catalog():
     assert adql.endswith(where_clause)
 
 
+@pytest.mark.usefixtures("_mock_simbad_class")
+def test_query_hierarchy():
+    simbad_instance = simbad.Simbad()
+    detailed = ('h_link."link_bibcode" AS "hierarchy_bibcode", h_link."membership"'
+                ' AS "membership_certainty"')
+    # the three possible cases
+    adql = simbad_instance.query_hierarchy("test", hierarchy="parents",
+                                           detailed_hierarchy=True,
+                                           get_query_payload=True)["QUERY"]
+    assert "h_link.child = name.oidref" in adql
+    assert detailed in adql
+    adql = simbad_instance.query_hierarchy("test", hierarchy="children",
+                                           criteria="test=test",
+                                           get_query_payload=True)["QUERY"]
+    assert "h_link.parent = name.oidref" in adql
+    assert "test=test" in adql
+    assert detailed not in adql
+    adql = simbad_instance.query_hierarchy("test", hierarchy="siblings",
+                                           get_query_payload=True)["QUERY"]
+    assert "h_link.parent = parents.oid" in adql
+    # if the keyword does not correspond
+    with pytest.raises(ValueError, match="'hierarchy' can only take the values "
+                                         "'parents', 'siblings', or 'children'. Got 'test'."):
+        simbad_instance.query_hierarchy("object", hierarchy="test",
+                                        get_query_payload=True)
+    # the former hierarchy votable fields now raise an explicit error
+    with pytest.raises(ValueError, match="The hierarchy information is no longer an "
+                                         "additional field. *"):
+        simbad_instance.add_votable_fields("membership")
+
+
 @pytest.mark.parametrize(('coordinates', 'radius', 'where'),
                          [(ICRS_COORDS, 2*u.arcmin,
                            r"WHERE CONTAINS\(POINT\('ICRS', basic\.ra, basic\.dec\), "
diff --git a/astroquery/simbad/tests/test_simbad_remote.py b/astroquery/simbad/tests/test_simbad_remote.py
index 1cc690689d..da2ea23044 100644
--- a/astroquery/simbad/tests/test_simbad_remote.py
+++ b/astroquery/simbad/tests/test_simbad_remote.py
@@ -55,6 +55,17 @@ def test_query_catalog(self):
         result = self.simbad.query_catalog('M')
         assert len(result) == 110
 
+    def test_query_hierarchy(self):
+        self.simbad.ROW_LIMIT = -1
+        obj = "NGC 4038"
+        parents = self.simbad.query_hierarchy(obj, hierarchy="parents")
+        assert len(parents) == 4
+        children = self.simbad.query_hierarchy(obj, hierarchy="children")
+        assert len(children) >= 45  # as of 2025, but more could be added
+        siblings = self.simbad.query_hierarchy(obj, hierarchy="siblings",
+                                               criteria="otype='G..'")
+        assert len(siblings) >= 29
+
     def test_query_region(self):
         self.simbad.ROW_LIMIT = 10
         result = self.simbad.query_region(ICRS_COORDS_M42, radius="1d")
diff --git a/astroquery/simbad/utils.py b/astroquery/simbad/utils.py
index 675f183647..fd78068abe 100644
--- a/astroquery/simbad/utils.py
+++ b/astroquery/simbad/utils.py
@@ -37,6 +37,9 @@ def _catch_deprecated_fields_with_arguments(votable_field):
     if votable_field.startswith("bibcodelist("):
         raise ValueError("Selecting a range of years for bibcode is removed. You can still use "
                          "bibcodelist without parenthesis and get the full list of bibliographic references.")
+    if votable_field in ["membership", "link_bibcode"]:
+        raise ValueError("The hierarchy information is no longer an additional field. "
+                         "It has been replaced by the 'query_hierarchy' method.")
 
 # ----------------------------
 # Support wildcard argument
diff --git a/docs/simbad/simbad.rst b/docs/simbad/simbad.rst
index 3424ed53f6..842afcb110 100644
--- a/docs/simbad/simbad.rst
+++ b/docs/simbad/simbad.rst
@@ -155,6 +155,96 @@ associated with an object.
     NAME North Star
     WEB 2438
 
+Query to get all parents (or children, or siblings) of an object
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Let's find the galaxies composing the galaxy pair ``Mrk 116``:
+
+.. doctest-remote-data::
+
+    >>> from astroquery.simbad import Simbad
+    >>> galaxies = Simbad.query_hierarchy("Mrk 116",
+    ...                                   hierarchy="children", criteria="otype='G..'")
+    >>> galaxies[["main_id", "ra", "dec"]]
+    <Table length=2>
+     main_id         ra            dec
+                    deg            deg
+      object      float64        float64
+    --------- --------------- --------------
+    Mrk  116A 143.50821525019 55.24105273196
+    Mrk  116B      143.509956      55.239762
+
+Alternatively, if we know one member of a group, we can find the others by asking for
+``siblings``:
+
+.. doctest-remote-data::
+
+    >>> from astroquery.simbad import Simbad
+    >>> galaxies = Simbad.query_hierarchy("Mrk 116A",
+    ...                                   hierarchy="siblings", criteria="otype='G..'")
+    >>> galaxies[["main_id", "ra", "dec"]]
+    <Table length=2>
+     main_id         ra            dec
+                    deg            deg
+      object      float64        float64
+    --------- --------------- --------------
+    Mrk  116A 143.50821525019 55.24105273196
+    Mrk  116B      143.509956      55.239762
+
+Note that if we had not added the criteria on the object type, we would also get
+some stars that are part of these galaxies in the result.
+
+And the other way around, let's find which cluster of stars contains
+``2MASS J18511048-0615470``:
+
+.. doctest-remote-data::
+
+    >>> from astroquery.simbad import Simbad
+    >>> cluster = Simbad.query_hierarchy("2MASS J18511048-0615470", hierarchy="parents")
+    >>> cluster[["main_id", "ra", "dec"]]
+    <Table length=1>
+     main_id     ra     dec
+                deg     deg
+      object  float64 float64
+    --------- ------- -------
+    NGC  6705 282.766  -6.272
+
+If needed, we can get a more detailed report with the two extra columns:
+ - ``hierarchy_bibcode``: the paper in which the hierarchy is established,
+ - ``membership_certainty``: if present in the paper, a certainty index (100 meaning
+   100% sure).
+
+.. doctest-remote-data::
+
+    >>> from astroquery.simbad import Simbad
+    >>> cluster = Simbad.query_hierarchy("2MASS J18511048-0615470",
+    ...                                  hierarchy="parents",
+    ...                                  detailed_hierarchy=True)
+    >>> cluster[["main_id", "ra", "dec", "hierarchy_bibcode", "membership_certainty"]]
+    <Table length=13>
+     main_id     ra     dec    hierarchy_bibcode  membership_certainty
+                deg     deg                             percent
+      object  float64 float64        object              int16
+    --------- ------- ------- ------------------- --------------------
+    NGC  6705 282.766  -6.272 2014A&A...563A..44M                  100
+    NGC  6705 282.766  -6.272 2015A&A...573A..55T                  100
+    NGC  6705 282.766  -6.272 2016A&A...591A..37J                  100
+    NGC  6705 282.766  -6.272 2018A&A...618A..93C                  100
+    NGC  6705 282.766  -6.272 2020A&A...633A..99C                  100
+    NGC  6705 282.766  -6.272 2020A&A...640A...1C                  100
+    NGC  6705 282.766  -6.272 2020A&A...643A..71G                  100
+    NGC  6705 282.766  -6.272 2020ApJ...903...55P                  100
+    NGC  6705 282.766  -6.272 2020MNRAS.496.4701J                  100
+    NGC  6705 282.766  -6.272 2021A&A...647A..19T                  100
+    NGC  6705 282.766  -6.272 2021A&A...651A..84M                  100
+    NGC  6705 282.766  -6.272 2021MNRAS.503.3279S                   99
+    NGC  6705 282.766  -6.272 2022MNRAS.509.1664J                  100
+
+Here, we see that the Simbad team found 13 papers mentioning the fact that
+``2MASS J18511048-0615470`` is a member of ``NGC 6705`` and that the authors of these
+articles gave high confidence indices for this membership (``membership_certainty`` is
+close to 100 for all bibcodes).
+
 Query a region
 ^^^^^^^^^^^^^^
 
@@ -421,6 +511,7 @@ Some query methods outputs can be customized. This is the case for:
 - `~astroquery.simbad.SimbadClass.query_objects`
 - `~astroquery.simbad.SimbadClass.query_region`
 - `~astroquery.simbad.SimbadClass.query_catalog`
+- `~astroquery.simbad.SimbadClass.query_hierarchy`
 - `~astroquery.simbad.SimbadClass.query_bibobj`
 
 For these methods, the default columns in the output are:
@@ -523,6 +614,7 @@ Most query methods take a ``criteria`` argument. They are listed here:
 - `~astroquery.simbad.SimbadClass.query_objects`
 - `~astroquery.simbad.SimbadClass.query_region`
 - `~astroquery.simbad.SimbadClass.query_catalog`
+- `~astroquery.simbad.SimbadClass.query_hierarchy`
 - `~astroquery.simbad.SimbadClass.query_bibobj`
 - `~astroquery.simbad.SimbadClass.query_bibcode`
 - `~astroquery.simbad.SimbadClass.query_objectids`