Source code for sbpy.data.names

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
======================
sbpy data.Names Module
======================

Class for dealing with object naming conventions.

created on August 28, 2017

"""

import re
from ..exceptions import SbpyException

# Names exported by ``from sbpy.data.names import *``.
__all__ = ['Names', 'TargetNameParseError', 'natural_sort_key']


def natural_sort_key(s):
    """List sort keys considering strings of numbers as integers.

    Intended to be used with `list.sort` or the `sorted` built-in
    function.

    Parameters
    ----------
    s : string
        String to parse into keys.  Any other object is converted with
        `str` first.

    Returns
    -------
    keys : tuple
        Keys for sorting: text pieces (`str`) at even positions and runs
        of ASCII digits (`int`) at odd positions.  Because the two types
        always alternate in the same way, keys of different strings can
        be compared element by element.

    Examples
    --------
    >>> from sbpy.data.names import natural_sort_key
    >>> comets = ['9P/Tempel 1',
    ...           '101P/Chernykh',
    ...           '10P/Tempel 2',
    ...           '2P/Encke']
    >>> sorted(comets)
    ['101P/Chernykh', '10P/Tempel 2', '2P/Encke', '9P/Tempel 1']
    >>> sorted(comets, key=natural_sort_key)
    ['2P/Encke', '9P/Tempel 1', '10P/Tempel 2', '101P/Chernykh']

    """
    # Splitting on a capturing group keeps the separators: the result
    # alternates text, digits, text, ...  Only the odd positions are
    # digit runs, so decide by position rather than by ``str.isdigit``,
    # which is also true for characters (e.g. superscript digits) that
    # ``int`` cannot convert.
    pieces = re.split('([0-9]+)', str(s))
    return tuple(int(piece) if index % 2 else piece
                 for index, piece in enumerate(pieces))
class TargetNameParseError(SbpyException):
    """Raised when a target identifier cannot be parsed or converted."""
class Names:
    """Class for parsing target identifiers.

    The functions in this class will identify designation, name
    strings, and number for both comets and asteroids.  It also
    includes functionality to distinguish between comet and asteroid
    identifiers.

    All methods are static methods.
    """

    # packed numbers translation string: the index of a character is
    # the value it encodes (0-9, then A-Z = 10-35, then a-z = 36-61)
    pkd = ('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
           'abcdefghijklmnopqrstuvwxyz')

    @staticmethod
    def _pack_cycle(num, s):
        """Pack the trailing number of a designation into two characters.

        Parameters
        ----------
        num : str
            Number part of the designation (may be empty).
        s : str
            Complete identifier; only used in the error message.

        Returns
        -------
        packed : str
            ``'00'`` for an empty number, a zero-padded string for one
            character, the unchanged input for two characters, and
            otherwise the `pkd` character for all but the last
            character followed by the last character.

        Raises
        ------
        TargetNameParseError
            If a number longer than two characters cannot be packed.
        """
        if num == '':
            return '00'
        if len(num) == 1:
            return '0' + num
        if len(num) > 2:
            try:
                return Names.pkd[int(num[:-1])] + num[-1]
            except (IndexError, ValueError):
                raise TargetNameParseError(
                    ('{} cannot be turned into a '
                     'packed designation').format(s))
        return num

    @staticmethod
    def to_packed(s):
        """Convert designation or number to packed identifier.

        Parameters
        ----------
        s : str
            Target identifier.

        Returns
        -------
        p : str
            Packed designation/number.

        Raises
        ------
        TargetNameParseError
            If ``s`` cannot be turned into a packed number or
            designation.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> Names.to_packed('1995 AA1')
        'J95A01A'
        """
        if s.isdigit():
            # number
            number = int(s)
            if number < 100000:
                return '{:05d}'.format(number)
            if number > 619999:
                # pkd only has 62 characters for the leading digits
                raise TargetNameParseError(
                    '{} cannot be turned into a packed number'.format(
                        number))
            leading, remainder = divmod(number, 10000)
            return '{}{:04d}'.format(Names.pkd[leading], remainder)

        # Palomar-Leiden survey
        if s.endswith('P-L'):
            return 'PLS{}'.format(s[:4])

        # Trojan surveys
        if s[-3:] in ('T-1', 'T-2', 'T-3'):
            return 'T{}S{}'.format(s[-1], s[:4])

        if not (s[:4].isdigit()
                and (s[5:].isalnum() or s[5:s.find('-')].isalnum())):
            raise TargetNameParseError(
                ('{} cannot be turned into a '
                 'packed number or designation').format(s))

        # cometary or minor planet temporary designation
        # when the half-month and number are two digits: cometary
        # when there is a trailing fragment designation: cometary
        # otherwise: minor planet
        has_fragment = s[-2] == '-' and s[-1].isalpha()
        if (s[5].isalpha() and s[6:].isdigit()) or has_fragment:
            if s[-2] == '-':
                frag = s[-1]
                num = s[6:-2]
            else:
                frag = '0'
                num = s[6:]
            num = Names._pack_cycle(num, s)
            return '{}{}{}{}{}'.format(
                Names.pkd[int(s[:2])], s[2:4], s[5], num, frag.lower())

        stripped = s.strip()
        yr = Names.pkd[int(stripped[:2])] + stripped[2:4]
        let = stripped[4:7].strip()
        num = Names._pack_cycle(stripped[7:].strip(), s)
        if len(let) < 2:
            # a minor planet designation needs two letters; report the
            # problem as a parse error instead of an IndexError
            raise TargetNameParseError(
                ('{} cannot be turned into a '
                 'packed designation').format(s))
        return yr + let[0] + num + let[1]

    @staticmethod
    def from_packed(p):
        """Unpack asteroid designation/number.

        Parameters
        ----------
        p : str
            Packed target identifier.

        Returns
        -------
        s : int or str
            Unpacked number (`int`) or designation (`str`).  Input that
            matches none of the known packed formats is returned
            unchanged.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> Names.from_packed('J95A01A')
        '1995 AA1'
        """
        if not p:
            # nothing to unpack; avoids indexing into an empty string
            return p

        # packed number
        if p.isdigit():
            return int(p)
        if p[0].isalpha() and p[1:].isdigit():
            return int(str(Names.pkd.find(p[0])) + p[1:])

        # old designation style, e.g.: 1989AB
        if len(p.strip()) < 7 and p[:4].isdigit() and p[4:6].isalpha():
            return p[:4] + ' ' + p[4:6]

        # Palomar Survey
        if p.startswith('PLS'):
            return p[3:] + ' P-L'

        # Trojan Surveys
        if p[:3] in ('T1S', 'T2S', 'T3S'):
            return '{} T-{}'.format(p[3:], p[1])

        # insert blank in designations
        if p[:4].isdigit() and p[4:6].isalpha():
            return p[:4] + ' ' + p[4:]

        # MPC packed 7-digit designation; the length check keeps
        # shorter, malformed strings from raising an IndexError on p[6]
        if (len(p) >= 7 and p[0].isalpha() and p[1:3].isdigit()
                and p[-2].isdigit()):
            last = p[6]
            second_letter = last if (last.isalpha()
                                     and last.isupper()) else ''
            number = (str(Names.pkd.find(p[4])) + p[5]).lstrip('0')
            # a lower-case last character is a comet fragment
            fragment = '-{}'.format(last.upper()) if last.islower() else ''
            return '{}{} {}{}{}{}'.format(
                str(Names.pkd.find(p[0])), p[1:3], p[3],
                second_letter, number, fragment)

        # nothing to do
        return p

    @staticmethod
    def parse_comet(s):
        """Parse a string as if it were a comet name.

        Considers IAU-formatted permanent and new-style
        designations.  Note that comet types (P, D, C etc) are
        required and letter case is important.

        Parameters
        ----------
        s : str
            String to parse.

        Returns
        -------
        r : dict
            The dictionary contains the components identified from
            ``s``: number, orbit type, designation, name, and/or
            fragment (keys ``'number'``, ``'type'``, ``'desig'``,
            ``'name'``, ``'fragment'``).  If no orbit type is
            identified, a `TargetNameParseError` is raised.

        Raises
        ------
        TargetNameParseError : Exception
            If the string does not appear to be a comet name.

        Notes
        -----
        This function has absolutely no knowledge whether the Solar
        System small body ``s`` is an asteroid or a comet.  It simply
        searches for common patterns in string ``s`` that are common
        for comet names and designations.  For instance, if ``s``
        contains an asteroid name, this function will identify that
        part as a comet name.  Hence, the user is advised to take that
        into account when interpreting the parsing results.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> tempel = Names.parse_comet('9P/Tempel 1')
        >>> tempel['type'], tempel['name']
        ('P', 'Tempel 1')
        >>> linear = Names.parse_comet('C/2001 A2-A (LINEAR)')
        >>> linear['desig'], linear['fragment'], linear['name']
        ('2001 A2', 'A', 'LINEAR')

        The following table shows results of the parsing:

        +------------------------------+------+----+--------+----------+------------------------+
        |targetname                    |number|type|fragment|desig     |name                    |
        +==============================+======+====+========+==========+========================+
        |1P/Halley                     |1     |P   |        |          |Halley                  |
        +------------------------------+------+----+--------+----------+------------------------+
        |3D/Biela                      |3     |D   |        |          |Biela                   |
        +------------------------------+------+----+--------+----------+------------------------+
        |P/Encke                       |      |P   |        |          |Encke                   |
        +------------------------------+------+----+--------+----------+------------------------+
        |9P/Tempel 1                   |9     |P   |        |          |Tempel 1                |
        +------------------------------+------+----+--------+----------+------------------------+
        |73P/Schwassmann-Wachmann 3 C  |73    |P   |        |          |Schwassmann-Wachmann 3 C|
        +------------------------------+------+----+--------+----------+------------------------+
        |73P-C/Schwassmann-Wachmann 3 C|73    |P   |C       |          |Schwassmann-Wachmann 3 C|
        +------------------------------+------+----+--------+----------+------------------------+
        |73P-BB                        |73    |P   |BB      |          |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |322P                          |322   |P   |        |          |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |X/1106 C1                     |      |X   |        |1106 C1   |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |P/1994 N2 (McNaught-Hartley)  |      |P   |        |1994 N2   |McNaught-Hartley        |
        +------------------------------+------+----+--------+----------+------------------------+
        |P/2001 YX127 (LINEAR)         |      |P   |        |2001 YX127|LINEAR                  |
        +------------------------------+------+----+--------+----------+------------------------+
        |P/2010 WK (LINEAR)            |      |P   |        |2010 WK   |LINEAR                  |
        +------------------------------+------+----+--------+----------+------------------------+
        |C/-146 P1                     |      |C   |        |-146 P1   |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |C/2001 A2-A (LINEAR)          |      |C   |A       |2001 A2   |LINEAR                  |
        +------------------------------+------+----+--------+----------+------------------------+
        |C/2013 US10                   |      |C   |        |2013 US10 |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |C/2015 V2 (Johnson)           |      |C   |        |2015 V2   |Johnson                 |
        +------------------------------+------+----+--------+----------+------------------------+

        """
        # define comet matching pattern; the bracketed numbers are the
        # indices into the tuples returned by re.findall
        pat = ('^(([1-9][0-9]*[PDCX]'
               '(-[A-Z]{1,2})?)|[PDCX]/)'  # type/number/fragm [0,1,2]
               '|([-]?[0-9]{3,4}[ _][A-Z]{1,2}[0-9]{0,3}(-[1-9A-Z]{0,2})?)'
               # designation [3,4]
               r'|((([dvA-Z][a-z\']? ?[A-Za-z\-]*)[ -]?[A-Z]?[1-9]*[a-z]*)'
               '( [1-9A-Z]{1,2})*)'  # name [5,6]
               )

        # regex patterns that will be rejected
        # NOTE(review): '\b' below sits in a non-raw string literal, so
        # Python turns it into a backspace character instead of the
        # regex word-boundary assertion.  Kept unchanged to preserve
        # the current matching behavior -- confirm intent before
        # switching to a raw string.
        rej_pat = ('^(([1-9][0-9]*[pdcxiaCXIA]\b)'
                   # numbered with lower case, X, C, I, or A
                   '|([pdcxaiAI]/))'
                   # temporary designation with lower case, I, or A
                   )

        # parentheses are replaced by blanks for the rejection test;
        # a dict-based table cannot get out of sync with the number of
        # characters to replace
        raw = s.translate(str.maketrans(dict.fromkeys('()', ' '))).strip()

        # reject rej_pat patterns
        if re.findall(rej_pat, raw):
            raise TargetNameParseError('{} does not appear to be a '
                                       'comet identifier'.format(s))

        r = {}
        for el in re.findall(pat, s):
            # type & number & fragment
            if el[0]:
                typnumber = el[0].replace('/', '')
                orbit_type = re.search('[PDCX]', typnumber)
                if orbit_type:
                    r['type'] = orbit_type.group()
                number = re.match('[0-9]+', typnumber)
                if number:
                    r['number'] = int(number.group())
                fragment = re.search('-[A-Z]{1,2}', typnumber)
                if fragment:
                    r['fragment'] = fragment.group()[1:]

            # designation & fragment
            if el[3]:
                desig = el[3].replace('_', ' ')
                fragment = re.search('-[A-Z]{1,2}', desig)
                if fragment:
                    r['fragment'] = fragment.group()[1:]
                    # the fragment is not part of the designation
                    desig = desig[:fragment.start()]
                r['desig'] = desig

            # name; single characters are not considered names
            if len(el[5]) > 1:
                r['name'] = el[5]

        if 'type' not in r:
            raise TargetNameParseError(('{} does not appear to be a '
                                        'comet name').format(s))
        return r

    @staticmethod
    def parse_asteroid(s):
        r"""Parse a string as if it were an asteroid name.

        Considers IAU-formatted permanent and new-style
        designations, as well as MPC packed designations and numbers.
        Note that letter case is important.  Parentheses and
        underscores are treated as blanks in the parsing.

        Parameters
        ----------
        s : str
            The string to parse.

        Returns
        -------
        r : dict
            The dictionary contains the components identified from
            ``s``: IAU number, designation, and/or name (keys
            ``'number'``, ``'desig'``, ``'name'``).  If none of these
            components are identified, a `TargetNameParseError` is
            raised.

        Raises
        ------
        TargetNameParseError : Exception
            If the string does not appear to be an asteroid name.

        Notes
        -----
        This function has absolutely no knowledge whether the Solar
        System small body ``s`` is an asteroid or a comet.  It simply
        searches for common patterns in string ``s`` that are common
        for asteroid names, numbers, or designations.  For instance,
        if ``s`` contains a comet name, this function will identify
        that part as an asteroid name.  Hence, the user is advised to
        take that into account when interpreting the parsing results.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> ceres = Names.parse_asteroid('(1) Ceres')
        >>> ceres['number'], ceres['name']
        (1, 'Ceres')
        >>> mu = Names.parse_asteroid('2014 MU69')
        >>> mu['desig']
        '2014 MU69'

        The following table shows results of the parsing:

        +----------------------------------+----------+------+-----------------+
        |targetname                        |desig     |number|name             |
        +==================================+==========+======+=================+
        |1                                 |          |1     |                 |
        +----------------------------------+----------+------+-----------------+
        |2 Pallas                          |          |2     |Pallas           |
        +----------------------------------+----------+------+-----------------+
        |\(2001\) Einstein                 |          |2001  |Einstein         |
        +----------------------------------+----------+------+-----------------+
        |1714 Sy                           |          |1714  |Sy               |
        +----------------------------------+----------+------+-----------------+
        |2014 MU69                         |2014 MU69 |      |                 |
        +----------------------------------+----------+------+-----------------+
        |\(228195\) 6675 P-L               |6675 P-L  |228195|                 |
        +----------------------------------+----------+------+-----------------+
        |4101 T-3                          |4101 T-3  |      |                 |
        +----------------------------------+----------+------+-----------------+
        |4015 Wilson-Harrington \(1979 VA\)|1979 VA   |4015  |Wilson-Harrington|
        +----------------------------------+----------+------+-----------------+
        |J95X00A                           |1995 XA   |      |                 |
        +----------------------------------+----------+------+-----------------+
        |K07Tf8A                           |2007 TA418|      |                 |
        +----------------------------------+----------+------+-----------------+
        |G3693                             |          |163693|                 |
        +----------------------------------+----------+------+-----------------+
        |1A                                |1A        |      |                 |
        +----------------------------------+----------+------+-----------------+
        |A/2018 V3                         |2018 V3   |      |                 |
        +----------------------------------+----------+------+-----------------+

        """
        # the bracketed numbers are the indices into the tuples
        # returned by re.findall
        # NOTE(review): every '\b' in the two patterns below sits in a
        # non-raw string literal, so Python turns it into a backspace
        # character instead of the regex word-boundary assertion.  Kept
        # unchanged to preserve the current matching behavior --
        # confirm intent before switching to raw strings.
        pat = ('(([1A][8-9][0-9]{2}[ _][A-Z]{2}[0-9]{0,3}|'
               '20[0-9]{2}[ _][A-Z]{2}[0-9]{0,3})'
               # designation [0,1]
               '|([1-9][0-9]{3}[ _](P-L|T-[1-3])))'
               # Palomar-Leiden [0,2,3]
               '|([IJKL][0-9]{2}[A-Z][0-9a-z][0-9][A-Z]'
               '|PLS[1-9][0-9]{3}|T1S[1-9][0-9]{3}|T2S[1-9][0-9]{3}'
               '|T3S[1-9][0-9]{3})'
               # packed desig [4]
               '|(^[A-Za-z][0-9]{4}| [A-Za-z][0-9]{4})'
               # packed number [5]
               '|([A-Z]{3,} |[A-Z]{3,}$'  # capitalized acronyms
               '|van de [A-Z][a-z]*[ ^ 0-9]*[-]?[A-Z]?[a-z]*[^0-9] *'
               '|de [A-Z][a-z]*[ ^ 0-9]*[-]?[A-Z]?[a-z]*[^0-9] *'
               "|['`]?[A-Z][A-Z]*['`]?[a-z][a-z]*['`]?[^0-9]*"
               "[ -]?[A-Z]?[a-z]*[^0-9]*)"
               # name [6]
               '|((^|\b)[1-9][0-9]*(\b|$| |_))'
               # number [7,8]
               '|^(([1-9][0-9]*A))'
               # comet-style designations: 1A [10]
               '|^A/([12][0-9][0-9][0-9] [A-Z][0-9]+)'
               # asteroids with cometary orbits [12]
               )

        # regex patterns that will be rejected
        rej_pat = ('([CPXDI]/[1-2][0-9]{0,3}[ _][A-Z][0-9]*(\b|$))'
                   # comet or interstellar desig
                   '|([1-9][0-9]*[PDCXI]\b)'
                   # comet or interstellar number
                   '|([PDCXI]/)'  # comet type
                   '|([1-2][0-9]{0,3}[ _][a-z]{2}[0-9]{0,3})'
                   )

        # parentheses and underscores are replaced by blanks; a
        # dict-based table cannot get out of sync with the number of
        # characters to replace
        raw = s.translate(str.maketrans(dict.fromkeys('()_', ' '))).strip()

        # reject rej_pat patterns
        if re.findall(rej_pat, raw):
            raise TargetNameParseError('{} does not appear to be an '
                                       'asteroid identifier'.format(s))

        # match target patterns
        r = {}
        for el in re.findall(pat, raw):
            if el[0]:
                # designation; a leading 'A' stands for a leading '1'
                if el[0][0] == 'A':
                    r['desig'] = '1' + el[0][1:]
                else:
                    r['desig'] = el[0]
            elif el[4]:
                # packed designation
                r['desig'] = Names.from_packed(el[4])
            elif el[5]:
                # packed number; the pattern may include a leading blank
                r['number'] = Names.from_packed(el[5].strip())
            elif el[7]:
                # number; int() tolerates the trailing blank the
                # pattern may have captured
                r['number'] = int(el[7])
            elif el[6]:
                # name; single characters are not considered names
                name = el[6].strip()
                if len(name) > 1:
                    r['name'] = name
            elif el[10]:
                # comet-style designation
                r['desig'] = el[10].strip()
            elif el[12]:
                # asteroid with cometary orbit (A/ prefix)
                r['desig'] = el[12].strip()

        if not r:
            raise TargetNameParseError(('{} does not appear to be an '
                                        'asteroid name'.format(s)))
        return r

    @staticmethod
    def asteroid_or_comet(s):
        """Checks if an object identifier is more likely to belong to an
        asteroid or a comet.

        Parameters
        ----------
        s : str
            Target identifier.

        Returns
        -------
        target_type : str
            The target identification: ``'comet'`` or ``'asteroid'``.

        Raises
        ------
        TargetNameParseError
            If ``s`` parses both as a comet and as an asteroid, or as
            neither.

        Notes
        -----
        This function uses the results of
        `~sbpy.data.Names.parse_asteroid` and
        `~sbpy.data.Names.parse_comet`.  Hence, it is affected by
        ambiguities in the name/number/designation identification.
        If the name is ambiguous, a
        `~sbpy.data.names.TargetNameParseError` will be raised.  Note
        that for any identifier that does not contain a comet type (P,
        D, C etc.), it is likely that the object gets identified as an
        asteroid.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> print(Names.asteroid_or_comet('2P'))
        comet
        >>> print(Names.asteroid_or_comet('(1) Ceres'))
        asteroid

        """
        # try both parsers; the nature of the target is only decided
        # when exactly one of them accepts the identifier
        try:
            com = Names.parse_comet(s)
        except TargetNameParseError:
            com = {}

        try:
            ast = Names.parse_asteroid(s)
        except TargetNameParseError:
            ast = {}

        if ast and not com:
            return 'asteroid'
        if com and not ast:
            return 'comet'
        raise TargetNameParseError('Target nature unclear.')