# Source code for sbpy.data.names

# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""
======================
sbpy data.Names Module
======================

Class for dealing with object naming conventions.

created on August 28, 2017

"""

import re
import math
from ..exceptions import SbpyException

__all__ = ['Names', 'TargetNameParseError', 'natural_sort_key']



# [docs]
def natural_sort_key(s):
    """Build a sort key that compares runs of digits as integers.

    Meant to be passed as the ``key`` argument of `list.sort` or of the
    `sorted` built-in.


    Parameters
    ----------
    s : string
        String to parse into keys.  The value is converted with `str`
        before it is split.


    Returns
    -------
    keys : tuple
        The pieces of ``str(s)`` obtained by splitting on runs of the
        digits 0-9; every piece for which `str.isdigit` is true is
        converted to `int`, all other pieces stay strings.


    Examples
    --------
    >>> from sbpy.data.names import natural_sort_key
    >>> comets = ['9P/Tempel 1',
    ...           '101P/Chernykh',
    ...           '10P/Tempel 2',
    ...           '2P/Encke']
    >>> sorted(comets)
    ['101P/Chernykh', '10P/Tempel 2', '2P/Encke', '9P/Tempel 1']
    >>> sorted(comets, key=natural_sort_key)
    ['2P/Encke', '9P/Tempel 1', '10P/Tempel 2', '101P/Chernykh']

    """
    # the capturing group makes re.split keep the digit runs in the result
    pieces = re.split('([0-9]+)', str(s))
    return tuple(int(piece) if piece.isdigit() else piece
                 for piece in pieces)



class TargetNameParseError(SbpyException):
    """Raised when a target identifier cannot be parsed or converted."""



# [docs]
class Names:
    """Parser for small-body target identifiers.

    The methods of this class pick out the designation, the name, and
    the number from comet and asteroid identifier strings, and can tell
    comet identifiers from asteroid identifiers.
    """

    # translation string for packed numbers: the character at index i
    # stands for the value i (digits, then upper case, then lower case)
    pkd = ('0123456789'
           + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
           + 'abcdefghijklmnopqrstuvwxyz')

    # upper-case letters of the translation string with the I left out
    pkd_noI = pkd[10:36].replace('I', '')



# [docs]
    @staticmethod
    def to_packed(s):
        """Convert designation or number to packed identifier.

        Parameters
        ----------
        s : str
           Target identifier.

        Returns
        -------
        p : str
           Packed designation/number.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> Names.to_packed('1995 AA1')
        'J95A01A'
        """

        if s.isdigit():
            # number
            s = int(s)
            if s < 100000:
                return '{:05d}'.format(s)
            elif (s > 99999 and s < 620000):
                mod = (s % 10000)
                return '{}{:04d}'.format(
                    Names.pkd[int((s - mod) / 10000)], mod)
            elif (s > 619999 and s < 15396336):
                s = s - 620000
                d = ['0', '0', '0', '0']
                for idx in reversed(range(0, 4)):
                    d[idx] = Names.pkd[math.floor(s % 62)]
                    s //= 62
                return ('~'+''.join(d))
            else:
                raise TargetNameParseError(
                    '{} cannot be turned into a packed number'.format(s)
                )

        elif s.endswith('P-L'):
            return 'PLS{}'.format(s[:4])
        elif s[-3:] in ['T-1', 'T-2', 'T-3']:
            return 'T{}S{}'.format(s[-1], s[:4])
        elif s[:4].isdigit() and (s[5:].isalnum() or s[5:s.find('-')].isalnum()):
            # cometary or minor planet temporary designation

            # when the half-month and number are two digits: cometary
            # when there is a trailing fragment designation: cometary
            # otherwise: minor planet
            if (
                (s[5].isalpha() and s[6:].isdigit())
                or (s[-2] == '-' and s[-1].isalpha())
            ):
                if s[-2] == '-':
                    frag = s[-1]
                    if len(s[:-2]) > 6:
                        num = s[6:-2]
                    else:
                        raise TargetNameParseError(
                            ('{} cannot be turned into a '
                             'packed designation').format(s))
                else:
                    frag = '0'
                    num = s[6:]

                try:
                    if num == '':
                        num = '00'
                    elif len(num) == 1:
                        num = '0' + num
                    elif len(num) > 2:
                        num = Names.pkd[int(num[:-1])]+num[-1]
                    return '{}{}{}{}{}'.format(
                        Names.pkd[int(float(s[:2]))],
                        s[2:4],
                        s[5],
                        num,
                        frag.lower()
                    )
                except (IndexError, ValueError):
                    raise TargetNameParseError(
                        ('{} cannot be turned into a '
                         'packed designation').format(s))
            else:
                try:
                    yr = s.strip()[:4]
                    yr = Names.pkd[int(yr[:2])] + yr[2:]
                    let = s.strip()[4:7].strip()
                    num = s.strip()[7:].strip()

                    if num == '':
                        return (yr + let[0] + '00' + let[1])
                    elif len(num) == 1:
                        return (yr + let[0] + '0' + num + let[1])
                    elif len(num) > 1:
                        obj_num = int(num)*25 + Names.pkd_noI.find(let[1]) + 1
                        # use original packed desigs for first 15500 objs per month
                        if obj_num < 15501:
                            num = Names.pkd[int(num[:-1])]+num[-1]
                            return (yr + let[0] + num + let[1])
                        # use extended packed desigs for >15500 objs per month
                        elif obj_num < 14791837:
                            obj_num = obj_num - 15501
                            year = Names.pkd[int(yr[1:])]
                            month = let[0]
                            d = ['0', '0', '0', '0']
                            for idx in reversed(range(0, 4)):
                                d[idx] = Names.pkd[math.floor(obj_num % 62)]
                                obj_num //= 62
                            return ('_'+Names.pkd[int(yr[1:])]+let[0]+''.join(d))
                        # if more than maximum of 14,791,836 objects per half-month
                        # accommodated by the extended provisional designation scheme
                        else:
                            raise TargetNameParseError(
                                ('{} cannot be turned into a '
                                 'packed number or designation').format(s))
                except (IndexError, ValueError):
                    raise TargetNameParseError(
                        ('{} cannot be turned into a '
                         'packed number or designation').format(s))
        else:
            raise TargetNameParseError(
                ('{} cannot be turned into a '
                 'packed number or designation').format(s))



# [docs]
    @staticmethod
    def from_packed(p):
        """Unpack asteroid designation/number.

        Parameters
        ----------
        p : str
           Packed target identifier.

        Returns
        -------
        s : str
           Unpacked designation/number.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> Names.from_packed('J95A01A')
        '1995 AA1'
        """
        # packed number
        if p.isdigit():
            return int(p)
        elif p[0].isalpha() and p[1:].isdigit():
            return int(str(Names.pkd.find(p[0])) + p[1:])
        elif p[0] == '~' and p[1:].isalnum():
            if len(p) == 5:
                obj_num = 620000 + Names.pkd.find(p[1])*(62**3) \
                    + Names.pkd.find(p[2])*(62**2) \
                    + Names.pkd.find(p[3])*(62) \
                    + Names.pkd.find(p[4])
                return int(obj_num)
            else:
                raise TargetNameParseError(
                    ('{} cannot be turned into an '
                     'unpacked designation').format(p))

        # old designation style, e.g.: 1989AB
        if (len(p.strip()) < 7 and p[:4].isdigit() and p[4:6].isalpha()):
            return p[:4]+' '+p[4:6]
        # Palomar Survey
        elif p.startswith("PLS"):
            return p[3:] + " P-L"
        # Trojan Surveys
        elif p.startswith("T1S"):
            return p[3:] + " T-1"
        elif p.startswith("T2S"):
            return p[3:] + " T-2"
        elif p.startswith("T3S"):
            return p[3:] + " T-3"
        # insert blank in designations
        elif (p[0:4].isdigit() and p[4:6].isalpha() and p[4] != ' '):
            return p[:4] + " " + p[4:]
        # MPC packed 7-digit designation
        elif (p[0].isalpha() and p[1:3].isdigit() and p[-2].isdigit()):
            return '{}{} {}{}{}{}'.format(
                str(Names.pkd.find(p[0])),
                p[1:3],
                p[3],
                p[6] if (p[6].isalpha() and p[6].isupper()) else '',
                (str(Names.pkd.find(p[4])) + p[5]).lstrip('0'),
                '-{}'.format(p[6].upper()) if p[6].islower() else ''
            )
        # MPC extended packed provisional designation
        elif p[0] == '_':
            if (
                (p[1].isalpha() and p[1].isupper())
                and re.search("[A-H,J-Y]", p[2])
                and p[3:].isalnum()
                and len(p) == 7
            ):
                obj_num = 15501 + Names.pkd.find(p[3])*(62**3) \
                    + Names.pkd.find(p[4])*(62**2) \
                    + Names.pkd.find(p[5])*(62) \
                    + Names.pkd.find(p[6])
                return '20{} {}{}{}'.format(
                    str(Names.pkd.find(p[1])),
                    p[2],
                    Names.pkd_noI[((obj_num-1) % 25)],
                    math.floor((obj_num-1)/25))
            else:
                raise TargetNameParseError(
                    ('{} cannot be turned into an '
                     'unpacked designation').format(p))
        else:
            # nothing to do
            return p



# [docs]
    @staticmethod
    def parse_comet(s):
        """Parse a string as if it were a comet name.

        Considers IAU-formatted permanent and new-style
        designations. Note that comet types (P, D, C etc) are required
        and letter case is important.

        Parameters
        ----------
        s : str or list/array of str
           String, or a list/array of strings, to parse.

        Returns
        -------
        r : dict
           The dictionary contains the components identified from ``s``:
           number, orbit type, designation, name, and/or fragment. If
           none of these components are identified, a
           `TargetNameParseError` is raised.

        Raises
        ------
        TargetNameParseError : Exception
           If the string does not appear to be a comet name.

        Notes
        -----
        This function has absolutely no knowledge whether the Solar
        System small body ``s`` is an asteroid or a comet. It simply
        searches for common patterns in string ``s`` that are common for
        comet names and designations. For instance, if ``s`` contains an
        asteroid name, this function will identify that part as a
        comet name. Hence, the user is advised to take that into
        account when interpreting the parsing results.


        Examples
        --------
        >>> from sbpy.data import Names
        >>> tempel = Names.parse_comet('9P/Tempel 1')
        >>> tempel['type'], tempel['name']
        ('P', 'Tempel 1')
        >>> linear = Names.parse_comet('C/2001 A2-A (LINEAR)')
        >>> linear['desig'], linear['fragment'], linear['name']
        ('2001 A2', 'A', 'LINEAR')

        The following table shows results of the parsing:

        +------------------------------+------+----+--------+----------+------------------------+
        |targetname                    |number|type|fragmemt|desig     |name                    |
        +==============================+======+====+========+==========+========================+
        |1P/Halley                     | 1    | P  |        |          |Halley                  |
        +------------------------------+------+----+--------+----------+------------------------+
        |3D/Biela                      | 3    | D  |        |          |Biela                   |
        +------------------------------+------+----+--------+----------+------------------------+
        |P/Encke                       |      | P  |        |          |Encke                   |
        +------------------------------+------+----+--------+----------+------------------------+
        |9P/Tempel 1                   | 9    | P  |        |          |Tempel 1                |
        +------------------------------+------+----+--------+----------+------------------------+
        |73P/Schwassmann-Wachmann 3 C  | 73   | P  |        |          |Schwassmann-Wachmann 3 C|
        +------------------------------+------+----+--------+----------+------------------------+
        |73P-C/Schwassmann-Wachmann 3 C| 73   | P  | C      |          |Schwassmann-Wachmann 3 C|
        +------------------------------+------+----+--------+----------+------------------------+
        |73P-BB                        | 73   | P  | BB     |          |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |322P                          | 322  | P  |        |          |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |X/1106 C1                     |      | X  |        | 1066 C1  |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |P/1994 N2 (McNaught-Hartley)  |      | P  |        |  1994 N2 |McNaught-Hartley        |
        +------------------------------+------+----+--------+----------+------------------------+
        |P/2001 YX127 (LINEAR)         |      | P  |        |2001 YX127|LINEAR                  |
        +------------------------------+------+----+--------+----------+------------------------+
        |P/2010 WK (LINEAR)            |      | P  |        | 2010 WK  |LINEAR                  |
        +------------------------------+------+----+--------+----------+------------------------+
        |C/-146 P1                     |      | C  |        | -146 P1  |                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |C/2001 A2-A (LINEAR)          |      | C  | A      | 2001 A2  |LINEAR                  |
        +------------------------------+------+----+--------+----------+------------------------+
        |C/2013 US10                   |      | C  |        | 2013 US10|                        |
        +------------------------------+------+----+--------+----------+------------------------+
        |C/2015 V2 (Johnson)           |      | C  |        | 2015 V2  |Johnson                 |
        +------------------------------+------+----+--------+----------+------------------------+

        """

        # define comet matching pattern
        pat = ('^(([1-9][0-9]*[PDCX]'
               '(-[A-Z]{1,2})?)|[PDCX]/)'  # type/number/fragm [0,1,2]
               '|([-]?[0-9]{3,4}[ _][A-Z]{1,2}[0-9]{0,3}(-[1-9A-Z]{0,2})?)'
               # designation [3,4]
               r'|((([dvA-Z][a-z\']? ?[A-Za-z\-]*)[ -]?[A-Z]?[1-9]*[a-z]*)'
               '( [1-9A-Z]{1,2})*)'  # name [5,6]
               )

        # regex patterns that will be rejected
        rej_pat = ('^(([1-9][0-9]*[pdcxiaCXIA]\b)'
                   # numbered with lower case, X, C, I, or A
                   '|([pdcxaiAI]/))'
                   # temporary designation with lower case, I, or A
                   )

        raw = s.translate(str.maketrans('()', '  ')).strip()

        # reject rej_pat patterns
        rej = re.findall(rej_pat, raw)

        if len(rej) > 0:
            raise TargetNameParseError('{} does not appear to be a '
                                       'comet identifier'.format(s))

        m = re.findall(pat, s)

        r = {}

        if len(m) > 0:
            for el in m:
                # type & number & fragment
                if len(el[0]) > 0:
                    typnumber = el[0].replace('/', '')
                    try:
                        r['type'] = re.findall('[PDCX]', typnumber)[0]
                    except IndexError:
                        pass
                    try:
                        r['number'] = int(re.findall('[0-9]*', typnumber)[0])
                    except (IndexError, ValueError):
                        pass
                    try:
                        r['fragment'] = re.findall('-[A-Z]{1,2}',
                                                   typnumber)[0][1:]
                    except IndexError:
                        pass
                # designation & fragment
                if len(el[3]) > 0:
                    r['desig'] = el[3].replace('_', ' ')
                    try:
                        r['fragment'] = re.findall('-[A-Z]{1,2}',
                                                   r['desig'])[0][1:]
                        r['desig'] = r['desig'][:r['desig'].find(
                            '-' + r['fragment'])]
                    except IndexError:
                        pass
                # name
                if len(el[5]) > 0:
                    if len(el[5]) > 1:
                        r['name'] = el[5]
                        if r['name'][1].isdigit():
                            raise TargetNameParseError('{} does not appear to be a '
                                                       ' comet identifier'.format(s))

        if len(r) == 0 or 'type' not in r:
            raise TargetNameParseError(('{} does not appear to be a '
                                        'comet name').format(s))
        else:
            return r



# [docs]
    @staticmethod
    def parse_asteroid(s):
        """Parse a string as if it were an asteroid name.

        Considers IAU-formatted permanent and new-style designations,
        as well as MPC packed designations and numbers. Note that
        letter case is important. Parentheses are ignored in the parsing.

        Parameters
        ----------
        s : str or list of str
           The string, or a list/array of strings, to parse.

        Returns
        -------
        r : dict
           The dictionary contains the components identified from ``s``:
           IAU number, designation, and/or name. If none of these
           components are identified, a `TargetNameParseError` is raised

        Raises
        ------
        TargetNameParseError : Exception
           If the string does not appear to be an asteroid name.

        Notes
        -----
        This function has absolutely no knowledge whether the Solar
        System small body ``s`` is an asteroid or a comet. It simply
        searches for common patterns in string ``s`` that are common
        for asteroid names, numbers, or designations. For instance, if
        ``s`` contains a comet name, this function will identify that
        part as an asteroid name. Hence, the user is advised to take
        that into account when interpreting the parsing results.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> ceres = Names.parse_asteroid('(1) Ceres')
        >>> ceres['number'], ceres['name']
        (1, 'Ceres')
        >>> mu = Names.parse_asteroid('2014 MU69')
        >>> mu['desig']
        '2014 MU69'

        The following table shows results of the parsing:

        +----------------------------------+----------+------+-----------------+
        |targetname                        |desig     |number|name             |
        +==================================+==========+======+=================+
        |1                                 |          |1     |                 |
        +----------------------------------+----------+------+-----------------+
        |2 Pallas                          |          |2     |Pallas           |
        +----------------------------------+----------+------+-----------------+
        |(2001) Einstein                   |          |2001  |Einstein         |
        +----------------------------------+----------+------+-----------------+
        |1714 Sy                           |          |1714  |Sy               |
        +----------------------------------+----------+------+-----------------+
        |2014 MU69                         |2014 MU69 |      |                 |
        +----------------------------------+----------+------+-----------------+
        |(228195) 6675 P-L                 |6675 P-L  |228195|                 |
        +----------------------------------+----------+------+-----------------+
        |4101 T-3                          |4101 T-3  |      |                 |
        +----------------------------------+----------+------+-----------------+
        |4015 Wilson-Harrington (1979 VA)  |1979 VA   |4015  |Wilson-Harrington|
        +----------------------------------+----------+------+-----------------+
        |J95X00A                           |1995 XA   |      |                 |
        +----------------------------------+----------+------+-----------------+
        |K07Tf8A                           |2007 TA418|      |                 |
        +----------------------------------+----------+------+-----------------+
        |G3693                             |          |163693|                 |
        +----------------------------------+----------+------+-----------------+
        |1A                                |1A        |      |                 |
        +----------------------------------+----------+------+-----------------+
        |A/2018 V3                         |2018 V3   |      |                 |
        +----------------------------------+----------+------+-----------------+


        """

        pat = ('(([1A][8-9][0-9]{2}[ _][A-Z]{2}[0-9]{0,3}|'
               '20[0-9]{2}[ _][A-Z]{2}[0-9]{0,3})'
               # designation [0,1]
               '|([1-9][0-9]{3}[ _](P-L|T-[1-3])))'
               # Palomar-Leiden  [0,2,3]
               '|([IJKL][0-9]{2}[A-Z][0-9a-z][0-9][A-Z]'
               '|PLS[1-9][0-9]{3}|T1S[1-9][0-9]{3}|T2S[1-9][0-9]{3}'
               '|T3S[1-9][0-9]{3})'
               # packed desig [4]
               '|(^[A-Za-z][0-9]{4}| [A-Za-z][0-9]{4})'
               # packed number [5]
               '|([A-Z]{3,} |[A-Z]{3,}$'  # capitalized acronyms
               '|van de [A-Z][a-z]*[ ^ 0-9]*[-]?[A-Z]?[a-z]*[^0-9] *'
               '|de [A-Z][a-z]*[ ^ 0-9]*[-]?[A-Z]?[a-z]*[^0-9] *'
               "|['`]?[A-Z][A-Z]*['`]?[a-z][a-z]*['`]?[^0-9]*"
               "[ -]?[A-Z]?[a-z]*[^0-9]*)"
               # name [6]
               '|((^|\b)[1-9][0-9]*(\b|$| |_))'
               # number [7,8]
               '|^(([1-9][0-9]*A))'
               # comet-style designations: 1A [10]
               '|^A/([12][0-9][0-9][0-9] [A-Z][0-9]+)'
               # asteroids with cometary orbits [12]
               )

        # regex patterns that will be rejected
        rej_pat = ('([CPXDI]/[1-2][0-9]{0,3}[ _][A-Z][0-9]*(\b|$))'
                   # comet or interstellar desig
                   '|([1-9][0-9]*[PDCXI]\b)'
                   # comet or interstellar number
                   '|([PDCXI]/)'
                   # comet type
                   '|([1-2][0-9]{0,3}[ _][a-z]{2}[0-9]{0,3})'
                   )

        raw = s.translate(str.maketrans('()_', '   ')).strip()

        # reject rej_pat patterns
        rej = re.findall(rej_pat, raw)

        if len(rej) > 0:
            raise TargetNameParseError('{} does not appear to be an '
                                       'asteroid identifier'.format(s))

        # match target patterns
        m = re.findall(pat, raw)

        r = {}

        if len(m) > 0:
            for el in m:
                # designation
                if len(el[0]) > 0:
                    if el[0][0] == 'A':
                        r['desig'] = '1'+el[0][1:]
                    else:
                        r['desig'] = el[0]
                # packed designation
                elif len(el[4]) > 0:
                    ident = el[4]
                    r['desig'] = Names.from_packed(ident)
                # packed number
                elif len(el[5]) > 0 and len(el[5]) == len(raw):
                    ident = el[5]
                    r['number'] = Names.from_packed(ident)
                # number
                elif len(el[7]) > 0:
                    r['number'] = int(float(el[7]))
                # name
                elif len(el[6]) > 0:
                    if len(el[6].strip()) > 1:
                        r['name'] = el[6].strip()
                # comet-style designation
                elif len(el[10]) > 0:
                    r['desig'] = el[10].strip()
                elif len(el[12]) > 0:
                    r['desig'] = el[12].strip()

        if len(r) == 0:
            raise TargetNameParseError(('{} does not appear to be an '
                                        'asteroid name'.format(s)))
        else:
            return r



# [docs]
    @staticmethod
    def asteroid_or_comet(s):
        """Decide whether an identifier belongs to an asteroid or a comet.

        Parameters
        ----------
        s : str
           Target identifier.

        Returns
        -------
        target_type : str
           The target identification: ``'comet'`` or ``'asteroid'``.

        Raises
        ------
        TargetNameParseError
           If the identifier parses both as a comet and as an asteroid,
           or as neither of the two.

        Notes
        -----
        This function uses the results of
        `~sbpy.data.Names.parse_asteroid` and
        `~sbpy.data.Names.parse_comet`. Hence, it is affected by
        ambiguities in the name/number/designation identification.
        Note that for any identifier that does not contain a comet type
        (P, D, C etc.), it is likely that the object gets identified as
        an asteroid.

        Examples
        --------
        >>> from sbpy.data import Names
        >>> print(Names.asteroid_or_comet('2P'))
        comet
        >>> print(Names.asteroid_or_comet('(1) Ceres'))
        asteroid

        """

        def parse_or_empty(parser):
            # a failed parse counts as "no components found"
            try:
                return parser(s)
            except TargetNameParseError:
                return {}

        # the comet parser runs first, then the asteroid parser
        com = parse_or_empty(Names.parse_comet)
        ast = parse_or_empty(Names.parse_asteroid)

        # the nature is clear only if exactly one parser found something
        if ast and not com:
            return 'asteroid'
        if com and not ast:
            return 'comet'
        raise TargetNameParseError('Target nature unclear.')
# Navigation

# Source code for sbpy.data.names