# Source code for giant.catalogues.tycho

# Copyright 2021 United States Government as represented by the Administrator of the National Aeronautics and Space
# Administration.  No copyright is claimed in the United States under Title 17, U.S. Code. All Other Rights Reserved.


r"""
This module defines the interface to the Tycho 2 star catalogue.

Catalogue Description
=====================

Tycho 2 is a bright star catalogue containing positions, proper motions, and photometry for the 2.5 million brightest
stars in the sky based solely on observations from the Hipparcos satellite.  This corresponds to nearly complete
coverage down to a visual magnitude of about 11.0.

The Tycho 2 catalogue uses csv text files to store the stars.  It is not very efficient for querying either large or
small numbers of stars.  It also does not include blended stars (stars that are close enough together to appear as a
single source in an image).  If you need faster retrieval and/or blended stars then you should use the
:mod:`.giant_catalogue` instead.

For a more thorough description of the Tycho2 star catalogue see https://www.cosmos.esa.int/web/hipparcos/tycho-2.

Use
===

The Tycho 2 catalogue can be used anywhere that a star catalogue is required in GIANT.
It is stored in 3 csv files plus an index csv that takes about 500 MB of disk space.  If you attempt to
initialize the class and point it to a directory that does not contain the Tycho 2 data it will ask you if you want to
download the catalogue (note that the Tycho 2 data is not included by default so if you have not downloaded it yourself
you will definitely need to).  If you answer yes, be aware that it may take a very long time to download.

Once you have initialized the class (and downloaded the data files), then you can access the catalogue as you would any
GIANT usable catalogue.  Simply call :meth:`~.Tycho2.query_catalogue` to get the GIANT records for the stars as a
dataframe with columns according to :attr:`.GIANT_COLUMNS`.  This class also provides a helper method,
:meth:`~.Tycho2.query_catalogue_raw`, which can be used to retrieve the raw catalogue entries (instead of the GIANT
entries).
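
For example, a minimal session might look like the following (a sketch, which assumes the catalogue files have
already been downloaded to the default location)::

    >>> from giant.catalogues.tycho import Tycho2
    >>> cat = Tycho2()  # will offer to download the data files if they are missing
    >>> # cone search: stars brighter than visual magnitude 9 within 2 degrees of (ra, dec) = (25.0, 36.0) degrees
    >>> stars = cat.query_catalogue(search_center=(25.0, 36.0), search_radius=2.0, max_mag=9.0)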
"""

import os
from pathlib import Path

from io import StringIO

import time

from datetime import datetime

import warnings

from typing import Optional, TextIO, List, Union

import numpy as np
import pandas as pd

from giant.catalogues.meta_catalogue import Catalogue, GIANT_TYPES, GIANT_COLUMNS
from giant.catalogues.utilities import radec_distance, DEG2RAD, STAR_DIST, PARSEC2KM, DEG2MAS, apply_proper_motion

from giant._typing import PATH, Real, ARRAY_LIKE


TYCHO_DIR = Path(__file__).resolve().parent / "data" / "TYCHO2"  # type: Path
"""
This gives the default location of the Tycho 2 catalogue files.

The default location is a directory called "TYCHO2" inside the "data" directory in the directory containing this
source file.
"""


class Tycho2(Catalogue):
    """
    This class provides access to the Tycho 2 star catalogue.

    This class is a fully functional catalogue for GIANT and can be used anywhere that GIANT expects a star catalogue.
    As such, it implements the :attr:`include_proper_motion` to turn proper motion on or off as well as the method
    :meth:`query_catalogue` which is how stars are queried into the GIANT format.

    In addition, this catalogue provides one additional method :meth:`query_catalogue_raw` which returns the raw
    Tycho 2 records for stars instead of the GIANT records.  This method isn't used anywhere by GIANT itself, but may
    be useful if you are doing some advanced analysis.

    To use this class simply initialize it, pointing to the directory where the Tycho 2 catalogue files index.dat,
    suppl_1.dat, and tyc2.dat are contained.  If the catalogue files do not exist it will ask you if you want to
    download them, and if you answer yes, it will download the Tycho 2 catalogue (which takes a long time in most
    instances).  Once the class is initialized, you can query stars from it using :meth:`query_catalogue` which will
    return a dataframe of the star records with :attr:`.GIANT_COLUMNS` columns.
    """

    def __init__(self, directory: PATH = TYCHO_DIR, include_proper_motion: bool = True):
        """
        :param directory: The directory containing the Tycho 2 catalogue files.  This should contain index.dat,
                          suppl_1.dat, and tyc2.dat as csv files.
        :param include_proper_motion: A boolean flag specifying whether to apply proper motion when retrieving the
                                      stars
        """

        # call the super class constructor
        super().__init__(include_proper_motion=include_proper_motion)

        directory = Path(directory)

        self._root: Path = directory
        """
        The root directory where the catalogue files are stored
        """

        if not directory.exists():
            print("Tycho data not found at {}".format(directory), flush=True)
            user_response = input("Would you like to download the Tycho data to this directory (y/n)?\n"
                                  " WARNING: THIS REQUIRES AN INTERNET CONNECTION, WILL TAKE A LONG TIME, AND WILL"
                                  " USE UP 500 MB OF SPACE!\n ")

            if user_response[:1].lower() == 'y':
                # make sure the directory exists
                directory.mkdir(exist_ok=True, parents=True)
                download_tycho(directory)
            else:
                raise FileNotFoundError('The Tycho data is not available in the specified directory.  Cannot '
                                        'initialize the Tycho2 class.')

        # store the index file for the catalogue
        self._index_file: Path = self._root / 'index.dat'
        """
        The index file for the catalogue
        """

        if not self._index_file.exists():
            print("Tycho index data missing at {}".format(directory), flush=True)
            user_response = input("Would you like to download the Tycho data to this directory (y/n)?\n"
                                  " WARNING: THIS REQUIRES AN INTERNET CONNECTION, WILL TAKE A LONG TIME, AND WILL"
                                  " USE UP 500 MB OF SPACE!\n ")

            if user_response[:1].lower() == 'y':
                download_tycho(directory)
            else:
                raise FileNotFoundError('The Tycho 2 index file could not be located.\n'
                                        'Please ensure that "index.dat" is in your tycho directory to proceed')

        # build the index in memory
        self._index: Optional[pd.DataFrame] = None
        """
        The index specifying where stars are in the catalogue
        """

        self._build_index()

        # store the main data file for the catalogue
        try:
            self._main: TextIO = (self._root / 'tyc2.dat').open('r')
            """
            The main file object for the catalogue
            """

            # figure out the length of a line
            self._main.readline()

            self._main_line_length = self._main.tell()
            """
            The length of a line in the main table
            """

            self._main.seek(0)

        except FileNotFoundError:
            print("Tycho data missing at {}".format(directory), flush=True)
            user_response = input("Would you like to download the Tycho data to this directory (y/n)?\n"
                                  " WARNING: THIS REQUIRES AN INTERNET CONNECTION, WILL TAKE A LONG TIME, AND WILL"
                                  " USE UP 500 MB OF SPACE!\n ")

            if user_response[:1].lower() == 'y':
                download_tycho(directory)

                self._main = (self._root / 'tyc2.dat').open('r')
                self._main.readline()
                self._main_line_length = self._main.tell()
                self._main.seek(0)
            else:
                raise FileNotFoundError('The Tycho 2 main file could not be located.\n'
                                        'Please ensure that "tyc2.dat" is in your tycho directory to proceed')

        # store supplement 1 for the catalogue
        try:
            self._sup1: TextIO = (self._root / 'suppl_1.dat').open('r')
            """
            The first supplement file object
            """

            self._sup1.readline()

            self._sup1_line_length: int = self._sup1.tell()
            """
            The length of a line in the first supplement table
            """

            self._sup1.seek(0)

        except FileNotFoundError:
            print("Tycho supplement data missing at {}".format(directory), flush=True)
            user_response = input("Would you like to download the Tycho data to this directory (y/n)?\n"
                                  " WARNING: THIS REQUIRES AN INTERNET CONNECTION, WILL TAKE A LONG TIME, AND WILL"
                                  " USE UP 500 MB OF SPACE!\n ")

            if user_response[:1].lower() == 'y':
                download_tycho(directory)

                self._sup1 = (self._root / 'suppl_1.dat').open('r')
                self._sup1.readline()
                self._sup1_line_length = self._sup1.tell()
                self._sup1.seek(0)
            else:
                raise FileNotFoundError('The first supplement file could not be located.\n'
                                        'Please ensure that "suppl_1.dat" is in your tycho directory to proceed')

        # store the column names and dtypes for the catalogue
        # noinspection SpellCheckingInspection
        self._names: List[str] = ['TYCID', 'pflag', 'RAmdeg', 'DEmdeg', 'pmRA', 'pmDE', 'e_RAmdeg', 'e_DEmdeg',
                                  'e_pmRA', 'e_pmDE', 'EpRAm', 'EpDEm', 'Num', 'q_RAmdeg', 'q_DEmdeg', 'q_pmRA',
                                  'q_pmDE', 'BTmag', 'e_BTmag', 'VTmag', 'e_VTmag', 'prox', 'TYC', 'HIPpCCDM',
                                  'RAdeg', 'DEdeg', 'EpRAm1990', 'EpDEm1990', 'e_RAdeg', 'e_DEdeg', 'posflg', 'corr']
        """
        The names of the columns from the main file
        """

        # noinspection SpellCheckingInspection
        self._sup1_names: List[str] = ['TYCID', 'flag', 'RAdeg', 'DEdeg', 'pmRA', 'pmDE', 'e_RAdeg', 'e_DEdeg',
                                       'e_pmRA', 'e_pmDE', 'mflag', 'BTmag', 'e_BTmag', 'VTmag', 'e_VTmag', 'prox',
                                       'TYC', 'HIPpCCDM']
        """
        The names of the columns from the first supplement file
        """

        # noinspection SpellCheckingInspection
        self._sup1_rename: List[str] = ['TYCID', 'flag', 'RAmdeg', 'DEmdeg', 'pmRA', 'pmDE', 'e_RAmdeg', 'e_DEmdeg',
                                        'e_pmRA', 'e_pmDE', 'mflag', 'BTmag', 'e_BTmag', 'VTmag', 'e_VTmag', 'prox',
                                        'TYC', 'HIPpCCDM']
        """
        The names of the columns that the first supplement records are renamed to so they can be merged with the main
        catalogue files.
        """

        # store the data types for the columns
        # noinspection SpellCheckingInspection
        self._dtypes: List[type] = [np.unicode_,              # TYC1, TYC2, TYC3
                                    np.unicode_,              # pflag
                                    np.float64, np.float64,   # RAmdeg, DEmdeg
                                    np.float64, np.float64,   # pmRA, pmDE
                                    np.unicode_, np.float64,  # e_RAmdeg, e_DEmdeg
                                    np.float64, np.float64,   # e_pmRA, e_pmDE
                                    np.float64, np.float64,   # EpRAm, EpDEm
                                    np.float64,               # Num
                                    np.float64, np.float64,   # q_RAmdeg, q_DEmdeg
                                    np.float64, np.float64,   # q_pmRA, q_pmDE
                                    np.float64, np.float64,   # BTmag, e_BTmag
                                    np.float64, np.float64,   # VTmag, e_VTmag
                                    np.float64,               # prox
                                    np.unicode_,              # TYC
                                    np.unicode_,              # HIP, CCDM
                                    np.float64, np.float64,   # RAdeg, DEdeg
                                    np.float64, np.float64,   # EpRA-1990, EpDE-1990
                                    np.float64, np.float64,   # e_RAdeg, e_DEdeg
                                    np.unicode_,              # posflg
                                    np.float64]               # corr
        """
        This list specifies the types of each column of the main catalogue (as raw types)
        """

        # noinspection SpellCheckingInspection
        self._sup1_dtypes = [np.unicode_,             # TYC1, TYC2, TYC3
                             np.unicode_,             # flag
                             np.float64, np.float64,  # RAdeg, DEdeg
                             np.float64, np.float64,  # pmRA, pmDE
                             np.float64, np.float64,  # e_RAdeg, e_DEdeg
                             np.float64, np.float64,  # e_pmRA, e_pmDE
                             np.unicode_,             # mflag
                             np.float64, np.float64,  # BTmag, e_BTmag
                             np.float64, np.float64,  # VTmag, e_VTmag
                             np.float64,              # prox
                             np.unicode_,             # TYC
                             np.unicode_]             # HIP, CCDM
        """
        This list specifies the types of each column of the secondary file (as raw types)
        """

    def _build_index(self):
        """
        This method stores the index in memory from the index file
        """

        # noinspection SpellCheckingInspection
        self._index = pd.read_csv(self._index_file, sep='|', header=None, index_col=False,
                                  names=['mstars', 'sstars', 'minra', 'maxra', 'mindec', 'maxdec'],
                                  dtype={'mstars': np.uint32, 'sstars': np.uint16,
                                         'minra': np.float64, 'maxra': np.float64,
                                         'mindec': np.float64, 'maxdec': np.float64})
    def empty_frame(self) -> pd.DataFrame:
        """
        This simple helper function returns an empty dataframe with the appropriate columns.

        :return: The empty dataframe
        """

        return self._process_results([])
    def nan_frame(self, index: Optional[str] = None) -> pd.DataFrame:
        """
        This simple helper function returns a dataframe with a single NaN filled row.

        The index of the row will either be all 0 or will be the input value.

        :param index: The Tycho ID string to use as the index of the returned row, or ``None`` to use ``'0 0 0'``
        :return: The nan filled dataframe
        """

        if index is not None:
            return self._process_results([pd.DataFrame([[index] + [np.nan if x != np.unicode_ else ''
                                                                   for x in self._dtypes[1:]]],
                                                       columns=self._names)])
        else:
            return self._process_results([pd.DataFrame([['0 0 0'] + [np.nan if x != np.unicode_ else ''
                                                                     for x in self._dtypes[1:]]],
                                                       columns=self._names)])
    def retrieve_record(self, tycho_id: str) -> pd.DataFrame:
        """
        This method can be used to retrieve a single star by ID from the Tycho 2 main catalogue or first supplement
        file.

        The star is returned in the raw Tycho catalogue format, not in the GIANT format.

        :param tycho_id: The tycho id as a string with each component separated by a space
        :return: The found star record, or a record filled with NaN
        """

        zone = int(tycho_id.split()[0]) - 1

        start = self._index.iloc[zone]
        stop = self._index.iloc[zone + 1]

        self._main.seek((start.mstars - 1) * self._main_line_length, os.SEEK_SET)

        ind = 0
        for line in self._main:
            if tycho_id in line[:12]:
                self._main.seek(0, os.SEEK_SET)

                stream = StringIO(line)

                record = pd.read_csv(stream, sep='|', header=None, index_col=False, names=self._names,
                                     dtype=dict(zip(self._names, self._dtypes)),
                                     na_values=[' ' * length for length in range(20)])

                return self._process_results([record])

            if ind == stop.mstars + 1:
                break

            ind += 1

        self._sup1.seek((start.sstars - 1) * self._sup1_line_length, os.SEEK_SET)

        ind = 0
        for line in self._sup1:
            if tycho_id in line[:12]:
                self._sup1.seek(0, os.SEEK_SET)

                stream = StringIO(line)

                record = pd.read_csv(stream, sep='|', header=None, index_col=False, names=self._sup1_names,
                                     dtype=dict(zip(self._sup1_names, self._sup1_dtypes)),
                                     na_values=[' ' * length for length in range(20)])

                return self._process_results([record], rtype='supp')

            if ind == stop.sstars + 1:
                break

            ind += 1

        warnings.warn('Tycho2 record for star {} not found'.format(tycho_id))

        return self.nan_frame(index=tycho_id)
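    # Usage sketch for retrieve_record (hedged; the ID shown is hypothetical and this assumes the catalogue
    # files are present):
    #
    #     cat = Tycho2()
    #     record = cat.retrieve_record('1 13 1')  # raw record for TYC 1-13-1, or a NaN row plus a warning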
    def query_catalogue(self, ids: Optional[ARRAY_LIKE] = None, min_ra: Real = 0, max_ra: Real = 360,
                        min_dec: Real = -90, max_dec: Real = 90, min_mag: Real = -4, max_mag: Real = 20,
                        search_center: Optional[ARRAY_LIKE] = None, search_radius: Optional[Real] = None,
                        new_epoch: Optional[Union[datetime, Real]] = None) -> pd.DataFrame:
        """
        This method queries stars from the catalogue that meet specified constraints and returns them as a DataFrame
        with columns of :attr:`.GIANT_COLUMNS`.

        Stars can either be queried by ID directly or by right ascension/declination/magnitude.  You cannot filter
        using both with this method.

        If :attr:`apply_proper_motion` is ``True`` then this will shift the stars to the new epoch input by the user
        (``new_epoch``) using proper motion.

        :param ids: A sequence of star ids to retrieve from the catalogue.  The ids are given as strings of the form
                    ``'{TYC1} {TYC2} {TYC3}'`` where ``TYC*`` are the 3 components of the Tycho ID of the star.
        :param min_ra: The minimum ra bound to query stars from in degrees
        :param max_ra: The maximum ra bound to query stars from in degrees
        :param min_dec: The minimum declination to query stars from in degrees
        :param max_dec: The maximum declination to query stars from in degrees
        :param min_mag: The minimum magnitude to query stars from.  Recall that magnitude is inverse (so lower
                        magnitude is a brighter star)
        :param max_mag: The maximum magnitude to query stars from.  Recall that magnitude is inverse (so higher
                        magnitude is a dimmer star)
        :param search_center: The center of a search cone as a ra/dec pair.
        :param search_radius: The radius about the center of the search cone
        :param new_epoch: The epoch to translate the stars to using proper motion if :attr:`apply_proper_motion` is
                          turned on
        :return: A Pandas dataframe with columns :attr:`GIANT_COLUMNS`.
        """

        if ids is not None:
            out = []
            for star_id in ids:
                out.append(self.retrieve_record(star_id))

            cat_recs = pd.concat(out)
        else:
            # query the catalogue to get the full records
            cat_recs = self._get_all_with_criteria(min_ra=min_ra, max_ra=max_ra, min_dec=min_dec, max_dec=max_dec,
                                                   min_visual_mag=min_mag, max_visual_mag=max_mag,
                                                   search_radius=search_radius, search_center=search_center)

        # drop anything that isn't well known
        cat_recs = cat_recs[~cat_recs.loc[:, 'pmRA':'pmDE'].isnull().any(axis=1)]

        # convert each to the format GIANT expects
        giant_records = self.convert_to_giant_format(cat_recs)

        # apply the proper motion if requested
        if self.include_proper_motion and (new_epoch is not None):
            apply_proper_motion(giant_records, new_epoch, copy=False)

        return giant_records
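    # Usage sketch for query_catalogue (hedged; the coordinates and epoch below are made up and the data files
    # are assumed to be present):
    #
    #     cat = Tycho2()
    #     # GIANT-format records in a 5 degree cone, no dimmer than magnitude 10, propagated to epoch 2023.0
    #     stars = cat.query_catalogue(search_center=(180.0, -30.0), search_radius=5.0,
    #                                 max_mag=10.0, new_epoch=2023.0)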
    def query_catalogue_raw(self, ids: Optional[ARRAY_LIKE] = None, min_ra: Real = 0, max_ra: Real = 360,
                            min_dec: Real = -90, max_dec: Real = 90, min_visual_mag: Real = -4,
                            max_visual_mag: Real = 20, search_center: Optional[ARRAY_LIKE] = None,
                            search_radius: Optional[Real] = None) -> pd.DataFrame:
        """
        This method queries stars from the catalogue that meet specified constraints and returns them as a DataFrame
        where the columns are the raw catalogue columns.

        Stars can either be queried by ID directly or by right ascension/declination/magnitude.  You cannot filter
        using both with this method.

        This method is not usable by GIANT and it does not apply proper motion.  If you need records that are usable
        by GIANT and with proper motion applied see :meth:`query_catalogue`.

        For details on what the columns are refer to the Tycho 2 documentation (can be found online).

        :param ids: A sequence of star ids to retrieve from the catalogue.  The ids are given by string and should be
                    ``'{TYC1} {TYC2} {TYC3}'`` where ``TYC*`` are the 3 components of the Tycho ID of the star.
        :param min_ra: The minimum ra bound to query stars from in degrees
        :param max_ra: The maximum ra bound to query stars from in degrees
        :param min_dec: The minimum declination to query stars from in degrees
        :param max_dec: The maximum declination to query stars from in degrees
        :param min_visual_mag: The minimum visual magnitude to query stars from.  Recall that magnitude is inverse (so
                               lower magnitude is a brighter star)
        :param max_visual_mag: The maximum visual magnitude to query stars from.  Recall that magnitude is inverse (so
                               higher magnitude is a dimmer star)
        :param search_center: The center of a search cone as a ra/dec pair.
        :param search_radius: The radius about the center of the search cone
        :return: A Pandas dataframe with the original columns from the star catalogue.
        """

        if ids is not None:
            out = []
            for star_id in ids:
                out.append(self.retrieve_record(star_id))

            return pd.concat(out)
        else:
            # query the catalogue to get the full records
            return self._get_all_with_criteria(min_ra=min_ra, max_ra=max_ra, min_dec=min_dec, max_dec=max_dec,
                                               min_visual_mag=min_visual_mag, max_visual_mag=max_visual_mag,
                                               search_radius=search_radius, search_center=search_center)
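    # Usage sketch for query_catalogue_raw (hedged; the bounds are arbitrary).  The raw interface keeps the
    # original Tycho 2 columns rather than the GIANT ones:
    #
    #     cat = Tycho2()
    #     raw = cat.query_catalogue_raw(min_ra=10.0, max_ra=12.0, min_dec=0.0, max_dec=2.0)
    #     print(raw[['RAmdeg', 'DEmdeg', 'VTmag']].head())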
    def _get_all_with_criteria(self, min_ra: Real = 0., max_ra: Real = 360.,
                               min_dec: Real = -90., max_dec: Real = 90.,
                               search_center: Optional[ARRAY_LIKE] = None, search_radius: Optional[Real] = None,
                               max_visual_mag: Real = 20., min_visual_mag: Real = -1.44,
                               max_b_mag: Real = 20., min_b_mag: Real = -1.44) -> pd.DataFrame:
        """
        This method gets all stars meeting the criteria from the catalogue, returning the results as a single
        DataFrame.

        In general, the user should not interact with this method and instead should use
        :meth:`query_catalogue_raw`.

        :param min_ra: The minimum ra bound to query stars from in degrees
        :param max_ra: The maximum ra bound to query stars from in degrees
        :param min_dec: The minimum declination to query stars from in degrees
        :param max_dec: The maximum declination to query stars from in degrees
        :param min_visual_mag: The minimum visual magnitude to query stars from.  Recall that magnitude is inverse (so
                               lower magnitude is a brighter star)
        :param max_visual_mag: The maximum visual magnitude to query stars from.  Recall that magnitude is inverse (so
                               higher magnitude is a dimmer star)
        :param min_b_mag: The minimum b magnitude to query stars from.  Recall that magnitude is inverse (so lower
                          magnitude is a brighter star)
        :param max_b_mag: The maximum b magnitude to query stars from.  Recall that magnitude is inverse (so higher
                          magnitude is a dimmer star)
        :param search_center: The center of a search cone as a ra/dec pair.
        :param search_radius: The radius about the center of the search cone
        :return: A Pandas dataframe with columns according to the catalogue columns.
        """

        # retrieve the required columns from the index for ease of use
        ind_min_ra = self._index.minra
        ind_max_ra = self._index.maxra
        ind_min_dec = self._index.mindec
        ind_max_dec = self._index.maxdec

        # determine which GSC zones we need to check
        if search_center is not None:

            min_ra2 = search_center[0] - search_radius
            max_ra2 = search_center[0] + search_radius
            min_dec2 = search_center[1] - search_radius
            max_dec2 = search_center[1] + search_radius

            if min_dec2 < -90:
                # if the search region includes the south pole take all RA
                min_dec2 = -90
                min_ra2 = 0
                max_ra2 = 360

            elif max_dec2 > 90:
                # if the search region includes the north pole take all RA
                max_dec2 = 90
                min_ra2 = 0
                max_ra2 = 360

            if (min_ra2 < 0) and (max_ra2 < 360):
                # if the first point of Aries is included then we need to query both the high and low RA
                index_check = ((ind_min_ra <= max_ra) & (ind_max_ra >= min_ra) &
                               (ind_min_dec <= max_dec) & (ind_max_dec >= min_dec)) & \
                              ((((ind_min_ra <= max_ra2) & (ind_max_ra >= 0)) |
                                ((ind_min_ra <= 360) & (ind_max_ra >= min_ra2 + 360))) &
                               (ind_min_dec <= max_dec2) & (ind_max_dec >= min_dec2))

            elif (min_ra2 < 0) and (max_ra2 >= 360):
                # if we need the whole ra band
                min_ra2 = 0
                max_ra2 = 360

                index_check = ((ind_min_ra <= max_ra) & (ind_max_ra >= min_ra) &
                               (ind_min_dec <= max_dec) & (ind_max_dec >= min_dec)) & \
                              ((ind_min_ra <= max_ra2) & (ind_max_ra >= min_ra2) &
                               (ind_min_dec <= max_dec2) & (ind_max_dec >= min_dec2))

            elif max_ra2 >= 360:
                # if the first point of Aries is included then we need to query both the high and low RA
                index_check = ((ind_min_ra <= max_ra) & (ind_max_ra >= min_ra) &
                               (ind_min_dec <= max_dec) & (ind_max_dec >= min_dec)) & \
                              ((((ind_min_ra <= 360) & (ind_max_ra >= min_ra2)) |
                                ((ind_min_ra <= max_ra2 - 360) & (ind_max_ra >= 0))) &
                               (ind_min_dec <= max_dec2) & (ind_max_dec >= min_dec2))

            else:
                # otherwise nothing special
                index_check = ((ind_min_ra <= max_ra) & (ind_max_ra >= min_ra) &
                               (ind_min_dec <= max_dec) & (ind_max_dec >= min_dec)) & \
                              ((ind_min_ra <= max_ra2) & (ind_max_ra >= min_ra2) &
                               (ind_min_dec <= max_dec2) & (ind_max_dec >= min_dec2))

        else:
            index_check = ((ind_min_ra < max_ra) & (ind_max_ra > min_ra) &
                           (ind_min_dec < max_dec) & (ind_max_dec > min_dec))

        # ################################################ MAIN FILE ################################################

        # get the start and stop lines for each zone we need to consider
        start_lines = self._index.loc[index_check].mstars - 1  # type: pd.Series
        end_lines = self._index.loc[start_lines.index + 1].mstars  # type: pd.Series

        results = []

        for start, stop in zip(start_lines, end_lines):

            # seek to the proper point in the file
            self._main.seek(start * self._main_line_length, os.SEEK_SET)

            # read the file for the current GSC chunk
            df = pd.read_csv(self._main, sep='|', header=None, index_col=False, names=self._names,
                             dtype=dict(zip(self._names, self._dtypes)), nrows=stop - start + 1,
                             na_values=[' ' * length for length in range(20)])

            # perform comparisons
            visual_check = (df.VTmag >= min_visual_mag) & (df.VTmag <= max_visual_mag)
            b_check = (df.BTmag >= min_b_mag) & (df.BTmag <= max_b_mag)

            test = ((df.RAmdeg >= min_ra) & (df.RAmdeg <= max_ra) &
                    (df.DEmdeg >= min_dec) & (df.DEmdeg <= max_dec) &
                    ((visual_check & b_check) | (df.VTmag.isnull() & b_check) | (df.BTmag.isnull() & visual_check)))

            if search_center is not None:
                # check the radial distance if we're doing a cone search
                test &= radec_distance(df.RAmdeg * DEG2RAD, df.DEmdeg * DEG2RAD,
                                       search_center[0] * DEG2RAD,
                                       search_center[1] * DEG2RAD) <= (search_radius * DEG2RAD)

            # check to see if anything met the criteria
            if test.any():
                results.append(df.loc[test])

        # ############################################# SUPPLEMENT FILE #############################################

        # get the start and stop lines for each zone we need to consider
        start_lines = self._index.loc[index_check].sstars - 1  # type: pd.Series
        end_lines = self._index.loc[start_lines.index + 1].sstars  # type: pd.Series

        sup1results = []

        for start, stop in zip(start_lines, end_lines):

            # seek to the proper point in the file
            self._sup1.seek(start * self._sup1_line_length, os.SEEK_SET)

            # if there are no supplement stars to consider
            if (stop - start + 1) == 0:
                continue

            # read the file for the current GSC chunk
            df = pd.read_csv(self._sup1, sep='|', header=None, index_col=False, names=self._sup1_names,
                             dtype=dict(zip(self._sup1_names, self._sup1_dtypes)), nrows=stop - start + 1,
                             na_values=[' ' * length for length in range(20)])

            # perform comparisons
            visual_check = (df.VTmag >= min_visual_mag) & (df.VTmag <= max_visual_mag)
            b_check = (df.BTmag >= min_b_mag) & (df.BTmag <= max_b_mag)

            test = ((df.RAdeg >= min_ra) & (df.RAdeg <= max_ra) &
                    (df.DEdeg >= min_dec) & (df.DEdeg <= max_dec) &
                    ((visual_check & b_check) | (df.VTmag.isnull() & b_check) | (df.BTmag.isnull() & visual_check)))

            if search_center is not None:
                # check the radial distance if we're doing a cone search
                test &= radec_distance(df.RAdeg * DEG2RAD, df.DEdeg * DEG2RAD,
                                       search_center[0] * DEG2RAD,
                                       search_center[1] * DEG2RAD) <= (search_radius * DEG2RAD)

            # check to see if anything met the criteria
            if test.any():
                sup1results.append(df.loc[test])

        return pd.concat([self._process_results(results), self._process_results(sup1results, rtype='supp')])

    def _process_results(self, res: List[pd.DataFrame], rtype: str = 'main') -> pd.DataFrame:
        """
        This modifies the star records to use the same format, have the right index, and label whether they are main
        or supplemental stars.

        :param res: The frames to modify and join together
        :param rtype: the type of frames, either main or supp
        :return: The concatenated and modified dataframe
        """

        if res:
            # if we found anything then concatenate all of the results together
            big_df = pd.concat(res)  # type: pd.DataFrame

            # split the TYC ID column into its components
            # noinspection SpellCheckingInspection
            tycid = big_df['TYCID'].str.split(expand=True)
            big_df['TYC1'] = tycid[0].astype(np.uint16)
            big_df['TYC2'] = tycid[1].astype(np.uint16)
            big_df['TYC3'] = tycid[2].astype(np.uint8)

            if 'supp' in rtype:
                out = big_df.loc[:, 'flag':].set_index(['TYC1', 'TYC2', 'TYC3']).assign(tycho_source='supp')

                return out.rename(columns=dict(zip(self._sup1_names, self._sup1_rename)))
            else:
                # noinspection SpellCheckingInspection
                out = big_df.loc[:, 'pflag':].set_index(['TYC1', 'TYC2', 'TYC3']).assign(tycho_source='main')

                return out
        else:
            return pd.DataFrame(columns=self._names[1:] + ['TYC1', 'TYC2', 'TYC3'] +
                                        ['tycho_source']).set_index(['TYC1', 'TYC2', 'TYC3'])
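    # Worked example of the zone selection in _get_all_with_criteria above (numbers are illustrative): a cone
    # search centered at (ra, dec) = (1.0, 0.0) degrees with a 5 degree radius gives min_ra2 = -4.0, so the first
    # branch fires and index zones are kept if they either start below max_ra2 = 6.0 degrees or end above
    # min_ra2 + 360 = 356.0 degrees, covering the wrap around the first point of Aries.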
    @staticmethod
    def convert_to_giant_format(tycho_recs):
        """
        This method converts raw Tycho 2 records (as returned by :meth:`query_catalogue_raw`) into the GIANT star
        record format with columns of :attr:`.GIANT_COLUMNS`.

        :param tycho_recs: The raw Tycho 2 records as a DataFrame
        :return: The records converted to the GIANT format
        """

        tycho_recs['distance'] = STAR_DIST
        tycho_recs['distance_sigma'] = 20 / (STAR_DIST / PARSEC2KM / 1000) ** 2 * PARSEC2KM * 1000

        # noinspection SpellCheckingInspection
        tycho_cols = ['RAmdeg', 'DEmdeg', 'distance', 'pmRA', 'pmDE', 'VTmag',
                      'e_RAmdeg', 'e_DEmdeg', 'distance_sigma', 'e_pmRA', 'e_pmDE']

        records = tycho_recs.loc[:, tycho_cols].rename(columns=dict(zip(tycho_cols, GIANT_COLUMNS)))
        records.dtypes.loc[GIANT_COLUMNS] = GIANT_TYPES

        # convert to giant units
        records['ra_sigma'] /= DEG2MAS
        records['dec_sigma'] /= DEG2MAS
        records['ra_proper_motion'] /= DEG2MAS
        records['dec_proper_motion'] /= DEG2MAS
        records['ra_pm_sigma'] /= DEG2MAS
        records['dec_pm_sigma'] /= DEG2MAS

        main_stars = tycho_recs['tycho_source'] == 'main'

        # update ra_sigma and dec_sigma to J2000 for the main records
        ra_shift_time = 2000.0 - tycho_recs.loc[main_stars, 'EpRAm']
        dec_shift_time = 2000.0 - tycho_recs.loc[main_stars, 'EpDEm']

        records = records.assign(epoch=2000.0)

        records.loc[main_stars, 'ra_sigma'] = np.sqrt(records['ra_sigma'] ** 2 +
                                                      ra_shift_time ** 2 * records['ra_pm_sigma'] ** 2)
        records.loc[main_stars, 'dec_sigma'] = np.sqrt(records['dec_sigma'] ** 2 +
                                                       dec_shift_time ** 2 * records['dec_pm_sigma'] ** 2)

        # set the epoch for the supplement stars
        records.loc[~main_stars, "epoch"] = 1991.25

        return records
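
# Usage sketch for the conversion step in isolation (hedged; normally query_catalogue calls this for you and
# also drops records with missing proper motion first):
#
#     cat = Tycho2()
#     raw = cat.query_catalogue_raw(search_center=(25.0, 36.0), search_radius=1.0)
#     giant_recs = Tycho2.convert_to_giant_format(raw)  # columns now follow GIANT_COLUMNS, epoch set per source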
def download_tycho(target_directory: Path):
    """
    This function downloads the Tycho2 catalogue from vizier to the target directory.

    This is done over FTPS.  It requires an active internet connection that can connect to cdsarc.u-strasbg.fr

    .. warning::
        This download will take a long time and use up approximately 500 MB of space.

    .. warning::
        This download has no way to verify the integrity of the files because no hash is provided.  While the vizier
        service is trusted, use this function at your own risk

    :param target_directory: the directory to save the Tycho catalogue to
    """

    # we minimize the security risk here by using FTPS
    import ftplib  # nosec
    from gzip import decompress

    target_directory.mkdir(exist_ok=True, parents=True)

    # FTPS is secure
    ftp = ftplib.FTP_TLS('cdsarc.u-strasbg.fr')  # nosec

    # anonymous login since we're just grabbing data
    ftp.connect()
    ftp.sendcmd('USER anonymous')
    ftp.sendcmd('PASS anonymous@a.com')

    ftp.cwd('pub/cats/I/259/')

    lines = []
    ftp.retrlines('LIST', callback=lines.append)

    tyc_file = target_directory / "tyc2.dat"

    if tyc_file.exists():
        # need to delete this file since we will append to it
        tyc_file.unlink()

    for line in lines:

        # get the file name
        name = line.split()[-1]

        if ".dat" in name:
            start = time.time()

            if 'tyc2' in name:
                local = target_directory / "tyc2.dat"
                mode = "ab"
            else:
                local = target_directory / name.replace('.gz', '')
                mode = 'wb'

            with local.open(mode) as download_file:
                writer = download_file.write
                # noinspection PyTypeChecker,SpellCheckingInspection
                ftp.retrbinary('RETR {}'.format(name), writer)

            print('{} done in {:.3f}'.format(name, time.time() - start), flush=True)

    # there is some risk here because no hash is provided with the files but what can you do?
    print('decompressing the data')
    for file in target_directory.glob('*'):
        with file.open('rb') as out_file:
            decompressed = decompress(out_file.read())

        with file.open('wb') as out_file:
            out_file.write(decompressed)
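
# Usage sketch for pre-downloading to a custom location (hedged; the path below is hypothetical):
#
#     from pathlib import Path
#     target = Path('~/tycho2_data').expanduser()
#     download_tycho(target)              # long download, roughly 500 MB
#     cat = Tycho2(directory=target)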