# Copyright 2021 United States Government as represented by the Administrator of the National Aeronautics and Space
# Administration. No copyright is claimed in the United States under Title 17, U.S. Code. All Other Rights Reserved.
r"""
This module defines the interface to the Tycho 2 star catalogue.
Catalogue Description
=====================
The Tycho2 is a bright star catalogue containing positions, proper motions, and photometry for the 2.5 million brightest
stars in the sky based solely on observations from the Hipparcos satellite. This corresponds to nearly complete
coverage down to a visual magnitude of about 11.0.
The Tycho 2 catalogue uses a csv text file to store the stars. It is not very efficient for querying either large or
small numbers of stars. It also does not include blended stars (stars that are close enough together to appear as a
single source in an image). If you need faster retrieval and/or blended stars then you should use the
:mod:`.giant_catalogue` instead.
For a more thorough description of the Tycho2 star catalogue see https://www.cosmos.esa.int/web/hipparcos/tycho-2.
Use
===
The Tycho 2 catalogue can be used anywhere that a star catalogue is required in GIANT.
It is stored in 3 csv files plus an index csv that takes about 500 MB of disk space. If you attempt to
initialize the class and point it to a directory that does not contain the Tycho 2 data it will ask you if you want to
download the catalogue (note that the Tycho 2 data is not included by default so if you have not downloaded it yourself
you will definitely need to). If you answer yes, be aware that it may take a very long time to download.
Once you have initialized the class (and downloaded the data files), then you can access the catalogue as you would any
GIANT usable catalogue. Simply call :meth:`~.Tycho2.query_catalogue` to get the GIANT records for the stars as a
dataframe with columns according to the :attr:`.GIANT_COLUMNS`. This class also provides a helper method,
:meth:`~.Tycho2.query_catalogue_raw`, which can be used to retrieve the raw catalogue entries (instead of the GIANT
entries).
"""
import os
from pathlib import Path
from io import StringIO
import time
from datetime import datetime
import warnings
from typing import Optional, TextIO, List, Union
import numpy as np
import pandas as pd
from giant.catalogues.meta_catalogue import Catalogue, GIANT_TYPES, GIANT_COLUMNS
from giant.catalogues.utilities import radec_distance, DEG2RAD, STAR_DIST, PARSEC2KM, DEG2MAS, apply_proper_motion
from giant._typing import PATH, Real, ARRAY_LIKE
TYCHO_DIR: Path = Path(__file__).resolve().parent / "data" / "TYCHO2"
"""
This gives the default location of the Tycho 2 catalogue files.

The default location is the "data/TYCHO2" directory beneath the directory containing this source file.
"""
class Tycho2(Catalogue):
    """
    This class provides access to the Tycho 2 star catalogue.

    This class is a fully functional catalogue for GIANT and can be used anywhere that GIANT expects a star catalogue.
    As such, it implements the :attr:`include_proper_motion` to turn proper motion on or off as well as the method
    :meth:`query_catalogue` which is how stars are queried into the GIANT format.  In addition, this catalogue provides
    1 additional method :meth:`query_catalogue_raw` which returns the raw Tycho 2 records for stars instead of the
    GIANT records.  This method isn't used anywhere by GIANT itself, but may be useful if you are doing some advanced
    analysis.

    To use this class simply initialize it, pointing to the directory where the Tycho 2 catalogue files index.dat,
    suppl_1.dat, and tyc2.dat are contained.  If the catalogue files do not exist it will ask you if you want to
    download them, and if you answer yes, it will download the Tycho 2 catalogue (which takes a long time in most
    instances).  Once the class is initialized, you can query stars from it using :meth:`query_catalogue` which will
    return a dataframe of the star records with :attr:`.GIANT_COLUMNS` columns.
    """

    def __init__(self, directory: PATH = TYCHO_DIR, include_proper_motion: bool = True):
        """
        :param directory: The directory containing the Tycho 2 catalogue files. This should contain index.dat,
                          suppl_1.dat, and tyc2.dat as csv files.
        :param include_proper_motion: A boolean flag specifying whether to apply proper motion when retrieving the
                                      stars
        """

        # let the base Catalogue class store the proper motion flag
        super().__init__(include_proper_motion=include_proper_motion)

        directory = Path(directory)

        # the root directory where the catalogue files are stored
        self._root: Path = directory

        if not directory.exists():
            self._prompt_download("Tycho data not found at {}".format(directory),
                                  'The Tycho data is not available in the specified directory. Cannot initialize '
                                  'the Tycho2 class.')

        # the index file for the catalogue
        self._index_file: Path = self._root / 'index.dat'

        if not self._index_file.exists():
            self._prompt_download("Tycho index data missing at {}".format(directory),
                                  'The Tycho 2 index file could not be located.\n'
                                  'Please ensure that "index.dat" is in your tycho directory to proceed')

        # the index specifying where stars are in the catalogue (populated by _build_index)
        self._index: Optional[pd.DataFrame] = None

        # build the index in memory
        self._build_index()

        # the main file object for the catalogue
        self._main: TextIO = self._open_catalogue_file(
            'tyc2.dat',
            "Tycho data missing at {}".format(directory),
            'The Tycho 2 main file could not be located.\n'
            'Please ensure that "tyc2.dat" is in your tycho directory to proceed')

        # the length of a line in the main table.  the records are fixed width so this lets us seek directly to a
        # record by number
        self._main.readline()
        self._main_line_length: int = self._main.tell()
        self._main.seek(0)

        # the first supplement file object
        self._sup1: TextIO = self._open_catalogue_file(
            'suppl_1.dat',
            "Tycho supplement data missing at {}".format(directory),
            'The first supplement file could not be located.\n'
            'Please ensure that "suppl_1.dat" is in your tycho directory to proceed')

        # the length of a line in the first supplement table
        self._sup1.readline()
        self._sup1_line_length: int = self._sup1.tell()
        self._sup1.seek(0)

        # the names of the columns from the main file
        # noinspection SpellCheckingInspection
        self._names: List[str] = ['TYCID',
                                  'pflag',
                                  'RAmdeg', 'DEmdeg', 'pmRA', 'pmDE',
                                  'e_RAmdeg', 'e_DEmdeg', 'e_pmRA', 'e_pmDE',
                                  'EpRAm', 'EpDEm',
                                  'Num',
                                  'q_RAmdeg', 'q_DEmdeg', 'q_pmRA', 'q_pmDE',
                                  'BTmag', 'e_BTmag', 'VTmag', 'e_VTmag',
                                  'prox',
                                  'TYC', 'HIPpCCDM',
                                  'RAdeg', 'DEdeg',
                                  'EpRAm1990', 'EpDEm1990',
                                  'e_RAdeg', 'e_DEdeg',
                                  'posflg',
                                  'corr']

        # the names of the columns from the first supplement file
        # noinspection SpellCheckingInspection
        self._sup1_names: List[str] = ['TYCID',
                                       'flag',
                                       'RAdeg', 'DEdeg', 'pmRA', 'pmDE',
                                       'e_RAdeg', 'e_DEdeg', 'e_pmRA', 'e_pmDE',
                                       'mflag',
                                       'BTmag', 'e_BTmag', 'VTmag', 'e_VTmag',
                                       'prox',
                                       'TYC', 'HIPpCCDM']

        # the names that the first supplement columns are renamed to so they can be merged with the main catalogue
        # files
        # noinspection SpellCheckingInspection
        self._sup1_rename: List[str] = ['TYCID',
                                        'flag',
                                        'RAmdeg', 'DEmdeg', 'pmRA', 'pmDE',
                                        'e_RAmdeg', 'e_DEmdeg', 'e_pmRA', 'e_pmDE',
                                        'mflag',
                                        'BTmag', 'e_BTmag', 'VTmag', 'e_VTmag',
                                        'prox', 'TYC', 'HIPpCCDM']

        # the types of each column of the main catalogue (as raw types).
        # np.str_ is used in place of np.unicode_ -- they are the same object in NumPy < 2.0 but the np.unicode_
        # alias was removed in NumPy 2.0
        # noinspection SpellCheckingInspection
        self._dtypes: List[type] = [np.str_,  # TYC1, TYC2, TYC3
                                    np.str_,  # pflag
                                    np.float64, np.float64,  # RAmdeg, DEmdeg
                                    np.float64, np.float64,  # pmRA, pmDE
                                    np.str_, np.float64,  # e_RAmdeg, e_DEmdeg
                                    np.float64, np.float64,  # e_pmRA, e_pmDE
                                    np.float64, np.float64,  # EpRAm, EpDEm
                                    np.float64,  # Num
                                    np.float64, np.float64,  # q_RAmdeg, q_DEmdeg
                                    np.float64, np.float64,  # q_pmRA, q_pmDE
                                    np.float64, np.float64,  # BTmag, e_BTmag
                                    np.float64, np.float64,  # VTmag, e_VTmag
                                    np.float64,  # prox
                                    np.str_,  # TYC
                                    np.str_,  # HIP, CCDM
                                    np.float64, np.float64,  # RAdeg, DEdeg
                                    np.float64, np.float64,  # EpRA-1990, EpDE-1990
                                    np.float64, np.float64,  # e_RAdeg, e_DEdeg
                                    np.str_,  # posflg
                                    np.float64]  # corr

        # the types of each column of the first supplement file (as raw types)
        # noinspection SpellCheckingInspection
        self._sup1_dtypes = [np.str_,  # TYC1, TYC2, TYC3
                             np.str_,  # flag
                             np.float64, np.float64,  # RAdeg, DEdeg
                             np.float64, np.float64,  # pmRA, pmDE
                             np.float64, np.float64,  # e_RAdeg, e_DEdeg
                             np.float64, np.float64,  # e_pmRA, e_pmDE
                             np.str_,  # mflag
                             np.float64, np.float64,  # BTmag, e_BTmag
                             np.float64, np.float64,  # VTmag, e_VTmag
                             np.float64,  # prox
                             np.str_,  # TYC
                             np.str_]  # HIP, CCDM

    def _prompt_download(self, missing_message: str, error_message: str) -> None:
        """
        Print what is missing, ask the user whether the Tycho 2 catalogue should be downloaded to ``self._root``, and
        either download it or raise.

        This factors out the prompt/download/raise stanza that was repeated once per missing file.

        :param missing_message: The message describing which piece of data is missing
        :param error_message: The message for the FileNotFoundError raised when the user declines the download
        :raises FileNotFoundError: if the user declines the download
        """
        print(missing_message, flush=True)

        user_response = input("Would you like to download the Tycho data to this directory (y/n)?\n"
                              " WARNING: THIS REQUIRES AN INTERNET CONNECTION, WILL TAKE A LONG TIME, AND WILL"
                              " USE UP 500 MB OF SPACE!\n ")

        if user_response[:1].lower() == 'y':
            # make sure the directory exists.  this is a no-op when it already does
            self._root.mkdir(exist_ok=True, parents=True)
            download_tycho(self._root)
        else:
            raise FileNotFoundError(error_message)

    def _open_catalogue_file(self, name: str, missing_message: str, error_message: str) -> TextIO:
        """
        Open catalogue file ``name`` beneath the root directory, offering to download the catalogue if it is missing.

        :param name: The file name relative to the root directory
        :param missing_message: The message describing the missing data for the download prompt
        :param error_message: The message for the FileNotFoundError raised when the user declines the download
        :return: The opened text file object
        :raises FileNotFoundError: if the file is missing and the user declines the download
        """
        try:
            return (self._root / name).open('r')
        except FileNotFoundError:
            # _prompt_download either downloads the catalogue or raises, so the retry below only runs on success
            self._prompt_download(missing_message, error_message)
            return (self._root / name).open('r')
def _build_index(self):
"""
This method stores the index in memory from the index file
"""
# noinspection SpellCheckingInspection
self._index = pd.read_csv(self._index_file, sep='|', header=None, index_col=False,
names=['mstars', 'sstars', 'minra', 'maxra', 'mindec', 'maxdec'],
dtype={'mstars': np.uint32, 'sstars': np.uint16,
'minra': np.float64, 'maxra': np.float64,
'mindec': np.float64, 'maxdec': np.float64})
[docs] def empty_frame(self) -> pd.DataFrame:
"""
This simple helper function returns an empty dataframe with the appropriate columns.
:return: The empty dataframe
"""
return self._process_results([])
[docs] def nan_frame(self, index: Optional[str] = None) -> pd.DataFrame:
"""
This simple helper function returns a dataframe with a single NaN filled row.
The index of the row will either be all 0 or will be the input value
:return: The nan filled dataframe
"""
if index is not None:
return self._process_results([pd.DataFrame([[index] +
[np.nan if x != np.unicode_ else '' for x in self._dtypes[1:]]],
columns=self._names)])
else:
return self._process_results([pd.DataFrame([['0 0 0'] +
[np.nan if x != np.unicode_ else '' for x in self._dtypes[1:]]],
columns=self._names)])
[docs] def retrieve_record(self, tycho_id: str) -> pd.DataFrame:
"""
This method can be used to retrieve a single star by ID from the tycho 2 main catalogue or first supplement
file.
The star is returned in the raw Tycho catalogue format, not in the GIANT format.
:param tycho_id: The tycho id as a string with each component separated by a space
:return: The found star record, or a record filled with NaN
"""
zone = int(tycho_id.split()[0]) - 1
start = self._index.iloc[zone]
stop = self._index.iloc[zone + 1]
self._main.seek((start.mstars - 1) * self._main_line_length, os.SEEK_SET)
ind = 0
for line in self._main:
if tycho_id in line[:12]:
self._main.seek(0, os.SEEK_SET)
stream = StringIO(line)
record = pd.read_csv(stream, sep='|', header=None, index_col=False,
names=self._names, dtype=dict(zip(self._names, self._dtypes)),
na_values=[' ' * length for length in range(20)])
return self._process_results([record])
if ind == stop.mstars + 1:
break
ind += 1
self._sup1.seek((start.sstars - 1) * self._sup1_line_length, os.SEEK_SET)
ind = 0
for line in self._sup1:
if tycho_id in line[:12]:
self._sup1.seek(0, os.SEEK_SET)
stream = StringIO(line)
record = pd.read_csv(stream, sep='|', header=None, index_col=False,
names=self._sup1_names, dtype=dict(zip(self._sup1_names, self._sup1_dtypes)),
na_values=[' ' * length for length in range(20)])
return self._process_results([record], rtype='supp')
if ind == stop.sstars + 1:
break
ind += 1
warnings.warn('Tycho2 record for star {} not found'.format(tycho_id))
return self.nan_frame(index=tycho_id)
[docs] def query_catalogue(self, ids: Optional[ARRAY_LIKE] = None, min_ra: Real = 0, max_ra: Real = 360,
min_dec: Real = -90, max_dec: Real = 90, min_mag: Real = -4, max_mag: Real = 20,
search_center: Optional[ARRAY_LIKE] = None, search_radius: Optional[Real] = None,
new_epoch: Optional[Union[datetime, Real]] = None) -> pd.DataFrame:
"""
This method queries stars from the catalogue that meet specified constraints and returns them as a DataFrame
with columns of :attr:`.GIANT_COLUMNS`.
Stars can either be queried by ID directly or by right ascension/declination/magnitude. You cannot filter using
both with this method. If :attr:`apply_proper_motion` is ``True`` then this will shift the stars to the new
epoch input by the user (``new_epoch``) using proper motion.
:param ids: A sequence of star ids to retrieve from the catalogue. The ids are given by zone, rnz and should be
input as an iterable that yields tuples (therefore if you have a dataframe you should do
``df.itertuples(false)``
:param min_ra: The minimum ra bound to query stars from in degrees
:param max_ra: The maximum ra bound to query stars from in degrees
:param min_dec: The minimum declination to query stars from in degrees
:param max_dec: The maximum declination to query stars from in degrees
:param min_mag: The minimum magnitude to query stars from. Recall that magnitude is inverse (so lower
magnitude is a dimmer star)
:param max_mag: The maximum magnitude to query stars from. Recall that magnitude is inverse (so higher
magnitude is a dimmer star)
:param search_center: The center of a search cone as a ra/dec pair.
:param search_radius: The radius about the center of the search cone
:param new_epoch: The epoch to translate the stars to using proper motion if :attr:`apply_proper_motion` is
turned on
:return: A Pandas dataframe with columns :attr:`GIANT_COLUMNS`.
"""
if ids is not None:
out = []
for star_id in ids:
out.append(self.retrieve_record(star_id))
cat_recs = pd.concat(out)
else:
# query the catalogue to get the full records
cat_recs = self._get_all_with_criteria(min_ra=min_ra, max_ra=max_ra, min_dec=min_dec, max_dec=max_dec,
min_visual_mag=min_mag, max_visual_mag=max_mag,
search_radius=search_radius, search_center=search_center)
# drop anything that isn't well known
cat_recs = cat_recs[~cat_recs.loc[:, 'pmRA':'pmDE'].isnull().any(axis=1)]
# convert each to the format GIANT expects
giant_records = self.convert_to_giant_format(cat_recs)
# apply the proper motion if requested
if self.include_proper_motion and (new_epoch is not None):
apply_proper_motion(giant_records, new_epoch, copy=False)
return giant_records
[docs] def query_catalogue_raw(self, ids: Optional[ARRAY_LIKE] = None, min_ra: Real = 0, max_ra: Real = 360,
min_dec: Real = -90, max_dec: Real = 90,
min_visual_mag: Real = -4, max_visual_mag: Real = 20,
search_center: Optional[ARRAY_LIKE] = None,
search_radius: Optional[Real] = None) -> pd.DataFrame:
"""
This method queries stars from the catalogue that meet specified constraints and returns them as a DataFrame
where the columns are the raw catalogue columns.
Stars can either be queried by ID directly or by right ascension/declination/magnitude. You cannot filter using
both with this method. This method is not usable by GIANT and it does not apply proper motion. If you need
records that are usable by GIANT and with proper motion applied see :meth:`query_catalogue`. For details on what
the columns are refer to the Tycho 2 documentation (can be found online).
:param ids: A sequence of star ids to retrieve from the catalogue. The ids are given by string and should be
``'{TYC1} {TYC2} {TYC3}'`` where ``TYC*`` are the 3 components of the Tycho ID of the star.
:param min_ra: The minimum ra bound to query stars from in degrees
:param max_ra: The maximum ra bound to query stars from in degrees
:param min_dec: The minimum declination to query stars from in degrees
:param max_dec: The maximum declination to query stars from in degrees
:param min_visual_mag: The minimum visual magnitude to query stars from. Recall that magnitude is inverse (so
lower magnitude is a dimmer star)
:param max_visual_mag: The maximum visual magnitude to query stars from. Recall that magnitude is inverse (so
higher magnitude is a dimmer star)
:param search_center: The center of a search cone as a ra/dec pair.
:param search_radius: The radius about the center of the search cone
:return: A Pandas dataframe with the original columns form the star catalogue.
"""
if ids is not None:
out = []
for star_id in ids:
out.append(self.retrieve_record(star_id))
return pd.concat(out)
else:
# query the catalogue to get the full records
return self._get_all_with_criteria(min_ra=min_ra, max_ra=max_ra, min_dec=min_dec, max_dec=max_dec,
min_visual_mag=min_visual_mag, max_visual_mag=max_visual_mag,
search_radius=search_radius, search_center=search_center)
def _get_all_with_criteria(self, min_ra: Real = 0., max_ra: Real = 360., min_dec: Real = -90., max_dec: Real = 90.,
search_center: Optional[ARRAY_LIKE] = None, search_radius: Optional[Real] = None,
max_visual_mag: Real = 20., min_visual_mag: Real = -1.44,
max_b_mag: Real = 20., min_b_mag: Real = -1.44,
) -> pd.DataFrame:
"""
This function gets all stars meeting the criteria from the catalogue, yielding the results as DataFrames.
In general, the user should not interact with this method and instead should use :meth:`query_catalogue_raw`.
:param min_ra: The minimum ra bound to query stars from in degrees
:param max_ra: The maximum ra bound to query stars from in degrees
:param min_dec: The minimum declination to query stars from in degrees
:param max_dec: The maximum declination to query stars from in degrees
:param min_visual_mag: The minimum visual magnitude to query stars from. Recall that magnitude is inverse (so
lower magnitude is a dimmer star)
:param max_visual_mag: The maximum visual magnitude to query stars from. Recall that magnitude is inverse (so
higher magnitude is a dimmer star)
:param min_b_mag: The minimum b magnitude to query stars from. Recall that magnitude is inverse (so
lower magnitude is a dimmer star)
:param max_b_mag: The maximum b magnitude to query stars from. Recall that magnitude is inverse (so
higher magnitude is a dimmer star)
:param search_center: The center of a search cone as a ra/dec pair.
:param search_radius: The radius about the center of the search cone
:return: An Iterable of Pandas dataframes with columns according to the catalogue columns.
"""
# retrieve the required columns from the index for ease of use
ind_min_ra = self._index.minra
ind_max_ra = self._index.maxra
ind_min_dec = self._index.mindec
ind_max_dec = self._index.maxdec
# determine which GSC zones we need to check
if search_center is not None:
min_ra2 = search_center[0] - search_radius
max_ra2 = search_center[0] + search_radius
min_dec2 = search_center[1] - search_radius
max_dec2 = search_center[1] + search_radius
if min_dec2 < -90:
# if the search region includes the south pole take all RA
min_dec2 = -90
min_ra2 = 0
max_ra2 = 360
elif max_dec2 > 90:
# if the search region includes the north pole take all RA
max_dec2 = 90
min_ra2 = 0
max_ra2 = 360
# if the first point of ares is included then we need to query both the high and low RA
if (min_ra2 < 0) and (max_ra2 < 360):
index_check = ((ind_min_ra <= max_ra) & (ind_max_ra >= min_ra) &
(ind_min_dec <= max_dec) & (ind_max_dec >= min_dec)) & \
((((ind_min_ra <= max_ra2) & (ind_max_ra >= 0)) |
((ind_min_ra <= 360) & (ind_max_ra >= min_ra2 + 360))) &
(ind_min_dec <= max_dec2) & (ind_max_dec >= min_dec2))
elif (min_ra2 < 0) and (max_ra2 >= 360): # if we need the whole ra band
min_ra2 = 0
max_ra2 = 360
index_check = ((ind_min_ra <= max_ra) & (ind_max_ra >= min_ra) &
(ind_min_dec <= max_dec) & (ind_max_dec >= min_dec)) & \
((ind_min_ra <= max_ra2) & (ind_max_ra >= min_ra2)
(ind_min_dec <= max_dec2) & (ind_max_dec >= min_dec2))
elif max_ra2 >= 360:
# if the first point of ares is included then we need to query both the high and low RA
index_check = ((ind_min_ra <= max_ra) & (ind_max_ra >= min_ra) &
(ind_min_dec <= max_dec) & (ind_max_dec >= min_dec)) & \
((((ind_min_ra <= 360) & (ind_max_ra >= min_ra2)) |
((ind_min_ra <= max_ra2-360) & (ind_max_ra >= 0))) &
(ind_min_dec <= max_dec2) & (ind_max_dec >= min_dec2))
else:
# other wise nothing special
index_check = ((ind_min_ra <= max_ra) & (ind_max_ra >= min_ra) &
(ind_min_dec <= max_dec) & (ind_max_dec >= min_dec)) & \
((ind_min_ra <= max_ra2) & (ind_max_ra >= min_ra2)
(ind_min_dec <= max_dec2) & (ind_max_dec >= min_dec2))
else:
index_check = ((ind_min_ra < max_ra) & (ind_max_ra > min_ra) &
(ind_min_dec < max_dec) & (ind_max_dec > min_dec))
# ################################################# MAIN FILE ##################################################
# get the start and stop lines for each zone we need to consider
start_lines = self._index.loc[index_check].mstars - 1 # type: pd.Series
end_lines = self._index.loc[start_lines.index + 1].mstars # type: pd.Series
results = []
for start, stop in zip(start_lines, end_lines):
# seek to the proper point in the file
self._main.seek(start * self._main_line_length, os.SEEK_SET)
# read the file for the current GSC chunk
df = pd.read_csv(self._main, sep='|', header=None, index_col=False,
names=self._names, dtype=dict(zip(self._names, self._dtypes)),
nrows=stop - start + 1, na_values=[' ' * length for length in range(20)])
# perform comparisons
visual_check = (df.VTmag >= min_visual_mag) & (df.VTmag <= max_visual_mag)
b_check = (df.BTmag >= min_b_mag) & (df.BTmag <= max_b_mag)
test = ((df.RAmdeg >= min_ra) & (df.RAmdeg <= max_ra) & (df.DEmdeg >= min_dec) & (df.DEmdeg <= max_dec) &
((visual_check & b_check) | (df.VTmag.isnull() & b_check) | (df.BTmag.isnull() & visual_check)))
if search_center is not None:
# check the radial distance if we're doing a cone search
test &= radec_distance(df.RAmdeg * DEG2RAD, df.DEmdeg * DEG2RAD,
search_center[0] * DEG2RAD, search_center[1] * DEG2RAD) <= (
search_radius * DEG2RAD)
# check to see if anything met the criteria
if test.any():
results.append(df.loc[test])
# ############################################### SUPPLEMENT FILE ##############################################
# get the start and stop lines for each zone we need to consider
start_lines = self._index.loc[index_check].sstars - 1 # type: pd.Series
end_lines = self._index.loc[start_lines.index + 1].sstars # type: pd.Series
sup1results = []
for start, stop in zip(start_lines, end_lines):
# seek to the proper point in the file
self._sup1.seek(start * self._sup1_line_length, os.SEEK_SET)
# if there are no supplement stars to consider
if (stop - start + 1) == 0:
continue
# read the file for the current GSC chunk
df = pd.read_csv(self._sup1, sep='|', header=None, index_col=False,
names=self._sup1_names, dtype=dict(zip(self._sup1_names, self._sup1_dtypes)),
nrows=stop - start + 1, na_values=[' ' * length for length in range(20)])
# perform comparisons
visual_check = (df.VTmag >= min_visual_mag) & (df.VTmag <= max_visual_mag)
b_check = (df.BTmag >= min_b_mag) & (df.BTmag <= max_b_mag)
test = ((df.RAdeg >= min_ra) & (df.RAdeg <= max_ra) & (df.DEdeg >= min_dec) & (df.DEdeg <= max_dec) &
((visual_check & b_check) | (df.VTmag.isnull() & b_check) | (df.BTmag.isnull() & visual_check)))
if search_center is not None:
# check the radial distance if we're doing a cone search
test &= radec_distance(df.RAdeg * DEG2RAD, df.DEdeg * DEG2RAD,
search_center[0] * DEG2RAD, search_center[1] * DEG2RAD) <= (
search_radius * DEG2RAD)
# check to see if anything met the criteria
if test.any():
sup1results.append(df.loc[test])
return pd.concat([self._process_results(results), self._process_results(sup1results, rtype='supp')])
    def _process_results(self, res: List[pd.DataFrame], rtype: str = 'main') -> pd.DataFrame:
        """
        This modifies the star records to use the same format, have the right index, and label whether they are
        main or supplemental stars.

        The TYCID string column is split into integer TYC1/TYC2/TYC3 components which become the (multi-)index of the
        result, and a ``tycho_source`` column is added identifying which file the records came from.

        :param res: The frames to modify and join together
        :param rtype: the type of frames, either main or supp
        :return: The concatenated and modified dataframe
        """
        if res:
            # if we found anything then concatenate all of the results together
            big_df = pd.concat(res)  # type: pd.DataFrame

            # split the TYC ID column into its components
            # noinspection SpellCheckingInspection
            tycid = big_df['TYCID'].str.split(expand=True)

            big_df['TYC1'] = tycid[0].astype(np.uint16)
            big_df['TYC2'] = tycid[1].astype(np.uint16)
            big_df['TYC3'] = tycid[2].astype(np.uint8)

            if 'supp' in rtype:
                # drop the TYCID column (label slice starts at 'flag'), index by the ID components, and tag the source.
                # the supplement columns are then renamed so they can be merged with main-file records
                out = big_df.loc[:, 'flag':].set_index(['TYC1', 'TYC2', 'TYC3']).assign(tycho_source='supp')

                return out.rename(columns=dict(zip(self._sup1_names, self._sup1_rename)))

            else:
                # same as above but the main file's first data column is 'pflag' and no rename is needed
                # noinspection SpellCheckingInspection
                out = big_df.loc[:, 'pflag':].set_index(['TYC1', 'TYC2', 'TYC3']).assign(tycho_source='main')

                return out
        else:
            # nothing was found so return an empty frame with the expected columns and index.
            # NOTE(review): this always uses the main-file column names, even when rtype is 'supp' -- presumably fine
            # since the frame is empty, but confirm callers do not rely on supplement-specific columns here
            return pd.DataFrame(columns=self._names[1:] +
                                ['TYC1', 'TYC2', 'TYC3'] + ['tycho_source']).set_index(['TYC1', 'TYC2', 'TYC3'])
def download_tycho(target_directory: Path):
    """
    This function downloads the Tycho2 catalogue from vizier to the target directory.

    This is done over ftp.  It requires an active internet connection that can connect to cdsarc.u-strasbg.fr

    .. warning::
        This download will take a long time and use up approximately 500 MB of space.

    .. warning::
        This download has no way to verify the integrity of the files because no hash is provided.  While the vizier
        service is trusted, use this function at your own risk

    :param target_directory: the directory to save the Tycho catalogue to
    """

    # we minimize the security risk here by using FTPS
    import ftplib  # nosec
    from gzip import decompress

    target_directory.mkdir(exist_ok=True, parents=True)

    # FTPS is secure
    ftp = ftplib.FTP_TLS('cdsarc.u-strasbg.fr')  # nosec

    try:
        # anonymous login since we're just grabbing data
        ftp.connect()
        ftp.sendcmd('USER anonymous')
        ftp.sendcmd('PASS anonymous@a.com')

        ftp.cwd('pub/cats/I/259/')

        listing = []
        ftp.retrlines('LIST', callback=listing.append)

        tyc_file = target_directory / "tyc2.dat"
        if tyc_file.exists():
            # need to delete this file since we will append to it
            tyc_file.unlink()

        for line in listing:
            # the file name is the last whitespace separated field of the listing line
            name = line.split()[-1]

            if ".dat" in name:
                start = time.time()

                if 'tyc2' in name:
                    # the main catalogue is served as numbered gzip chunks which are appended into one local file
                    local = target_directory / "tyc2.dat"
                    mode = "ab"
                else:
                    local = target_directory / name.replace('.gz', '')
                    mode = 'wb'

                with local.open(mode) as download_file:
                    writer = download_file.write
                    # noinspection PyTypeChecker,SpellCheckingInspection
                    ftp.retrbinary('RETR {}'.format(name), writer)

                print('{} done in {:.3f}'.format(name, time.time() - start), flush=True)
    finally:
        # BUG FIX: the original never closed the control connection, leaking the socket on success and on failure
        ftp.close()

    # there is some risk here because no hash is provided with the files but what can you do?
    print('decompressing the data')
    for file in target_directory.glob('*'):
        with file.open('rb') as compressed_file:
            decompressed = decompress(compressed_file.read())

        with file.open('wb') as out_file:
            out_file.write(decompressed)