Source code for desitransfer.spacewatch

# Licensed under a 3-clause BSD style license - see LICENSE.rst
# -*- coding: utf-8 -*-
"""
desitransfer.spacewatch
=======================

Download Spacewatch data from a server at KPNO.

Notes
-----
* Spacewatch data rolls over at 00:00 UTC = 17:00 MST.
* The data relevant to the previous night, say 20231030, would be downloaded
  on the morning of 20231031.
* Therefore to obtain all data of interest, just download the files that
  have already appeared in 2023/10/31/ (Spacewatch directory structure)
  the morning after DESI night 20231030.
"""
import datetime
import os
import re
from argparse import ArgumentParser
from html.parser import HTMLParser
try:
    utc = datetime.UTC
except AttributeError:
    # datetime.UTC was added in Python 3.11; fall back to pytz on older versions.
    import pytz
    utc = pytz.UTC
import requests
from desiutil.log import get_logger, DEBUG
from . import __version__ as dtVersion
from .common import yesterday


log = None


class SpacewatchHTMLParser(HTMLParser):
    """Extract JPG files from an HTML index.

    Every anchor tag whose ``href`` value starts with eight digits, an
    underscore, six digits and ``.jpg`` is recorded, in document order,
    in the ``jpg_files`` attribute.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Applied with re.match(), so the pattern is anchored at the start
        # of the href only.
        self.jpg_re = re.compile(r'[0-9]{8}_[0-9]{6}\.jpg')
        self.jpg_files = list()

    def handle_starttag(self, tag, attrs):
        """Process HTML tags, in this case targeting anchor tags.

        Parameters
        ----------
        tag : :class:`str`
            The name of the tag being opened.
        attrs : :class:`list`
            The ``(name, value)`` attribute pairs of the tag.
        """
        if tag != 'a':
            return
        href = [a[1] for a in attrs if a[0] == 'href']
        if not href:
            return
        # Only the first href attribute is considered.  A value-less
        # attribute (``<a href>``) is reported as None, which cannot be
        # passed to the regular expression.
        target = href[0]
        if target is not None and self.jpg_re.match(target) is not None:
            self.jpg_files.append(target)
def jpg_list(index):
    """Obtain a list of JPEG files from an HTML index.

    Parameters
    ----------
    index : :class:`str`
        The URL of an HTML index. File names are appended directly to this
        value, so it should end with ``/``.

    Returns
    -------
    :class:`list`
        A list of JPEG files found in `index`. The `index` URL is attached
        to the file names. An empty list is returned if the index could not
        be retrieved.
    """
    try:
        r = requests.get(index)
    except requests.RequestException as e:
        # ConnectionError and HTTPError are subclasses of RequestException.
        log.critical(e.args[0] if e.args else repr(e))
        return []
    if r.status_code != 200:
        log.critical("Unexpected status when listing JPEG files: %d!", r.status_code)
        return []
    #
    # Find the charset parameter of the Content-Type header, if any.
    # The header may be absent, may carry no parameters at all
    # (e.g. 'text/html'), or may carry parameters other than charset.
    #
    charset = 'utf-8'
    for parameter in r.headers.get('Content-Type', '').split(';')[1:]:
        name, _, value = parameter.partition('=')
        if name.strip().lower() == 'charset' and value.strip():
            charset = value.strip().strip('"\'')
            break
    try:
        html = r.content.decode(charset)
    except (LookupError, UnicodeDecodeError):
        # Unknown charset name, or content that does not match the
        # declared charset: still try to extract the file names.
        log.warning("Could not decode index as '%s', falling back to utf-8.", charset)
        html = r.content.decode('utf-8', errors='replace')
    parser = SpacewatchHTMLParser()
    parser.feed(html)
    return [index + j for j in parser.jpg_files]
def download_jpg(files, destination, overwrite=False, test=False):
    """Download `files` to `destination`.

    Each file is saved under the last path component of its URL. When the
    server supplies a parseable ``Last-Modified`` header, the access and
    modification times of the saved file are set to that time.

    Parameters
    ----------
    files : :class:`list`
        A list of URLs to download.
    destination : :class:`str`
        A local directory to hold the files. It is created if it does not
        exist, unless `test` is set.
    overwrite : :class:`bool`, optional
        If ``True``, overwrite any existing files.
    test : :class:`bool`, optional
        If ``True``, do not download any files.

    Returns
    -------
    :class:`int`
        The number of files downloaded.
    """
    downloaded = 0
    if not test and not os.path.isdir(destination):
        log.debug("os.makedirs('%s')", destination)
        os.makedirs(destination)
    for jpg in files:
        base_jpg = jpg.split('/')[-1]
        dst_jpg = os.path.join(destination, base_jpg)
        if os.path.exists(dst_jpg) and not overwrite:
            log.debug("Skipping existing file: %s.", dst_jpg)
            continue
        log.debug("r = requests.get('%s')", jpg)
        if test:
            continue
        r = requests.get(jpg)
        if r.status_code != 200:
            log.warning("Unexpected status %d when downloading %s, skipping.", r.status_code, jpg)
            continue
        #
        # Write the file before counting it, so the returned count only
        # includes files that actually exist on disk.
        #
        with open(dst_jpg, 'wb') as j:
            j.write(r.content)
        downloaded += 1
        #
        # A missing or malformed Last-Modified header should not abort the
        # remaining downloads; the file simply keeps its download time.
        #
        try:
            modified = datetime.datetime.strptime(r.headers['Last-Modified'],
                                                  '%a, %d %b %Y %H:%M:%S %Z')
        except (KeyError, ValueError):
            log.warning("No usable Last-Modified header for %s, timestamp not set.", jpg)
        else:
            timestamp = int(modified.replace(tzinfo=utc).timestamp())
            os.utime(dst_jpg, (timestamp, timestamp))
    return downloaded
def _options():
    """Parse command-line options for :command:`desi_spacewatch_transfer`.

    Returns
    -------
    :class:`argparse.Namespace`
        The parsed command-line options.
    """
    desc = "Transfer Spacewatch data files."
    prsr = ArgumentParser(description=desc)
    prsr.add_argument('-d', '--debug', action='store_true',
                      help='Set log level to DEBUG.')
    prsr.add_argument('-D', '--date', action='store', metavar='YYYY/MM/DD',
                      help='Download files for a specific date instead of today.')
    prsr.add_argument('-o', '--overwrite', action='store_true',
                      help='Overwrite any existing files.')
    # The literal string 'SPACEWATCH_SERVER' is the default when the
    # environment variable of the same name is not set.
    prsr.add_argument('-s', '--server', metavar='SERVER',
                      default=os.getenv('SPACEWATCH_SERVER', 'SPACEWATCH_SERVER'),
                      help='Set the Spacewatch server name to SERVER (default "%(default)s").')
    prsr.add_argument('-t', '--test', action='store_true',
                      help='Do not actually download any files; implies --debug.')
    prsr.add_argument('-V', '--version', action='version',
                      version='%(prog)s {0}'.format(dtVersion))
    prsr.add_argument('destination', metavar='DIR',
                      help='Download files to DIR.')
    return prsr.parse_args()
def main():
    """Entry point for :command:`desi_spacewatch_transfer`.

    Files for the requested date (default: today) are downloaded into a
    matching ``YYYY/MM/DD`` subdirectory of the destination. When no date
    is given on the command line, files for yesterday are downloaded too.

    Returns
    -------
    :class:`int`
        An integer suitable for passing to :func:`sys.exit`: 1 if the
        server name is not set, otherwise 0.
    """
    global log
    options = _options()
    verbose = options.debug or options.test
    log = get_logger(DEBUG) if verbose else get_logger()
    if options.server == 'SPACEWATCH_SERVER':
        # The placeholder default was never replaced by a real server name.
        log.critical("Spacewatch server name is not set!")
        return 1
    spacewatch_root = f'http://{options.server}/allsky-all/images/cropped/'
    explicit_date = options.date is not None
    today = options.date if explicit_date else datetime.date.today().strftime("%Y/%m/%d")
    y = yesterday()
    ystrdy = f"{y[0:4]}/{y[4:6]}/{y[6:8]}"
    #
    # Yesterday is only fetched when the user did not ask for a
    # specific date.
    #
    dates = [today] if explicit_date else [today, ystrdy]
    for date in dates:
        n_files = download_jpg(jpg_list(spacewatch_root + date + '/'),
                               os.path.join(options.destination, date),
                               overwrite=options.overwrite,
                               test=options.test)
        log.debug("%d files downloaded for %s.", n_files, date)
    return 0