Source code for panstamps.downloader

#!/usr/local/bin/python
# encoding: utf-8
"""
downloader
===========

*Tools to download the PS1 3Pi image stamps from STScI PanSTARRS image server*

The stamp server can be found `here <http://ps1images.stsci.edu/cgi-bin/ps1cutouts>`_

Author
: David Young
"""
from __future__ import print_function
from __future__ import division
from fundamentals import tools
from builtins import zip
from builtins import str
from builtins import object
from past.utils import old_div
import sys
import os
import re
os.environ['TERM'] = 'vt100'


[docs] class downloader(object): """ *Tools to download the panstarrs image stamps from STScI PanSTARRS image server* **Key Arguments** - ``log`` -- logger - ``settings`` -- the settings dictionary - ``downloadDirectory`` -- the path to where you want to download the images to. Downlaods to path command is run from by default. - ``fits`` -- download the fits files? Default *True* - ``jpeg`` -- download the jpeg files? Default *False* - ``arcsecSize`` -- the size of the image stamps to download (1 arcsec == 4 pixels). Default *60* - ``filterSet`` -- the filter set used to create color and/or download as individual stamps. Default *gri* - ``color`` -- download the color jpeg? Default *True* - ``singleFilters`` -- download the single filter stmaps? Default *False* - ``ra`` -- ra in decimal degrees. - ``dec`` -- dec in decimal degrees. - ``imageType`` -- warp or stacked images? Default *stack* - ``mjdStart`` -- the start of a time-window within which the images required are taken. Default *False* (everything) - ``mjdEnd`` -- the end of a time-window within which the images required are taken. Default *False* (everything) **Usage** The following will return 3 lists of paths to local fits, jpeg and color-jpeg files: ```python from panstamps.downloader import downloader fitsPaths, jpegPaths, colorPath = downloader( log=log, settings=False, fits=False, jpeg=True, arcsecSize=600, filterSet='gri', color=True, singleFilters=True, ra="70.60271", dec="-21.72433", imageType="stack", mjdStart=False, mjdEnd=False, window=False ).get() ``` """ # Initialisation def __init__( self, log, downloadDirectory=False, settings=False, fits=True, jpeg=False, arcsecSize=60, filterSet='gri', color=True, singleFilters=True, ra=False, dec=False, imageType="stack", mjdStart=False, mjdEnd=False, window=False ): self.log = log log.debug("instansiating a new 'downloader' object") self.settings = settings self.fits = fits self.jpeg = jpeg self.arcsecSize = arcsecSize self.filterSet = filterSet self.color = color self.singleFilters = singleFilters self.ra = ra self.dec = dec self.imageType = imageType self.downloadDirectory = downloadDirectory self.window = window try: self.mjdEnd = float(mjdEnd) except: self.mjdEnd = mjdEnd try: self.mjdStart = float(mjdStart) except: self.mjdStart = mjdStart # xt-self-arg-tmpx return None # Method Attributes
[docs] def get(self): """ *download the requested jpegs and fits files* **Return** - ``fitsPaths`` -- a list of local paths to downloaded fits files - ``jpegPaths`` -- a list of local paths to downloaded jpeg files - ``colorPath`` -- a list of local paths to downloaded color jpeg file (just one image) """ self.log.debug('starting the ``get`` method') fitsPaths = [] jpegPaths = [] colorPath = [] # REQUEST THE URL FROM STAMP SERVER content, status_code, url = self.get_html_content() if int(status_code) != 200: message = 'cound not download the image stamps. The STScI PanSTARRS image server returned HTTP status code %(status_code)s' % locals( ) self.log.error(message) raise IOError(message) # CHECK WE ARE IN THE PS1 FOOTPRINT if "No PS1 3PI images were found" in str(content): self.log.warning( "No images found. PS1 3Pi has not covered this area of the sky. Here's the requested URL:\n%(url)s" % locals()) return [], [], [] # PARSE IMAGE URLS FROM HTML CONTENT allStacks, allWarps, colorImage = self.parse_html_for_image_urls_and_metadata( content=content ) # GENERATE A DIRECTORY NAME IF ON DOWNLOAD DIRECTORY SPECIFIED if not self.downloadDirectory: ra = self.ra dec = self.dec try: dec = float(dec) if dec > 0: sign = "p" else: sign = "m" dec = abs(dec) directoryName = """%(ra)s%(sign)s%(dec)s""" % locals() except: if dec[0] == "-": dec = "m" + dec[1:] elif dec[0] == "+": dec = "p" + dec[1:] else: dec = "p" + dec directoryName = """%(ra)s%(dec)s""" % locals() directoryName = directoryName.replace(":", "") downloadDirectory = directoryName else: downloadDirectory = self.downloadDirectory # RECURSIVELY CREATE MISSING DIRECTORIES if not os.path.exists(downloadDirectory): os.makedirs(downloadDirectory) # IF SINGLE FILTER STAMPS HAVE BEEN REQUESTED if self.singleFilters: for images in [allStacks, allWarps]: urls = [] # DOWNLOAD THE FITS FILES? fitsFilenames = [] if self.fits: fitsFilenames[:] = [ t + ".fits" for t in images["filenames"]] urls += images["fits"] fitsPaths += self._download_images( urls=urls, filenames=fitsFilenames, downloadDirectory=downloadDirectory ) # DOWNLOAD THE JPEGS FILES? urls = [] jpegFilenames = [] if self.jpeg: jpegFilenames[:] = [ t + ".jpeg" for t in images["filenames"]] urls += images["jpegs"] jpegPaths += self._download_images( urls=urls, filenames=jpegFilenames, downloadDirectory=downloadDirectory ) # IF COLOR STAMPS HAS BEEN REQUESTED if self.color: theseFilenames = [] theseFilenames[:] = [t + ".jpeg" for t in colorImage["filename"]] colorPath += self._download_images( urls=colorImage["jpeg"], filenames=theseFilenames, downloadDirectory=downloadDirectory ) self.log.debug('completed the ``get`` method') if len(fitsPaths + jpegPaths + colorPath) == 0: self.log.warning( "No images found. Your options may not be set correctly. Here's the requested URL:\n%(url)s" % locals()) return fitsPaths, jpegPaths, colorPath
[docs] def get_html_content( self): """ *Build the URL for the stamp request and extract the HTML content* **Return** - ``content`` -- the HTML content of the requested URL - ``status_code`` -- the HTTP status code of the request response - ``url`` -- the URL requested from the PS1 stamp server **Usage** ```python from panstamps.downloader import downloader content, status_code, url = downloader( log=log, settings=False, fits=False, jpeg=True, arcsecSize=600, filterSet='gri', color=True, singleFilters=True, ra="70.60271", dec="-21.72433", imageType="stack", mjdStart=False, mjdEnd=False, window=False ).get_html_content() print(status_code) # OUT: 200 print(url) # OUT: http://ps1images.stsci.edu/cgi-bin/ps1cutouts?filter=gri&filter=color&catlist=&autoscale=99.500000&verbose=0&output_size=2400&filetypes=stack&pos=70.60271+-21.72433&size=2400 ``` """ self.log.debug('starting the ``get_html_content`` method') import requests r = self.ra d = self.dec pos = """%(r)s %(d)s""" % locals() filterSet = list(self.filterSet) if self.color: filterSet.append("color") fitsSize = int(self.arcsecSize * 4) jpegSize = fitsSize if jpegSize < 1200: jpegSize = 1200 try: response = requests.get( url="http://ps1images.stsci.edu/cgi-bin/ps1cutouts", params={ "pos": pos, "filter": filterSet, "filetypes": self.imageType, "size": fitsSize, "output_size": jpegSize, "verbose": "0", "autoscale": "99.500000", "catlist": "", }, ) except requests.exceptions.RequestException: print('HTTP Request failed') self.log.debug('completed the ``get_html_content`` method') return str(response.content), response.status_code, str(response.url)
[docs] def parse_html_for_image_urls_and_metadata( self, content): """ *parse html for image urls and metadata* **Key Arguments** - ``content`` -- the content of the requested PS1 stamp HTML page **Usage** Note if you want to constrain the images you download with a temporal window then make sure to given values for `mjdStart` and `mjdEnd`. ```python from panstamps.downloader import downloader mydownloader = downloader( log=log, settings=False, fits=False, jpeg=True, arcsecSize=600, filterSet='gri', color=True, singleFilters=True, ra="70.60271", dec="-21.72433", imageType="stack", mjdStart=False, mjdEnd=False, window=False ) content, status_code, url = mydownloader.get_html_content() allStacks, allWarps, colorImage = mydownloader.parse_html_for_image_urls_and_metadata(content=content) for k,v in allStacks.items(): print(k, v) # OUT: ## jpegs ['http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node15/stps15.1/nebulous/23/3a/7187453864.gpc1%3ALAP.PV3.20140730%3A2015%3A01%3A29%3ARINGS.V3%3Askycell.0812.050%3ARINGS.V3.skycell.0812.050.stk.4297354.unconv.fits&x=70.602710&y=-21.724330&size=2400&wcs=1&asinh=True&autoscale=99.500000&output_size=2400', 'http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node08/stps08.1/nebulous/de/fa/5761784572.gpc1%3ALAP.PV3.20140730%3A2014%3A12%3A25%3ARINGS.V3%3Askycell.0812.050%3ARINGS.V3.skycell.0812.050.stk.4106421.unconv.fits&x=70.602710&y=-21.724330&size=2400&wcs=1&asinh=True&autoscale=99.500000&output_size=2400', 'http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node08/stps08.1/nebulous/1b/d7/5756633973.gpc1%3ALAP.PV3.20140730%3A2014%3A12%3A25%3ARINGS.V3%3Askycell.0812.050%3ARINGS.V3.skycell.0812.050.stk.4097309.unconv.fits&x=70.602710&y=-21.724330&size=2400&wcs=1&asinh=True&autoscale=99.500000&output_size=2400'] ## fits ['http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node15/stps15.1/nebulous/23/3a/7187453864.gpc1:LAP.PV3.20140730:2015:01:29:RINGS.V3:skycell.0812.050:RINGS.V3.skycell.0812.050.stk.4297354.unconv.fits&format=fits&x=70.602710&y=-21.724330&size=2400&wcs=1&imagename=cutout_rings.v3.skycell.0812.050.stk.g.unconv.fits', 'http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node08/stps08.1/nebulous/de/fa/5761784572.gpc1:LAP.PV3.20140730:2014:12:25:RINGS.V3:skycell.0812.050:RINGS.V3.skycell.0812.050.stk.4106421.unconv.fits&format=fits&x=70.602710&y=-21.724330&size=2400&wcs=1&imagename=cutout_rings.v3.skycell.0812.050.stk.r.unconv.fits', 'http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node08/stps08.1/nebulous/1b/d7/5756633973.gpc1:LAP.PV3.20140730:2014:12:25:RINGS.V3:skycell.0812.050:RINGS.V3.skycell.0812.050.stk.4097309.unconv.fits&format=fits&x=70.602710&y=-21.724330&size=2400&wcs=1&imagename=cutout_rings.v3.skycell.0812.050.stk.i.unconv.fits'] ## filters ['g', 'r', 'i'] ## filenames ['stack_g_ra70.602710_dec-21.724330_arcsec600_skycell0812.050', 'stack_r_ra70.602710_dec-21.724330_arcsec600_skycell0812.050', 'stack_i_ra70.602710_dec-21.724330_arcsec600_skycell0812.050'] ``` **Return** - ``allStacks`` -- dictionary of 4 equal length lists. jpeg remote urls, fits remote urls, filters and filenames. - ``allWarps`` -- dictionary of 4 equal length lists. jpeg remote urls, fits remote urls, filters and filenames. - ``colorImage`` -- dictionary of 4 equal length lists. jpeg remote urls, fits remote urls, filters and filenames. """ self.log.debug( 'starting the ````parse_html_for_image_urls_and_metadata`` method') # SETUP THE VARIABLES stackFitsUrls = [] warpFitsUrls = [] stackJpegUrls = [] warpJpegUrls = [] colorJpegUrl = [] stackFitsFilename = [] warpFitsFilename = [] stackJpegFilename = [] warpJpegFilename = [] colorJpegFilename = [] allStacks = { "jpegs": [], "fits": [], "filenames": [], "filters": [] } allWarps = { "jpegs": [], "fits": [], "filenames": [] } colorImage = { "jpeg": [], "filename": [] } # USE REGEX TO FIND FITS URLS reFitscutouts = re.compile( r"""<th>(3PI )?(?P<imagetype>\w+)\s+(?P<skycellid>\d+.\d+)\s+(?P<ffilter>[\w\\]+)(\s+(?P<mjd>\d+\.\d+))?(\s<a.*\(warning\)</a>)?<br.*?href="(http:)?//ps1images.*?Display</a>.*?Fits cutout" href="(?P<fiturl>(http:)?//ps1images.*?\.fits)".*?</th>""", re.I | re.S) thisIter = reFitscutouts.finditer(content) for item in thisIter: imagetype = item.group("imagetype") skycellid = item.group("skycellid") ffilter = item.group("ffilter") fiturl = item.group("fiturl").replace("&amp;", "&") if fiturl[0:5] != "http": fiturl = "http:" + fiturl mjd = item.group("mjd") if imagetype == "stack": stackFitsUrls.append(fiturl) elif imagetype == "warp": warpFitsUrls.append(fiturl) # USE REGEX TO FIND JPEG URLS reJpegs = re.compile( r"""<img src="(?P<jpegUrl>(http:)?//ps1images.*?skycell.*?)\"""", re.I | re.S) thisIter = reJpegs.finditer(content) for item in thisIter: jpegUrl = item.group("jpegUrl").replace("&amp;", "&") if jpegUrl[0:5] != "http": jpegUrl = "http:" + jpegUrl if "red" in jpegUrl and "blue" in jpegUrl: colorJpegUrl.append(jpegUrl) elif ".wrp." in jpegUrl: warpJpegUrls.append(jpegUrl) elif ".stk." in jpegUrl: stackJpegUrls.append(jpegUrl) else: self.log.warning( "We are not downloading this jpeg: '%(jpegUrl)s'" % locals()) # USE REGEX TO FIND FITS METADATA (STACKS) reFitsMeta = re.compile( r'http?.*?\?.*?skycell\.(?P<skycell>\d+\.\d+).*?x=(?P<ra>\d+\.\d+).*?y=(?P<dec>[+|-]?\d+\.\d+).*?size=(?P<pixels>\d+).*?stk\.(?P<ffilter>\w+).*?fits', re.S | re.I) def filterMjd(x): return True if not self.mjdStart or (float( x) < self.mjdEnd and float(x) > self.mjdStart) else False for i in stackJpegUrls: fitsUrl = i.split("&")[0].replace("%3A", ":") for f in stackFitsUrls: if fitsUrl in f: matchObject = re.search(reFitsMeta, f) skycell = matchObject.group("skycell") ra = matchObject.group("ra") dec = matchObject.group("dec") pixels = matchObject.group("pixels") arcsec = str(int(old_div(int(pixels), 4))) ffilter = matchObject.group("ffilter") filename = """stack_%(ffilter)s_ra%(ra)s_dec%(dec)s_arcsec%(arcsec)s_skycell%(skycell)s""" % locals( ) allStacks["jpegs"].append(i) allStacks["fits"].append(f) allStacks["filenames"].append(filename) allStacks["filters"].append(ffilter) # USE REGEX TO FIND FITS METADATA (WARPS) reFitsMeta = re.compile( r'http?.*?\?.*?skycell\.(?P<skycell>\d+\.\d+).*?x=(?P<ra>\d+\.\d+).*?y=(?P<dec>[+|-]?\d+\.\d+).*?size=(?P<pixels>\d+).*?wrp\.(?P<ffilter>\w+)\.(?P<mjd>\d+\_\d+).*?fits', re.S | re.I) # GIVEN A RANGE IN MJDs OR NO MJDs if (self.mjdStart and self.mjdEnd) or not (self.mjdStart or self.mjdEnd): for i in warpJpegUrls: fitsUrl = i.split("&")[0].replace("%3A", ":") for f in warpFitsUrls: if fitsUrl in f: matchObject = re.search(reFitsMeta, f) skycell = matchObject.group("skycell") ra = matchObject.group("ra") dec = matchObject.group("dec") pixels = matchObject.group("pixels") arcsec = str(int(old_div(int(pixels), 4))) ffilter = matchObject.group("ffilter") mjd = matchObject.group("mjd").replace("_", ".") if not filterMjd(mjd): continue filename = """warp_%(ffilter)s_ra%(ra)s_dec%(dec)s_mjd%(mjd)s_arcsec%(arcsec)s_skycell%(skycell)s""" % locals( ) allWarps["jpegs"].append(i) allWarps["fits"].append(f) allWarps["filenames"].append(filename) elif self.mjdStart: closestMjd = 99999999. for i in warpJpegUrls: fitsUrl = i.split("&")[0].replace("%3A", ":") for f in warpFitsUrls: if fitsUrl in f: matchObject = re.search(reFitsMeta, f) skycell = matchObject.group("skycell") ra = matchObject.group("ra") dec = matchObject.group("dec") pixels = matchObject.group("pixels") arcsec = str(int(old_div(int(pixels), 4))) ffilter = matchObject.group("ffilter") mjd = float(matchObject.group("mjd").replace("_", ".")) if not mjd > self.mjdStart or mjd > closestMjd: continue closestMjd = mjd filename = """warp_%(ffilter)s_ra%(ra)s_dec%(dec)s_mjd%(mjd)s_arcsec%(arcsec)s_skycell%(skycell)s""" % locals( ) allWarps["jpegs"] = [i] allWarps["fits"] = [f] allWarps["filenames"] = [filename] mjdDiff = (closestMjd - self.mjdStart) * 24 * 60 * 60 window = self.window if window: window = abs(self.window) if mjdDiff > window: print( "No warp image was found within %(window)s sec after requested MJD" % locals()) allWarps["jpegs"] = [] allWarps["fits"] = [] allWarps["filenames"] = [] print( "The closest selected warp was taken %(mjdDiff)0.1f sec after the requested MJD" % locals()) elif self.mjdEnd: closestMjd = 0. for i in warpJpegUrls: fitsUrl = i.split("&")[0].replace("%3A", ":") for f in warpFitsUrls: if fitsUrl in f: matchObject = re.search(reFitsMeta, f) skycell = matchObject.group("skycell") ra = matchObject.group("ra") dec = matchObject.group("dec") pixels = matchObject.group("pixels") arcsec = str(int(old_div(int(pixels), 4))) ffilter = matchObject.group("ffilter") mjd = float(matchObject.group("mjd").replace("_", ".")) if not mjd < self.mjdEnd or mjd < closestMjd: continue closestMjd = mjd filename = """warp_%(ffilter)s_ra%(ra)s_dec%(dec)s_mjd%(mjd)s_arcsec%(arcsec)s_skycell%(skycell)s""" % locals( ) allWarps["jpegs"] = [i] allWarps["fits"] = [f] allWarps["filenames"] = [filename] mjdDiff = (self.mjdEnd - closestMjd) * 24 * 60 * 60 window = self.window if window: window = abs(self.window) if mjdDiff > window: print( "No warp image was found within %(window)s sec before requested MJD" % locals()) allWarps["jpegs"] = [] allWarps["fits"] = [] allWarps["filenames"] = [] print( "The closest selected warp was taken %(mjdDiff)0.1f sec before the requested MJD" % locals()) # USE REGEX TO FIND COLOR IMAGE METADATA if len(colorJpegUrl): reColorMeta = re.compile( r'(?P<color>\w+)=(?P<datapath>/(data|rings).*?)&', re.S | re.I) thisIter = reColorMeta.finditer(colorJpegUrl[0]) ffilter = "" for item in thisIter: fits = item.group("datapath").replace( "%3A", ":").split("/")[-1] for j, f, n, b in zip(allStacks["jpegs"], allStacks["fits"], allStacks["filenames"], allStacks["filters"]): if fits in f: ffilter += b filename = n filename = "color_" + ffilter + "_" + \ ("_").join(filename.split("_")[2:]) colorImage["jpeg"].append(colorJpegUrl[0]) colorImage["filename"].append(filename) self.log.debug( 'completed the ``parse_html_for_image_urls_and_metadata`` method') return allStacks, allWarps, colorImage
[docs] def _download_images( self, urls=[], filenames=[], downloadDirectory=False ): """ *download images* **Key Arguments** - ``urls`` -- list of the remote URLs to download - ``filenames`` -- list filenames to rename the downloads as - ``downloadDirectory`` -- path to the download directory **Return** - ``localUrls`` -- list of the paths to local image files """ self.log.debug('starting the ``_download_images`` method') from fundamentals.download.multiobject_download import multiobject_download localUrls = multiobject_download( urlList=urls, # directory(ies) to download the documents to - can be one url or a # list of urls the same length as urlList downloadDirectory=downloadDirectory, log=self.log, timeStamp=0, timeout=180, concurrentDownloads=10, resetFilename=filenames, credentials=False, # { 'username' : "...", "password", "..." } longTime=False, indexFilenames=False ) self.log.debug('completed the ``_download_images`` method') return localUrls