Source code for panstamps.downloader
#!/usr/local/bin/python
# encoding: utf-8
"""
downloader
===========
*Tools to download the PS1 3Pi image stamps from STScI PanSTARRS image server*
The stamp server can be found `here <http://ps1images.stsci.edu/cgi-bin/ps1cutouts>`_
Author
: David Young
"""
from __future__ import print_function
from __future__ import division
from fundamentals import tools
from builtins import zip
from builtins import str
from builtins import object
from past.utils import old_div
import sys
import os
import re
os.environ['TERM'] = 'vt100'
[docs]
class downloader(object):
"""
*Tools to download the panstarrs image stamps from STScI PanSTARRS image server*
**Key Arguments**
- ``log`` -- logger
- ``settings`` -- the settings dictionary
- ``downloadDirectory`` -- the path to where you want to download the images to. Downlaods to path command is run from by default.
- ``fits`` -- download the fits files? Default *True*
- ``jpeg`` -- download the jpeg files? Default *False*
- ``arcsecSize`` -- the size of the image stamps to download (1 arcsec == 4 pixels). Default *60*
- ``filterSet`` -- the filter set used to create color and/or download as individual stamps. Default *gri*
- ``color`` -- download the color jpeg? Default *True*
- ``singleFilters`` -- download the single filter stmaps? Default *False*
- ``ra`` -- ra in decimal degrees.
- ``dec`` -- dec in decimal degrees.
- ``imageType`` -- warp or stacked images? Default *stack*
- ``mjdStart`` -- the start of a time-window within which the images required are taken. Default *False* (everything)
- ``mjdEnd`` -- the end of a time-window within which the images required are taken. Default *False* (everything)
**Usage**
The following will return 3 lists of paths to local fits, jpeg and color-jpeg files:
```python
from panstamps.downloader import downloader
fitsPaths, jpegPaths, colorPath = downloader(
log=log,
settings=False,
fits=False,
jpeg=True,
arcsecSize=600,
filterSet='gri',
color=True,
singleFilters=True,
ra="70.60271",
dec="-21.72433",
imageType="stack",
mjdStart=False,
mjdEnd=False,
window=False
).get()
```
"""
# Initialisation
def __init__(
self,
log,
downloadDirectory=False,
settings=False,
fits=True,
jpeg=False,
arcsecSize=60,
filterSet='gri',
color=True,
singleFilters=True,
ra=False,
dec=False,
imageType="stack",
mjdStart=False,
mjdEnd=False,
window=False
):
self.log = log
log.debug("instansiating a new 'downloader' object")
self.settings = settings
self.fits = fits
self.jpeg = jpeg
self.arcsecSize = arcsecSize
self.filterSet = filterSet
self.color = color
self.singleFilters = singleFilters
self.ra = ra
self.dec = dec
self.imageType = imageType
self.downloadDirectory = downloadDirectory
self.window = window
try:
self.mjdEnd = float(mjdEnd)
except:
self.mjdEnd = mjdEnd
try:
self.mjdStart = float(mjdStart)
except:
self.mjdStart = mjdStart
# xt-self-arg-tmpx
return None
# Method Attributes
[docs]
def get(self):
"""
*download the requested jpegs and fits files*
**Return**
- ``fitsPaths`` -- a list of local paths to downloaded fits files
- ``jpegPaths`` -- a list of local paths to downloaded jpeg files
- ``colorPath`` -- a list of local paths to downloaded color jpeg file (just one image)
"""
self.log.debug('starting the ``get`` method')
fitsPaths = []
jpegPaths = []
colorPath = []
# REQUEST THE URL FROM STAMP SERVER
content, status_code, url = self.get_html_content()
if int(status_code) != 200:
message = 'cound not download the image stamps. The STScI PanSTARRS image server returned HTTP status code %(status_code)s' % locals(
)
self.log.error(message)
raise IOError(message)
# CHECK WE ARE IN THE PS1 FOOTPRINT
if "No PS1 3PI images were found" in str(content):
self.log.warning(
"No images found. PS1 3Pi has not covered this area of the sky. Here's the requested URL:\n%(url)s" % locals())
return [], [], []
# PARSE IMAGE URLS FROM HTML CONTENT
allStacks, allWarps, colorImage = self.parse_html_for_image_urls_and_metadata(
content=content
)
# GENERATE A DIRECTORY NAME IF ON DOWNLOAD DIRECTORY SPECIFIED
if not self.downloadDirectory:
ra = self.ra
dec = self.dec
try:
dec = float(dec)
if dec > 0:
sign = "p"
else:
sign = "m"
dec = abs(dec)
directoryName = """%(ra)s%(sign)s%(dec)s""" % locals()
except:
if dec[0] == "-":
dec = "m" + dec[1:]
elif dec[0] == "+":
dec = "p" + dec[1:]
else:
dec = "p" + dec
directoryName = """%(ra)s%(dec)s""" % locals()
directoryName = directoryName.replace(":", "")
downloadDirectory = directoryName
else:
downloadDirectory = self.downloadDirectory
# RECURSIVELY CREATE MISSING DIRECTORIES
if not os.path.exists(downloadDirectory):
os.makedirs(downloadDirectory)
# IF SINGLE FILTER STAMPS HAVE BEEN REQUESTED
if self.singleFilters:
for images in [allStacks, allWarps]:
urls = []
# DOWNLOAD THE FITS FILES?
fitsFilenames = []
if self.fits:
fitsFilenames[:] = [
t + ".fits" for t in images["filenames"]]
urls += images["fits"]
fitsPaths += self._download_images(
urls=urls,
filenames=fitsFilenames,
downloadDirectory=downloadDirectory
)
# DOWNLOAD THE JPEGS FILES?
urls = []
jpegFilenames = []
if self.jpeg:
jpegFilenames[:] = [
t + ".jpeg" for t in images["filenames"]]
urls += images["jpegs"]
jpegPaths += self._download_images(
urls=urls,
filenames=jpegFilenames,
downloadDirectory=downloadDirectory
)
# IF COLOR STAMPS HAS BEEN REQUESTED
if self.color:
theseFilenames = []
theseFilenames[:] = [t + ".jpeg" for t in colorImage["filename"]]
colorPath += self._download_images(
urls=colorImage["jpeg"],
filenames=theseFilenames,
downloadDirectory=downloadDirectory
)
self.log.debug('completed the ``get`` method')
if len(fitsPaths + jpegPaths + colorPath) == 0:
self.log.warning(
"No images found. Your options may not be set correctly. Here's the requested URL:\n%(url)s" % locals())
return fitsPaths, jpegPaths, colorPath
[docs]
def get_html_content(
self):
"""
*Build the URL for the stamp request and extract the HTML content*
**Return**
- ``content`` -- the HTML content of the requested URL
- ``status_code`` -- the HTTP status code of the request response
- ``url`` -- the URL requested from the PS1 stamp server
**Usage**
```python
from panstamps.downloader import downloader
content, status_code, url = downloader(
log=log,
settings=False,
fits=False,
jpeg=True,
arcsecSize=600,
filterSet='gri',
color=True,
singleFilters=True,
ra="70.60271",
dec="-21.72433",
imageType="stack",
mjdStart=False,
mjdEnd=False,
window=False
).get_html_content()
print(status_code)
# OUT: 200
print(url)
# OUT: http://ps1images.stsci.edu/cgi-bin/ps1cutouts?filter=gri&filter=color&catlist=&autoscale=99.500000&verbose=0&output_size=2400&filetypes=stack&pos=70.60271+-21.72433&size=2400
```
"""
self.log.debug('starting the ``get_html_content`` method')
import requests
r = self.ra
d = self.dec
pos = """%(r)s %(d)s""" % locals()
filterSet = list(self.filterSet)
if self.color:
filterSet.append("color")
fitsSize = int(self.arcsecSize * 4)
jpegSize = fitsSize
if jpegSize < 1200:
jpegSize = 1200
try:
response = requests.get(
url="http://ps1images.stsci.edu/cgi-bin/ps1cutouts",
params={
"pos": pos,
"filter": filterSet,
"filetypes": self.imageType,
"size": fitsSize,
"output_size": jpegSize,
"verbose": "0",
"autoscale": "99.500000",
"catlist": "",
},
)
except requests.exceptions.RequestException:
print('HTTP Request failed')
self.log.debug('completed the ``get_html_content`` method')
return str(response.content), response.status_code, str(response.url)
[docs]
def parse_html_for_image_urls_and_metadata(
self,
content):
"""
*parse html for image urls and metadata*
**Key Arguments**
- ``content`` -- the content of the requested PS1 stamp HTML page
**Usage**
Note if you want to constrain the images you download with a temporal window then make sure to given values for `mjdStart` and `mjdEnd`.
```python
from panstamps.downloader import downloader
mydownloader = downloader(
log=log,
settings=False,
fits=False,
jpeg=True,
arcsecSize=600,
filterSet='gri',
color=True,
singleFilters=True,
ra="70.60271",
dec="-21.72433",
imageType="stack",
mjdStart=False,
mjdEnd=False,
window=False
)
content, status_code, url = mydownloader.get_html_content()
allStacks, allWarps, colorImage = mydownloader.parse_html_for_image_urls_and_metadata(content=content)
for k,v in allStacks.items():
print(k, v)
# OUT:
## jpegs ['http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node15/stps15.1/nebulous/23/3a/7187453864.gpc1%3ALAP.PV3.20140730%3A2015%3A01%3A29%3ARINGS.V3%3Askycell.0812.050%3ARINGS.V3.skycell.0812.050.stk.4297354.unconv.fits&x=70.602710&y=-21.724330&size=2400&wcs=1&asinh=True&autoscale=99.500000&output_size=2400', 'http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node08/stps08.1/nebulous/de/fa/5761784572.gpc1%3ALAP.PV3.20140730%3A2014%3A12%3A25%3ARINGS.V3%3Askycell.0812.050%3ARINGS.V3.skycell.0812.050.stk.4106421.unconv.fits&x=70.602710&y=-21.724330&size=2400&wcs=1&asinh=True&autoscale=99.500000&output_size=2400', 'http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node08/stps08.1/nebulous/1b/d7/5756633973.gpc1%3ALAP.PV3.20140730%3A2014%3A12%3A25%3ARINGS.V3%3Askycell.0812.050%3ARINGS.V3.skycell.0812.050.stk.4097309.unconv.fits&x=70.602710&y=-21.724330&size=2400&wcs=1&asinh=True&autoscale=99.500000&output_size=2400']
## fits ['http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node15/stps15.1/nebulous/23/3a/7187453864.gpc1:LAP.PV3.20140730:2015:01:29:RINGS.V3:skycell.0812.050:RINGS.V3.skycell.0812.050.stk.4297354.unconv.fits&format=fits&x=70.602710&y=-21.724330&size=2400&wcs=1&imagename=cutout_rings.v3.skycell.0812.050.stk.g.unconv.fits', 'http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node08/stps08.1/nebulous/de/fa/5761784572.gpc1:LAP.PV3.20140730:2014:12:25:RINGS.V3:skycell.0812.050:RINGS.V3.skycell.0812.050.stk.4106421.unconv.fits&format=fits&x=70.602710&y=-21.724330&size=2400&wcs=1&imagename=cutout_rings.v3.skycell.0812.050.stk.r.unconv.fits', 'http://ps1images.stsci.edu/cgi-bin/fitscut.cgi?red=/data/ps1/node08/stps08.1/nebulous/1b/d7/5756633973.gpc1:LAP.PV3.20140730:2014:12:25:RINGS.V3:skycell.0812.050:RINGS.V3.skycell.0812.050.stk.4097309.unconv.fits&format=fits&x=70.602710&y=-21.724330&size=2400&wcs=1&imagename=cutout_rings.v3.skycell.0812.050.stk.i.unconv.fits']
## filters ['g', 'r', 'i']
## filenames ['stack_g_ra70.602710_dec-21.724330_arcsec600_skycell0812.050', 'stack_r_ra70.602710_dec-21.724330_arcsec600_skycell0812.050', 'stack_i_ra70.602710_dec-21.724330_arcsec600_skycell0812.050']
```
**Return**
- ``allStacks`` -- dictionary of 4 equal length lists. jpeg remote urls, fits remote urls, filters and filenames.
- ``allWarps`` -- dictionary of 4 equal length lists. jpeg remote urls, fits remote urls, filters and filenames.
- ``colorImage`` -- dictionary of 4 equal length lists. jpeg remote urls, fits remote urls, filters and filenames.
"""
self.log.debug(
'starting the ````parse_html_for_image_urls_and_metadata`` method')
# SETUP THE VARIABLES
stackFitsUrls = []
warpFitsUrls = []
stackJpegUrls = []
warpJpegUrls = []
colorJpegUrl = []
stackFitsFilename = []
warpFitsFilename = []
stackJpegFilename = []
warpJpegFilename = []
colorJpegFilename = []
allStacks = {
"jpegs": [],
"fits": [],
"filenames": [],
"filters": []
}
allWarps = {
"jpegs": [],
"fits": [],
"filenames": []
}
colorImage = {
"jpeg": [],
"filename": []
}
# USE REGEX TO FIND FITS URLS
reFitscutouts = re.compile(
r"""<th>(3PI )?(?P<imagetype>\w+)\s+(?P<skycellid>\d+.\d+)\s+(?P<ffilter>[\w\\]+)(\s+(?P<mjd>\d+\.\d+))?(\s<a.*\(warning\)</a>)?<br.*?href="(http:)?//ps1images.*?Display</a>.*?Fits cutout" href="(?P<fiturl>(http:)?//ps1images.*?\.fits)".*?</th>""", re.I | re.S)
thisIter = reFitscutouts.finditer(content)
for item in thisIter:
imagetype = item.group("imagetype")
skycellid = item.group("skycellid")
ffilter = item.group("ffilter")
fiturl = item.group("fiturl").replace("&", "&")
if fiturl[0:5] != "http":
fiturl = "http:" + fiturl
mjd = item.group("mjd")
if imagetype == "stack":
stackFitsUrls.append(fiturl)
elif imagetype == "warp":
warpFitsUrls.append(fiturl)
# USE REGEX TO FIND JPEG URLS
reJpegs = re.compile(
r"""<img src="(?P<jpegUrl>(http:)?//ps1images.*?skycell.*?)\"""", re.I | re.S)
thisIter = reJpegs.finditer(content)
for item in thisIter:
jpegUrl = item.group("jpegUrl").replace("&", "&")
if jpegUrl[0:5] != "http":
jpegUrl = "http:" + jpegUrl
if "red" in jpegUrl and "blue" in jpegUrl:
colorJpegUrl.append(jpegUrl)
elif ".wrp." in jpegUrl:
warpJpegUrls.append(jpegUrl)
elif ".stk." in jpegUrl:
stackJpegUrls.append(jpegUrl)
else:
self.log.warning(
"We are not downloading this jpeg: '%(jpegUrl)s'" % locals())
# USE REGEX TO FIND FITS METADATA (STACKS)
reFitsMeta = re.compile(
r'http?.*?\?.*?skycell\.(?P<skycell>\d+\.\d+).*?x=(?P<ra>\d+\.\d+).*?y=(?P<dec>[+|-]?\d+\.\d+).*?size=(?P<pixels>\d+).*?stk\.(?P<ffilter>\w+).*?fits', re.S | re.I)
def filterMjd(x): return True if not self.mjdStart or (float(
x) < self.mjdEnd and float(x) > self.mjdStart) else False
for i in stackJpegUrls:
fitsUrl = i.split("&")[0].replace("%3A", ":")
for f in stackFitsUrls:
if fitsUrl in f:
matchObject = re.search(reFitsMeta, f)
skycell = matchObject.group("skycell")
ra = matchObject.group("ra")
dec = matchObject.group("dec")
pixels = matchObject.group("pixels")
arcsec = str(int(old_div(int(pixels), 4)))
ffilter = matchObject.group("ffilter")
filename = """stack_%(ffilter)s_ra%(ra)s_dec%(dec)s_arcsec%(arcsec)s_skycell%(skycell)s""" % locals(
)
allStacks["jpegs"].append(i)
allStacks["fits"].append(f)
allStacks["filenames"].append(filename)
allStacks["filters"].append(ffilter)
# USE REGEX TO FIND FITS METADATA (WARPS)
reFitsMeta = re.compile(
r'http?.*?\?.*?skycell\.(?P<skycell>\d+\.\d+).*?x=(?P<ra>\d+\.\d+).*?y=(?P<dec>[+|-]?\d+\.\d+).*?size=(?P<pixels>\d+).*?wrp\.(?P<ffilter>\w+)\.(?P<mjd>\d+\_\d+).*?fits', re.S | re.I)
# GIVEN A RANGE IN MJDs OR NO MJDs
if (self.mjdStart and self.mjdEnd) or not (self.mjdStart or self.mjdEnd):
for i in warpJpegUrls:
fitsUrl = i.split("&")[0].replace("%3A", ":")
for f in warpFitsUrls:
if fitsUrl in f:
matchObject = re.search(reFitsMeta, f)
skycell = matchObject.group("skycell")
ra = matchObject.group("ra")
dec = matchObject.group("dec")
pixels = matchObject.group("pixels")
arcsec = str(int(old_div(int(pixels), 4)))
ffilter = matchObject.group("ffilter")
mjd = matchObject.group("mjd").replace("_", ".")
if not filterMjd(mjd):
continue
filename = """warp_%(ffilter)s_ra%(ra)s_dec%(dec)s_mjd%(mjd)s_arcsec%(arcsec)s_skycell%(skycell)s""" % locals(
)
allWarps["jpegs"].append(i)
allWarps["fits"].append(f)
allWarps["filenames"].append(filename)
elif self.mjdStart:
closestMjd = 99999999.
for i in warpJpegUrls:
fitsUrl = i.split("&")[0].replace("%3A", ":")
for f in warpFitsUrls:
if fitsUrl in f:
matchObject = re.search(reFitsMeta, f)
skycell = matchObject.group("skycell")
ra = matchObject.group("ra")
dec = matchObject.group("dec")
pixels = matchObject.group("pixels")
arcsec = str(int(old_div(int(pixels), 4)))
ffilter = matchObject.group("ffilter")
mjd = float(matchObject.group("mjd").replace("_", "."))
if not mjd > self.mjdStart or mjd > closestMjd:
continue
closestMjd = mjd
filename = """warp_%(ffilter)s_ra%(ra)s_dec%(dec)s_mjd%(mjd)s_arcsec%(arcsec)s_skycell%(skycell)s""" % locals(
)
allWarps["jpegs"] = [i]
allWarps["fits"] = [f]
allWarps["filenames"] = [filename]
mjdDiff = (closestMjd - self.mjdStart) * 24 * 60 * 60
window = self.window
if window:
window = abs(self.window)
if mjdDiff > window:
print(
"No warp image was found within %(window)s sec after requested MJD" % locals())
allWarps["jpegs"] = []
allWarps["fits"] = []
allWarps["filenames"] = []
print(
"The closest selected warp was taken %(mjdDiff)0.1f sec after the requested MJD" % locals())
elif self.mjdEnd:
closestMjd = 0.
for i in warpJpegUrls:
fitsUrl = i.split("&")[0].replace("%3A", ":")
for f in warpFitsUrls:
if fitsUrl in f:
matchObject = re.search(reFitsMeta, f)
skycell = matchObject.group("skycell")
ra = matchObject.group("ra")
dec = matchObject.group("dec")
pixels = matchObject.group("pixels")
arcsec = str(int(old_div(int(pixels), 4)))
ffilter = matchObject.group("ffilter")
mjd = float(matchObject.group("mjd").replace("_", "."))
if not mjd < self.mjdEnd or mjd < closestMjd:
continue
closestMjd = mjd
filename = """warp_%(ffilter)s_ra%(ra)s_dec%(dec)s_mjd%(mjd)s_arcsec%(arcsec)s_skycell%(skycell)s""" % locals(
)
allWarps["jpegs"] = [i]
allWarps["fits"] = [f]
allWarps["filenames"] = [filename]
mjdDiff = (self.mjdEnd - closestMjd) * 24 * 60 * 60
window = self.window
if window:
window = abs(self.window)
if mjdDiff > window:
print(
"No warp image was found within %(window)s sec before requested MJD" % locals())
allWarps["jpegs"] = []
allWarps["fits"] = []
allWarps["filenames"] = []
print(
"The closest selected warp was taken %(mjdDiff)0.1f sec before the requested MJD" % locals())
# USE REGEX TO FIND COLOR IMAGE METADATA
if len(colorJpegUrl):
reColorMeta = re.compile(
r'(?P<color>\w+)=(?P<datapath>/(data|rings).*?)&', re.S | re.I)
thisIter = reColorMeta.finditer(colorJpegUrl[0])
ffilter = ""
for item in thisIter:
fits = item.group("datapath").replace(
"%3A", ":").split("/")[-1]
for j, f, n, b in zip(allStacks["jpegs"], allStacks["fits"], allStacks["filenames"], allStacks["filters"]):
if fits in f:
ffilter += b
filename = n
filename = "color_" + ffilter + "_" + \
("_").join(filename.split("_")[2:])
colorImage["jpeg"].append(colorJpegUrl[0])
colorImage["filename"].append(filename)
self.log.debug(
'completed the ``parse_html_for_image_urls_and_metadata`` method')
return allStacks, allWarps, colorImage
[docs]
def _download_images(
self,
urls=[],
filenames=[],
downloadDirectory=False
):
"""
*download images*
**Key Arguments**
- ``urls`` -- list of the remote URLs to download
- ``filenames`` -- list filenames to rename the downloads as
- ``downloadDirectory`` -- path to the download directory
**Return**
- ``localUrls`` -- list of the paths to local image files
"""
self.log.debug('starting the ``_download_images`` method')
from fundamentals.download.multiobject_download import multiobject_download
localUrls = multiobject_download(
urlList=urls,
# directory(ies) to download the documents to - can be one url or a
# list of urls the same length as urlList
downloadDirectory=downloadDirectory,
log=self.log,
timeStamp=0,
timeout=180,
concurrentDownloads=10,
resetFilename=filenames,
credentials=False, # { 'username' : "...", "password", "..." }
longTime=False,
indexFilenames=False
)
self.log.debug('completed the ``_download_images`` method')
return localUrls