#!/usr/bin/env python
from __future__ import (division, print_function, absolute_import, unicode_literals)
import sys
import os
import shutil
import fnmatch
import datetime
import calendar
import subprocess
import re
import time
from optparse import OptionParser
import requests
import logging
from config_utils import retrieve_cfg
from api_interface import api_connect
from download_lads import downloadLads
from io import StringIO
from pathlib import Path
# Global static variables
ERROR = 1
SUCCESS = 0
NPP_START_YEAR = 2017
JPSS1_START_YEAR = 2021 # quarterly processing will reprocess back to the
# start year to make sure all data is up to date
# Landsat 8 was launched on Feb. 11, 2013
# Landsat 9 was launched on Sept. 27, 2021
##NOTE: For non-ESPA environments, the TOKEN needs to be defined. This is
##the application token that is required for accessing the LAADS data
##https://ladsweb.modaps.eosdis.nasa.gov/tools-and-services/data-download-scripts/
TOKEN = os.environ.get('LAADS_TOKEN', None)
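# e.g. (illustrative) export a valid application token before running this
# script outside of ESPA:
#   export LAADS_TOKEN="<your LAADS application token>"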
USERAGENT = 'espa.cr.usgs.gov/updatelads.py 1.4.1--' + sys.version.replace('\n','').replace('\r','')
# leap day start/end of month
ldaySOM = [ 1, 32, 61, 92, 122, 153, 183, 214, 245, 275, 306, 336]
ldayEOM = [31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366]
# regular day start/end of month
rdaySOM = [ 1, 32, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335]
rdayEOM = [31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334, 365]
def geturl(url, token=None, out=None):
"""
Pulls the file specified by URL. If there is a problem with the
connection, then retry up to 5 times.
Args:
url: URL for the file to be downloaded
token: application token for the desired website
out: directory where the downloaded file will be written
Returns: None
"""
# get the logger
logger = logging.getLogger(__name__)
# get the headers for the application data download
headers = {'user-agent' : USERAGENT}
    if token is not None:
        headers['Authorization'] = 'Bearer ' + token

    # Use curl to download the file (subprocess is already imported at the
    # top of this module)
try:
# Setup CURL command using silent mode and change location if reported
args = ['curl', '--fail', '-sS', '-L', '--retry', '5',
'--retry-delay', '60', '--get', url]
for (k,v) in list(headers.items()):
args.extend(['-H', ': '.join([k, v])])
if out is None:
# python3's subprocess.check_output returns stdout as a
# byte string
result = subprocess.check_output(args)
return result.decode('utf-8') if isinstance(result, bytes) else result
else:
# download of the actual LAADS data product
retval = subprocess.call(args, stdout=out)
# make sure the download was successful or retry up to 5 more times
# and sleep in between
if retval:
retry_count = 1
while ((retry_count <= 5) and (retval)):
time.sleep(60)
logger.info('Retry {} of download for {}'
.format(retry_count, url))
retval = subprocess.call(args, stdout=out)
retry_count += 1
                if retval:
                    logger.warning('Unsuccessful download of {} (retried 5 '
                                   'times)'.format(url))
    except subprocess.CalledProcessError as e:
        # CalledProcessError has no 'message' attribute in python3; report
        # the URL, exit code, and any captured output instead
        msg = ('curl GET error for URL ({}): exit code {}, output: {}'
               .format(url, e.returncode, e.output))
        logger.error(msg)

    return None
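# Illustrative usage of geturl (the URL and filename here are hypothetical,
# not taken from this script):
#
#     with open('/tmp/lads/2022/VJ104ANC.A2022001.h5', 'wb') as fp:
#         geturl('https://ladsweb.modaps.eosdis.nasa.gov/.../VJ104ANC.A2022001.h5',
#                token=TOKEN, out=fp)
#
# When out is None (e.g. directory listings), the response body is returned
# as a string instead.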
def getLadsData (auxdir, year, today, token):
"""
Description: getLadsData downloads the daily VIIRS atmosphere data files
for the desired year.
Args:
auxdir: name of the base LASRC_SR auxiliary directory which contains the
LAADS directory
year: year of LAADS data to be downloaded and processed (integer)
today: specifies if we are just bringing the LAADS data up to date vs.
reprocessing the data
token: application token for the desired website
Returns:
ERROR: error occurred while processing
SUCCESS: processing completed successfully
"""
# get the logger
logger = logging.getLogger(__name__)
# determine the directory for the output auxiliary data files to be
# processed. create the directory if it doesn't exist.
outputDir = '{}/LADS/{}'.format(auxdir, year)
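    # e.g. (illustrative) with auxdir=/usr/local/lasrc_aux and year=2022, the
    # gap-filled VJ104ANC/VNP04ANC files land in /usr/local/lasrc_aux/LADS/2022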
if not os.path.exists(outputDir):
msg = '{} does not exist... creating'.format(outputDir)
logger.info(msg)
os.makedirs(outputDir, 0o777)
# if the specified year is the current year, only process up through
# today (actually 2 days earlier due to the LAADS data lag) otherwise
# process through all the days in the year
now = datetime.datetime.now()
if year == now.year:
# start processing LAADS data with a 2-day time lag. if the 2-day lag
# puts us into last year, then we are done with the current year.
day_of_year = now.timetuple().tm_yday - 2
if day_of_year <= 0:
return SUCCESS
else:
if calendar.isleap(year):
day_of_year = 366
else:
day_of_year = 365
# set the download directory in /tmp/lads
dloaddir = '/tmp/lads/{}'.format(year)
# make sure the download directory exists or create it and all necessary
# parent directories
if not os.path.exists(dloaddir):
msg = '{} does not exist... creating'.format(dloaddir)
logger.info(msg)
os.makedirs(dloaddir, 0o777)
else:
# directory already exists and possibly has files in it. any old
# files need to be cleaned up
msg = 'Cleaning download directory: {}'.format(dloaddir)
logger.info(msg)
for myfile in os.listdir(dloaddir):
name = os.path.join(dloaddir, myfile)
if not os.path.isdir(name):
os.remove(name)
# loop through each day in the year and process the LAADS data. process
# in the reverse order so that if we are handling data for "today", then
# we can stop as soon as we find the current DOY has been processed.
for doy in range(day_of_year, 0, -1):
# get the year + DOY string
datestr = '{}{:03d}'.format(year, doy)
# if the JPSS1 data for the current year and doy exists already, then
# we are going to skip that file if processing for the --today. For
# --quarterly, we will completely reprocess. If the backup NPP
# product exists without the JPSS1, then we will still reprocess in
# hopes that the JPSS1 product becomes available.
skip_date = False
for myfile in os.listdir(outputDir):
if fnmatch.fnmatch (myfile, 'VJ104ANC.A{}*.h5'.format(datestr)) \
and today:
msg = ('JPSS1 product for VJ104ANC.A{} already exists. Skip.'
.format(datestr))
logger.info(msg)
skip_date = True
break
if skip_date:
continue
# download the daily LAADS files for the specified year and DOY. The
# JPSS1 file is the priority, but if that isn't found then the NPP
# file will be downloaded.
found_vj104anc = False
found_vnp04anc = False
status = downloadLads (year, doy, dloaddir, token)
if status == ERROR:
# warning message already printed
return ERROR
# get the JPSS1 file for the current DOY (should only be one)
fileList = [] # create empty list to store files matching date
for myfile in os.listdir(dloaddir):
if fnmatch.fnmatch (myfile, 'VJ104ANC.A{}*.h5'.format(datestr)):
fileList.append (myfile)
# make sure files were found or search for the NPP file
nfiles = len(fileList)
if nfiles == 0:
# get the NPP file for the current DOY (should only be one)
for myfile in os.listdir(dloaddir):
if fnmatch.fnmatch (myfile, 'VNP04ANC.A{}*.h5'
.format(datestr)):
fileList.append (myfile)
# make sure files were found
nfiles = len(fileList)
if nfiles != 0:
# if only one file was found which matched our date, then that
# is the file we'll process. if more than one was found, then
# we have a problem as only one file is expected.
if nfiles == 1:
found_vnp04anc = True
viirs_anc = dloaddir + '/' + fileList[0]
else:
msg = ('Multiple LAADS VNP04ANC files found for doy {} '
'year {}'.format(doy, year))
logger.error(msg)
return ERROR
else:
# if only one file was found which matched our date, then that's
# the file we'll process. if more than one was found, then we
# have a problem as only one file is expected.
if nfiles == 1:
found_vj104anc = True
viirs_anc = dloaddir + '/' + fileList[0]
else:
msg = ('Multiple LAADS VJ104ANC files found for doy {} year {}'
.format(doy, year))
logger.error(msg)
return ERROR
# make sure at least one of the JPSS1 or NPP files is present
if not found_vj104anc and not found_vnp04anc:
msg = ('Neither the JPSS1 nor NPP data is available for doy {} '
'year {}. Skipping this date.'.format(doy, year))
logger.warning(msg)
continue
# determine the month and day (both 1-based) for this year/doy date
# indx will be 0-based for the array
        if calendar.isleap(year):
            for indx in range(0, len(ldayEOM)):
                if doy <= ldayEOM[indx]:
                    break
            month = indx + 1
            day = doy - ldaySOM[indx] + 1
        else:
            for indx in range(0, len(rdayEOM)):
                if doy <= rdayEOM[indx]:
                    break
            month = indx + 1
            day = doy - rdaySOM[indx] + 1
# generate the command-line arguments and executable for gap-filling
# the VIIRS product (works the same for either VJ104ANC or VNP04ANC)
cmdstr = ('gapfill_viirs_aux --viirs_aux {} --month {} --day {} '
'--year {}'.format(viirs_anc, month, day, year))
msg = 'Executing {}'.format(cmdstr)
logger.info(msg)
        (status, output) = subprocess.getstatusoutput(cmdstr)
        logger.info(output)

        # in python3, getstatusoutput returns the exit code itself, so no
        # shifting of the raw wait status is needed
        if status != 0:
            msg = ('Error running gapfill_viirs_aux for year {}, DOY {}: {}'
                   .format(year, doy, cmdstr))
            logger.error(msg)
            return ERROR
# move the gap-filled file to the output directory
msg = ('Moving downloaded file {} to {}'
.format(viirs_anc, outputDir))
logger.debug(msg)
viirs_name = Path(viirs_anc).name
shutil.move(viirs_anc, os.path.join(outputDir, viirs_name))
# end for doy
return SUCCESS
############################################################################
# Description: Main routine which grabs the command-line arguments, determines
# which years/days of data need to be processed, then processes the user-
# specified dates of LAADS data.
#
# Developer(s):
# Gail Schmidt, USGS EROS - Original development
#
# Returns:
# ERROR - error occurred while processing
# SUCCESS - processing completed successfully
#
# Notes:
# 1. This script can be called with the --today option or with a combination
# of --start_year / --end_year. --today trumps --quarterly and
# --start_year / --end_year.
# 2. --today will process the data for the most recent year (including the
# previous year if the DOY is within the first month of the year). Thus
# this option is used for nightly updates. If the gapfilled VJ104 data
# products already exist for a particular year/doy, they will not be
# reprocessed.
# 3. --quarterly will process the data for today all the way back to the
# earliest year so that any updated LAADS files are picked up and
# processed. Thus this option is used for quarterly updates.
# 4. Existing LAADS HDF files are removed before processing data for that
# year and DOY, but only if the downloaded auxiliary data exists for that
# date.
############################################################################
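# Example invocations (illustrative; the flag names come from the option
# parser below):
#   updatelads.py --today
#   updatelads.py --quarterly
#   updatelads.py --start_year 2021 --end_year 2022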
def main ():
logger = logging.getLogger(__name__) # Get logger for the module.
# get the command line arguments
parser = OptionParser()
parser.add_option ('-s', '--start_year', type='int', dest='syear',
default=0, help='year for which to start pulling LAADS data')
parser.add_option ('-e', '--end_year', type='int', dest='eyear',
default=0, help='last year for which to pull LAADS data')
parser.add_option ('--today', dest='today', default=False,
action='store_true',
help='process LAADS data up through the most recent year and DOY')
msg = ('process or reprocess all LAADS data from today back to {}'
.format(JPSS1_START_YEAR))
parser.add_option ('--quarterly', dest='quarterly', default=False,
action='store_true', help=msg)
(options, args) = parser.parse_args()
syear = options.syear # starting year
eyear = options.eyear # ending year
today = options.today # process most recent year of data
quarterly = options.quarterly # process today back to START_YEAR
# check the arguments
    if not today and not quarterly and (syear == 0 or eyear == 0):
        msg = ('Invalid command line argument combination. Type --help '
               'for more information.')
        logger.error(msg)
        return ERROR
# determine the auxiliary directory to store the data
auxdir = os.environ.get('LASRC_AUX_DIR')
if auxdir is None:
msg = 'LASRC_AUX_DIR environment variable not set... exiting'
logger.error(msg)
return ERROR
# Get the application token for the LAADS https interface. for ESPA
# systems, pull the token from the config file.
    token = None
    if TOKEN is None:
# ESPA Processing Environment
# Read ~/.usgs/espa/processing.conf to get the URL for the ESPA API.
# Connect to the ESPA API and get the application token for downloading
# the LAADS data from the internal database.
PROC_CFG_FILENAME = 'processing.conf'
proc_cfg = retrieve_cfg(PROC_CFG_FILENAME)
rpcurl = proc_cfg.get('processing', 'espa_api')
server = api_connect(rpcurl)
if server:
token = server.get_configuration('aux.downloads.laads.token')
else:
        # Non-ESPA processing. TOKEN is pulled from the LAADS_TOKEN
        # environment variable at the top of this script.
        token = TOKEN
if token is None:
logger.error('Application token is None. This needs to be a valid '
'token provided for accessing the LAADS data. '
'https://ladsweb.modaps.eosdis.nasa.gov/tools-and-services/data-download-scripts/')
return ERROR
# if processing today then process the current year. if the current
# DOY is within the first month, then process the previous year as well
# to make sure we have all the recently available data processed.
now = datetime.datetime.now()
if today:
msg = 'Processing LAADS data up to the most recent year and DOY.'
logger.info(msg)
day_of_year = now.timetuple().tm_yday
eyear = now.year
if day_of_year <= 31:
syear = eyear - 1
else:
syear = eyear
elif quarterly:
msg = 'Processing LAADS data back to {}'.format(JPSS1_START_YEAR)
logger.info(msg)
eyear = now.year
syear = JPSS1_START_YEAR
msg = 'Processing LAADS data for {} - {}'.format(syear, eyear)
logger.info(msg)
for yr in range(eyear, syear-1, -1):
msg = 'Processing year: {}'.format(yr)
logger.info(msg)
status = getLadsData(auxdir, yr, today, token)
if status == ERROR:
msg = ('Problems occurred while processing LAADS data for year {}'
.format(yr))
logger.error(msg)
return ERROR
msg = 'LAADS processing complete.'
logger.info(msg)
return SUCCESS
if __name__ == "__main__":
# setup the default logger format and level. log to STDOUT.
logging.basicConfig(format=('%(asctime)s.%(msecs)03d %(process)d'
' %(levelname)-8s'
' %(filename)s:%(lineno)d:'
'%(funcName)s -- %(message)s'),
datefmt='%Y-%m-%d %H:%M:%S',
level=logging.INFO)
sys.exit (main())