#! /usr/bin/env python3
# coding=utf-8
""""""
"""
Author: radenz@tropos.de
"""
import datetime
import toml
import numpy as np
import netCDF4
import os
import gc
import subprocess
[docs]def time_list(begin, end, delta):
"""generate a time list from begin to <= end with delta
Args:
begin (datetime): start time
end (datetime): end time (included)
Returns:
list: List of generated datetime objects
"""
time_list = []
elem = begin
while elem <= end:
time_list.append(elem)
elem += datetime.timedelta(hours=delta)
return time_list
[docs]def save_item(dataset, item_data):
"""
Save an item to the dataset with the data given as a dict
Args:
dataset (:obj:netCDF4.Dataset): netcdf4 Dataset to add
item_data (dict): with the data to add, for example:
================== ===============================================================
Key Example
================== ===============================================================
``var_name`` Z
``dimension`` ('time', 'height')
``arr`` self.corr_refl_reg[:].filled()
``long_name`` "Reflectivity factor"
**optional**
``comment`` "Wind profiler reflectivity factor corrected by cloud radar"
``units`` "dBz"
``missing_value`` -200.
``plot_range`` [-50., 20.]
``plot_scale`` "linear"
``vartype`` np.float32
================== ===============================================================
"""
if 'vartype' in item_data.keys():
item = dataset.createVariable(item_data['var_name'], item_data['vartype'], item_data['dimension'])
else:
item = dataset.createVariable(item_data['var_name'], np.float32, item_data['dimension'])
item[:] = item_data['arr']
item.long_name = item_data['long_name']
if 'comment' in item_data.keys():
item.comment = item_data['comment']
if 'units' in item_data.keys():
item.units = item_data['units']
if 'units_html' in item_data.keys():
item.units_html = item_data['units_html']
if 'missing_value' in item_data.keys():
item.missing_value = item_data['missing_value']
if 'plot_range' in item_data.keys():
item.plot_range = item_data['plot_range']
if 'plot_scale' in item_data.keys():
item.plot_scale = item_data['plot_scale']
if 'axis' in item_data.keys():
item.axis = item_data['axis']
return dataset
[docs]def get_git_hash():
"""
Returns:
git describe string
"""
try:
commit_id = subprocess.check_output(['git', 'describe', '--always'])
branch = subprocess.check_output(['git', 'rev-parse', '--abbrev-ref', 'HEAD'])
except:
commit_id = ''
branch = 'not version controlled'
return commit_id.rstrip(), branch.rstrip()
[docs]class assemble_pattern():
"""
assemble a time height period by putting multiple hysplit trajectories togehter,
calculate the statistics and save to a netcdf file
Args:
config_file (str, optional): path to the config file
"""
def __init__(self, config_file='../config.toml'):
with open(config_file) as config_file:
self.config = toml.loads(config_file.read())
self.config['time']['begin_dt'] = datetime.datetime.strptime(self.config['time']['begin'],
'%Y-%m-%d_%H')
self.config['time']['end_dt'] = datetime.datetime.strptime(self.config['time']['end'],
'%Y-%m-%d_%H')
print('config', self.config)
self.dt_list = trace_source.time_list(self.config['time']['begin_dt'],
self.config['time']['end_dt'],
self.config['time']['step'])
print('dt_list', self.dt_list)
self.height_list = list(range(500, self.config['height']['top']+1, 500))
self.no_part = []
self.time_res = []
# the assemble method is model data specific
#def assemble(self, dt_range=None):
[docs] def dump2netcdf(self, model_str='hysplit'):
"""
dump the assembled data to a netcdf file repeatatly call :meth:`save_item`
configuration (directories, names, etc) is given by the config file
"""
timestamps = np.array([(dt - datetime.datetime(1970,1,1)).total_seconds() for dt in self.dt_list])
hours_cn = np.array([dt.hour + dt.minute / 60. + dt.second / 3600. for dt in self.dt_list])
if not os.path.isdir(self.config['output_dir']):
os.makedirs(self.config['output_dir'])
ncfile = self.config['output_dir'] +\
'{}_{}_{}-output.nc'.format(
self.dt_list[0].strftime('%Y%m%d'), self.config['station']['short_name'], model_str)
# ncfile = "/home/devel/" +\
# '{}_hysplit_output.nc'.format(self.config['time']['begin_dt'].strftime('%Y%m%d'))
#dataset = netCDF4.Dataset(ncfile, 'w', format='NETCDF4')
dataset = netCDF4.Dataset(ncfile, 'w', format='NETCDF3_CLASSIC')
dim_time = dataset.createDimension('time', len(self.dt_list))
dim_height = dataset.createDimension('height', len(self.height_list))
dim_age = dataset.createDimension('time_age', abs(self.config['time']['tr_duration'])+1)
dim_cat = dataset.createDimension('categories', 7)
dim_regions = dataset.createDimension('regions', len(list(self.geo_names.keys())))
dim_lats = dataset.createDimension('lat_thres', len(list(self.lat_names.keys())))
# times_cn = dataset.createVariable('time', np.float32, ('time',))
# times_cn[:] = hours_cn.astype(np.float32)
# times_cn.units = "hours since " + self.begin_dt.strftime('%Y-%m-%d') + "00:00:00 +00:00"
# times_cn.long_name = "Decimal hours from midnight UTC"
# times_cn.axis = "T"
save_item(dataset, {'var_name': 'timestamp', 'dimension': ('time', ),
'vartype': 'i4',
'arr': timestamps.astype(np.int32),
'long_name': "Unix timestamp",
'units': "s", 'axis': 'T'})
save_item(dataset, {'var_name': 'time', 'dimension': ('time', ),
'arr': hours_cn.astype(np.float32),
'long_name': "Decimal hours from midnight UTC",
'units': "hours since {} 00:00:00 +00:00".format(self.dt_list[0].strftime('%Y-%m-%d')),
'axis': 'T'})
save_item(dataset, {'var_name': 'range', 'dimension': ('height', ),
'arr': np.array(self.height_list).astype(np.float32)/1000.,
'long_name': "Height",
'units': "km", 'axis': 'Z'})
save_item(dataset, {'var_name': 'age', 'dimension': ('time_age', ),
'arr': self.raw_dict['age'][0, 0],
'long_name': "Age of trajectory",
'units': "h"})
save_item(dataset, {'var_name': 'no_part', 'dimension': ('time', ),
'arr': np.array(self.no_part),
'long_name': "number particles/trajectories",
'units': "no"})
save_item(dataset, {'var_name': 'time_res', 'dimension': ('time', ),
'arr': np.array(self.time_res),
'long_name': "backward time resolution",
'units': "no"})
for k in list(self.stat2d_dict.keys()):
print(k, self.stat2d_dict.get(k).shape)
dataset = save_item(dataset, {'var_name': k, 'dimension': ('time', 'height'),
'arr': self.stat2d_dict.get(k).copy().astype(np.float32),
'long_name': k})
# its sufficient to save the age of the trajectory once
raw_data_keys = list(self.raw_dict.keys())
raw_data_keys.remove('age')
# chance to modify some parameter descriptions for better readability
modified_params = {key: {'var_name': key.lower(),
'long_name': "Hysplit " + key.lower()} for key in raw_data_keys}
modified_params['height'] = {'var_name': 'traj_height', 'long_name': "Hysplit height of air parcel"}
if 'land_sfc_category' in list(modified_params.keys()):
modified_params['land_sfc_category']['long_name'] = "Modis land use category (simplified)"
for k in raw_data_keys:
print(k, self.raw_dict.get(k).shape)
dataset = save_item(dataset, {'var_name': modified_params[k]['var_name'],
'dimension': ('time', 'height', 'time_age'),
'arr': self.raw_dict.get(k).copy().astype(np.float32),
'long_name': modified_params[k]['long_name']})
# save the land use
ls_data_keys = list(self.statls_dict.keys())
print('ls_data_keys ', ls_data_keys)
modified_params = {key: {'var_name': key,
'long_name': "land surface " + key.lower(),
'comment': str(self.ls_categories)} for key in ls_data_keys}
for k in ls_data_keys:
print(k, self.statls_dict.get(k).shape)
dataset = save_item(dataset, {'var_name': modified_params[k]['var_name'],
'dimension': ('time', 'height', 'categories'),
'arr': self.statls_dict.get(k),
'long_name': modified_params[k]['long_name'],
'comment': modified_params[k]['comment']})
for k in [ky for ky in ls_data_keys if 'ens' in ky]:
rel = self.statls_dict.get(k)
no_below = self.stat2d_dict.get(k + "_no_below")
no_below = np.repeat(no_below[:,:,np.newaxis], rel.shape[-1], axis=2)
no_below[no_below < 0] = np.nan
norm = np.array(self.no_part)*10*(24./np.array(self.time_res))
norm = np.repeat(norm[:,np.newaxis], rel.shape[1], axis=1)
norm = np.repeat(norm[:,:,np.newaxis], rel.shape[2], axis=2)
normed_time = rel*no_below/norm
normed_time[~np.isfinite(normed_time)] = -1
str_below = modified_params[k]['var_name'].replace("occ_ens_", "")
var_name = "rt_normed_landsfc_" + str_below
long_name = "normed residence time land surface " + str_below
dataset = save_item(dataset, {'var_name': var_name,
'dimension': ('time', 'height', 'categories'),
'arr': normed_time,
'long_name': long_name,
'comment': modified_params[k]['comment']})
# and the geo names
gn_data_keys = list(self.statgn_dict.keys())
print('gn_data_keys ', gn_data_keys)
modified_params = {key: {'var_name': key,
'long_name': "geography names " + key.lower(),
'comment': str(self.geo_names)} for key in gn_data_keys}
for k in gn_data_keys:
print(k, self.statgn_dict.get(k).shape)
dataset = save_item(dataset, {'var_name': modified_params[k]['var_name'],
'dimension': ('time', 'height', 'regions'),
'arr': self.statgn_dict.get(k),
'long_name': modified_params[k]['long_name'],
'comment': modified_params[k]['comment']})
for k in gn_data_keys:
rel = self.statgn_dict.get(k)
no_below = self.stat2d_dict.get(k + "_no_below")
no_below = np.repeat(no_below[:,:,np.newaxis], rel.shape[-1], axis=2)
no_below[no_below < 0] = np.nan
norm = np.array(self.no_part)*10*(24./np.array(self.time_res))
norm = np.repeat(norm[:,np.newaxis], rel.shape[1], axis=1)
norm = np.repeat(norm[:,:,np.newaxis], rel.shape[2], axis=2)
normed_time = rel*no_below/norm
normed_time[~np.isfinite(normed_time)] = -1
str_below = modified_params[k]['var_name'].replace("region_ens_", "")
var_name = "rt_normed_region_" + str_below
long_name = "normed residence time named region " + str_below
dataset = save_item(dataset, {'var_name': var_name,
'dimension': ('time', 'height', 'regions'),
'arr': normed_time,
'long_name': long_name,
'comment': modified_params[k]['comment']})
# TODO make statlat optional statlat_dict
lat_data_keys = list(self.statlat_dict.keys())
print('lat_data_keys ', lat_data_keys)
modified_params = {key: {'var_name': key,
'long_name': "lat_thres " + key.lower(),
'comment': str(self.lat_names)} for key in lat_data_keys}
for k in lat_data_keys:
print("self.statlat_dict.keys()", self.statlat_dict.keys())
print(k, self.statlat_dict.get(k).shape)
dataset = save_item(dataset, {'var_name': modified_params[k]['var_name'],
'dimension': ('time', 'height', 'lat_thres'),
'arr': self.statlat_dict.get(k),
'long_name': modified_params[k]['long_name'],
'comment': modified_params[k]['comment']})
for k in lat_data_keys:
rel = self.statlat_dict.get(k)
no_below = self.stat2d_dict.get(k + "_no_below")
no_below = np.repeat(no_below[:,:,np.newaxis], rel.shape[-1], axis=2)
no_below[no_below < 0] = np.nan
norm = np.array(self.no_part)*10*(24./np.array(self.time_res))
norm = np.repeat(norm[:,np.newaxis], rel.shape[1], axis=1)
norm = np.repeat(norm[:,:,np.newaxis], rel.shape[2], axis=2)
normed_time = rel*no_below/norm
normed_time[~np.isfinite(normed_time)] = -1
str_below = modified_params[k]['var_name'].replace("lat_ens_", "")
var_name = "rt_normed_lat_" + str_below
long_name = "normed residence time latitude " + str_below
dataset = save_item(dataset, {'var_name': var_name,
'dimension': ('time', 'height', 'lat_thres'),
'arr': normed_time,
'long_name': long_name,
'comment': modified_params[k]['comment']})
# save_item(dataset, {'var_name': 'width', 'dimension': ('time', 'height'),
# 'arr': .corr_width_reg[:].filled(), 'long_name': "Spectral width",
# 'comment': "Wind profiler spectral width (standard deviation) corrected by cloud radar (only Bragg contribution)",
# 'units': "m s-1", 'units_html': "m s<sup>-1</sup>",
# 'missing_value': -99., 'plot_range': (0.01, 4.),
# 'plot_scale': "logarithmic"})
with open('output_meta.toml') as output_meta:
meta_info = toml.loads(output_meta.read())
dataset.description = meta_info['description'][model_str]
dataset.location = self.config['station']['name']
if "moving" in self.config['station'].keys() and self.config['station']['moving'] == True:
dataset.coordinates = "Moving Platform!"
else:
dataset.coordinates = (self.config['station']['lat'], self.config['station']['lon'])
dataset.institution = meta_info["institution"]
dataset.authors = meta_info["authors"]
dataset.contact = meta_info["contact"]
dataset.creation_time = datetime.datetime.utcnow().strftime("%Y-%m-%d %H:%M UTC")
dataset.day = self.dt_list[0].day
dataset.month = self.dt_list[0].month
dataset.year = self.dt_list[0].year
dataset.git_commit, dataset.git_branch = get_git_hash()
dataset.close()
gc.collect()
#
# for some wired reson the submodule imports have to be done afterwards
#
import trace_source.hysplit
import trace_source.flexpart
import trace_source.land_sfc
import trace_source.helpers
import trace_source.simulations