In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import pint_xarray
import pathlib
from openscm_units import unit_registry as ureg
import os
import tqdm
import zipfile

In [2]:
# directories the raw data is read from / the converted data is written to
inpath, outpath = map(pathlib.Path, ('../inputs/', '../outputs/'))

# Compression settings used as netCDF encoding when the output is written.
# Per the original note, compressing yields a file less than 1/3 the size of an
# uncompressed one.
compress_options = {'zlib': True, 'complevel': 1}

In [3]:
# Open the zip archive with the input data read-only ('r' is ZipFile's default mode).
# The archive object is kept open because a later cell reads a member from it.
inzip = zipfile.ZipFile(inpath / 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1.zip', mode='r')

In [4]:
# Read the csv member straight out of the zip archive via pandas and convert the
# resulting DataFrame to an xarray Dataset.
# The member is opened in a `with` block so that its file handle is closed as soon
# as pandas has finished parsing, instead of staying open for the kernel's lifetime.
csv_member = 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1/Jeffery-et-al-2018-PRIMAP-crf_2017-v1.csv'
with inzip.open(csv_member, 'r') as csv_fd:
    # the first two lines of the file are skipped before the header row is read
    ds = pd.read_csv(csv_fd, skiprows=2).to_xarray()

In [5]:
# Combine the three identifying columns into one MultiIndex on the 'index'
# dimension; the dataset is converted back to plain dimensions in a later cell.
index_levels = ['country', 'category', 'entity']
ds = ds.set_index({'index': index_levels})

In [6]:
# 'version' is always the same (per the original note), so it is dropped
ds = ds.drop_vars('version')

# Keep the unit strings in an array of their own and remove them from the dataset,
# then stack all remaining variables along a new 'date' dimension.
da_units = ds['unit']
ds = ds.drop_vars('unit')
da = ds.to_array('date')

In [7]:
# Normalize units: rewrite every unit string in scmdata notation and convert all
# values belonging to one entity to a single unit.

# Entities that get 'CO2' as their unit entity. Each one exists in four variants;
# the suffix of the entity name selects the GWP set recorded in the metadata
# (no suffix means SARGWP100).
basket_entities = ('FGASES', 'KYOTOGHG', 'HFCS', 'OTHERHFCS', 'OTHERPFCS', 'PFCS')
gwp_by_suffix = {
    '': 'SARGWP100',
    'AR4': 'AR4GWP100',
    'AR5': 'AR5GWP100',
    'AR5CCF': 'AR5CCFGWP100',
}

entity_metadata_map = {
    f'{base_entity}{suffix}': {
        'entity': base_entity,
        'unit entity': 'CO2',
        'gwp conversions': gwp,
    }
    for base_entity in basket_entities
    for suffix, gwp in gwp_by_suffix.items()
}

# unit spellings from the input that have to be reduced to the bare mass unit
unit_pretranslation = {'GgCO2eq': 'Gg', 'MtCO2eq': 'Mt'}

# target unit per unit entity; entities not listed use their alphabetically first unit
preferred_units = {'CO2': 'Gg CO2 / year'}

# filled below, used later to set the metadata on the output variables
entity_metadata = {}

for entity in np.unique(da['entity']):
    if entity in entity_metadata_map:
        metadata = entity_metadata_map[entity]
    else:
        # anything without a special mapping is its own unit entity
        metadata = {'entity': entity, 'unit entity': entity}

    # 'unit entity' is only needed while building unit strings and must not end up
    # in the final metadata
    unit_entity = metadata.pop('unit entity')

    # rows of the stacked index that belong to the current entity
    in_entity = da['entity'] == entity

    # Step 1: translate the raw unit strings. scmdata units contain the entity,
    # so every unit becomes '<mass unit> <unit entity> / year'.
    for raw_unit in np.unique(da_units.loc[{'entity': entity}]):
        mass_unit = unit_pretranslation.get(raw_unit, raw_unit)
        selection = {'index': (da_units == raw_unit) & in_entity}
        da_units.loc[selection] = f'{mass_unit} {unit_entity} / year'

    # Step 2: convert everything to one unit for this entity.
    scm_units = sorted(np.unique(da_units.loc[{'entity': entity}]))
    normal_unit = preferred_units.get(unit_entity, scm_units[0])

    for scm_unit in scm_units:
        if scm_unit != normal_unit:
            factor = ureg(scm_unit).to(ureg(normal_unit)).magnitude
            selection = {'index': (da_units == scm_unit) & in_entity}
            # scale the values and record the unit they are now expressed in
            da.loc[selection] *= factor
            da_units.loc[selection] = normal_unit

    metadata['units'] = normal_unit
    entity_metadata[entity] = metadata

In [8]:
# Expand the stacked 'index' MultiIndex back into separate country / category /
# entity dimensions and drop every date for which no value at all is present.
# `how` is passed by keyword: recent xarray releases only accept it as a keyword
# argument, while the keyword form also works on older releases.
da = da.unstack().dropna('date', how='all')

# the date labels are parsed as four-digit years
da['date'] = pd.to_datetime(da['date'].values, format='%Y')

# one data variable per entity
ds = da.to_dataset('entity')

In [9]:
# attach the metadata collected during unit normalization to each variable of the dataset
for var_name in ds:
    ds[var_name].attrs = entity_metadata[var_name]

In [10]:
# Make sure the output directory exists, so that writing does not fail on a fresh
# checkout where it has not been created yet.
outpath.mkdir(parents=True, exist_ok=True)

# every variable of the dataset is written with the same compression settings
encoding = {name: compress_options for name in ds}
ds.to_netcdf(outpath / 'primap-crf-2017v1.nc', encoding=encoding)