|
@@ -0,0 +1,297 @@
|
|
|
|
+{
|
|
|
|
+ "cells": [
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 12,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:16:12.688993Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:16:12.688809Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:16:12.691487Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:16:12.691055Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:16:12.688977Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "import pandas as pd\n",
|
|
|
|
+ "import numpy as np\n",
|
|
|
|
+ "import xarray as xr\n",
|
|
|
|
+ "import pint_xarray\n",
|
|
|
|
+ "import pathlib\n",
|
|
|
|
+ "from openscm_units import unit_registry as ureg\n",
|
|
|
|
+ "import os\n",
|
|
|
|
+ "import tqdm\n",
|
|
|
|
+ "import zipfile"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 13,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:16:13.035946Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:16:13.035581Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:16:13.040967Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:16:13.040022Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:16:13.035910Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Directories holding the raw input data and the converted output (relative paths).\n",
+ "inpath = pathlib.Path('../inputs/')\n",
+ "outpath = pathlib.Path('../outputs/')\n",
+ "\n",
+ "# netCDF encoding options applied to every variable on write: zlib compression, level 1.\n",
+ "# Compressed output files are less than 1/3 the size of an uncompressed file.\n",
+ "compress_options = {'zlib': True, 'complevel': 1}"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 14,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:16:13.325976Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:16:13.325225Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:16:13.334801Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:16:13.333160Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:16:13.325906Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Open the PRIMAP-crf zip archive read-only; the CSV member is read from it in the next cell.\n",
+ "archive_path = inpath / 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1.zip'\n",
+ "inzip = zipfile.ZipFile(archive_path, mode='r')"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 15,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:16:13.815747Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:16:13.814988Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:16:14.120500Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:16:14.119710Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:16:13.815673Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# read via pandas\n",
+ "# The first two lines of the CSV member are skipped (skiprows=2) before pandas parses the table.\n",
+ "# The `with` block closes the member's file handle as soon as parsing is finished,\n",
+ "# instead of keeping it open for the lifetime of the kernel.\n",
+ "csv_member = 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1/Jeffery-et-al-2018-PRIMAP-crf_2017-v1.csv'\n",
+ "with inzip.open(csv_member, 'r') as csv_fd:\n",
+ "    ds = pd.read_csv(csv_fd, skiprows=2).to_xarray()"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 17,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:16:26.982561Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:16:26.981950Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:16:26.995034Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:16:26.994022Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:16:26.982505Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Combine the identifying columns into a MultiIndex on the 'index' dimension.\n",
+ "# The MultiIndex'ed ds is converted into a normal ds with separate dimensions later.\n",
+ "index_levels = ['country', 'category', 'entity']\n",
+ "ds = ds.set_index(index=index_levels)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 25,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:18:07.159983Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:18:07.159792Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:18:07.165712Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:18:07.165271Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:18:07.159967Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Keep the unit information in its own array before it is removed from the dataset.\n",
+ "da_units = ds['unit']\n",
+ "\n",
+ "# 'version' is always the same and 'unit' now lives in da_units: drop both, then\n",
+ "# stack the remaining variables along a new 'date' axis.\n",
+ "ds = ds.drop_vars(['version', 'unit'])\n",
+ "da = ds.to_array('date')"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 26,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:18:07.306345Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:18:07.306027Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:18:08.843706Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:18:08.843316Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:18:07.306316Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# normalize units\n",
+ "#\n",
+ "# NOTE: da and da_units are updated in place below. Re-running this cell without\n",
+ "# re-running the cells above would translate the already translated units again.\n",
+ "\n",
+ "# 'gwp conversions' value recorded for each entity-name suffix ('' = no suffix).\n",
+ "gwp_by_suffix = {\n",
+ "    '': 'SARGWP100',\n",
+ "    'AR4': 'AR4GWP100',\n",
+ "    'AR5': 'AR5GWP100',\n",
+ "    'AR5CCF': 'AR5CCFGWP100',\n",
+ "}\n",
+ "\n",
+ "# Metadata templates for the entities whose unit entity is CO2 instead of the entity itself.\n",
+ "entity_metadata_map = {}\n",
+ "for base_entity in ('FGASES', 'KYOTOGHG', 'HFCS', 'OTHERHFCS', 'OTHERPFCS', 'PFCS'):\n",
+ "    for suffix, gwp in gwp_by_suffix.items():\n",
+ "        entity_metadata_map[f'{base_entity}{suffix}'] = {\n",
+ "            'entity': base_entity,\n",
+ "            'unit entity': 'CO2',\n",
+ "            'gwp conversions': gwp,\n",
+ "        }\n",
+ "\n",
+ "# Raw unit strings that are replaced by the bare mass unit before the unit entity is appended.\n",
+ "unit_pretranslation = {\n",
+ "    'GgCO2eq': 'Gg',\n",
+ "    'MtCO2eq': 'Mt',\n",
+ "}\n",
+ "\n",
+ "# Target unit per unit entity; unit entities not listed here use the first of their sorted units.\n",
+ "preferred_units = {\n",
+ "    'CO2': 'Gg CO2 / year'\n",
+ "}\n",
+ "\n",
+ "# will be used later to set the metadata\n",
+ "entity_metadata = {}\n",
+ "\n",
+ "# now convert each entity to a single unit, normalizing the units to scmdata names\n",
+ "for entity in np.unique(da['entity']):\n",
+ "    # Work on a copy so that the templates in entity_metadata_map stay untouched.\n",
+ "    metadata = dict(entity_metadata_map.get(entity, {'entity': entity, 'unit entity': entity}))\n",
+ "    unit_entity = metadata.pop('unit entity')\n",
+ "\n",
+ "    # Rows belonging to this entity; identical for every unit, so it is computed only once.\n",
+ "    is_entity = da['entity'] == entity\n",
+ "\n",
+ "    # normalize all units to scmdata\n",
+ "    # because scmdata contains the entity in the unit, this is kind of complicated\n",
+ "    for unit in np.unique(da_units.loc[{'entity': entity}]):\n",
+ "        tr_unit = unit_pretranslation.get(unit, unit)\n",
+ "        scm_unit = f'{tr_unit} {unit_entity} / year'\n",
+ "        da_units.loc[{'index': (da_units == unit) & is_entity}] = scm_unit\n",
+ "\n",
+ "    # convert all units to a single unit\n",
+ "    units = sorted(np.unique(da_units.loc[{'entity': entity}]))\n",
+ "    normal_unit = preferred_units.get(unit_entity, units[0])\n",
+ "\n",
+ "    for unit in units:\n",
+ "        if unit == normal_unit:\n",
+ "            continue\n",
+ "        # Scale the data by the conversion factor and record the new unit for the same rows.\n",
+ "        factor = ureg(unit).to(ureg(normal_unit)).magnitude\n",
+ "        loc = {'index': (da_units == unit) & is_entity}\n",
+ "        da.loc[loc] *= factor\n",
+ "        da_units.loc[loc] = normal_unit\n",
+ "\n",
+ "    metadata['units'] = normal_unit\n",
+ "    entity_metadata[entity] = metadata"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 27,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:18:08.844610Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:18:08.844471Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:18:10.727622Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:18:10.726972Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:18:08.844594Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Turn the MultiIndex into regular dimensions and drop dates that contain no data at all.\n",
+ "# `how` is passed by keyword: it is clearer, and positional use is deprecated in newer xarray releases.\n",
+ "da = da.unstack().dropna('date', how='all')\n",
+ "\n",
+ "# Parse the date labels as years ('%Y') into timestamps.\n",
+ "da['date'] = pd.to_datetime(da['date'].values, format='%Y')\n",
+ "\n",
+ "# One data variable per entity.\n",
+ "ds = da.to_dataset('entity')"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 28,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:18:10.728568Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:18:10.728384Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:18:10.733089Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:18:10.732491Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:18:10.728544Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Attach the metadata collected during unit normalization to each entity's data variable.\n",
+ "for name in ds.data_vars:\n",
+ "    ds[name].attrs = entity_metadata[name]"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": 29,
|
|
|
|
+ "metadata": {
|
|
|
|
+ "execution": {
|
|
|
|
+ "iopub.execute_input": "2020-10-15T16:18:10.734074Z",
|
|
|
|
+ "iopub.status.busy": "2020-10-15T16:18:10.733864Z",
|
|
|
|
+ "iopub.status.idle": "2020-10-15T16:18:12.170363Z",
|
|
|
|
+ "shell.execute_reply": "2020-10-15T16:18:12.169960Z",
|
|
|
|
+ "shell.execute_reply.started": "2020-10-15T16:18:10.734044Z"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": [
|
|
|
|
+ "# Use the same compression settings for every data variable.\n",
+ "encoding = {x: compress_options for x in ds}\n",
+ "\n",
+ "# Create the output directory if it is missing, so the notebook also runs on a fresh checkout.\n",
+ "outpath.mkdir(parents=True, exist_ok=True)\n",
+ "ds.to_netcdf(outpath / 'primap-crf-2017v1.nc', encoding=encoding)"
|
|
|
|
+ ]
|
|
|
|
+ },
|
|
|
|
+ {
|
|
|
|
+ "cell_type": "code",
|
|
|
|
+ "execution_count": null,
|
|
|
|
+ "metadata": {},
|
|
|
|
+ "outputs": [],
|
|
|
|
+ "source": []
|
|
|
|
+ }
|
|
|
|
+ ],
|
|
|
|
+ "metadata": {
|
|
|
|
+ "kernelspec": {
|
|
|
|
+ "display_name": "Python 3",
|
|
|
|
+ "language": "python",
|
|
|
|
+ "name": "python3"
|
|
|
|
+ },
|
|
|
|
+ "language_info": {
|
|
|
|
+ "codemirror_mode": {
|
|
|
|
+ "name": "ipython",
|
|
|
|
+ "version": 3
|
|
|
|
+ },
|
|
|
|
+ "file_extension": ".py",
|
|
|
|
+ "mimetype": "text/x-python",
|
|
|
|
+ "name": "python",
|
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
|
+ "version": "3.8.5"
|
|
|
|
+ }
|
|
|
|
+ },
|
|
|
|
+ "nbformat": 4,
|
|
|
|
+ "nbformat_minor": 4
|
|
|
|
+}
|