4 éve · 046b1833cf
--- a/code/.ipynb_checkpoints/read_2017v1-checkpoint.ipynb
+++ b/code/.ipynb_checkpoints/read_2017v1-checkpoint.ipynb
@@ -0,0 +1,297 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 12,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:12.688993Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:12.688809Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:12.691487Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:12.691055Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:12.688977Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "import pandas as pd\n",
			
 
				+    "import numpy as np\n",
			
 
				+    "import xarray as xr\n",
			
 
				+    "import pint_xarray\n",
			
 
				+    "import pathlib\n",
			
 
				+    "from openscm_units import unit_registry as ureg\n",
			
 
				+    "import os\n",
			
 
				+    "import tqdm\n",
			
 
				+    "import zipfile"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 13,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:13.035946Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:13.035581Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:13.040967Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:13.040022Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:13.035910Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "inpath = pathlib.Path('../inputs/')\n",
			
 
				+    "outpath = pathlib.Path('../outputs/')\n",
			
 
				+    "\n",
			
 
				+    "# compressing output files with zlib yields a file size less than 1/3 of an uncompressed file\n",
			
 
				+    "compress_options = dict(zlib=True, complevel=1)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 14,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:13.325976Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:13.325225Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:13.334801Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:13.333160Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:13.325906Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "inzip = zipfile.ZipFile(inpath / 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1.zip')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 15,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:13.815747Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:13.814988Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:14.120500Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:14.119710Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:13.815673Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# read via pandas\n",
			
 
				+    "csv_fd = inzip.open('Jeffery-et-al-2018-PRIMAP-crf-2017-v1/Jeffery-et-al-2018-PRIMAP-crf_2017-v1.csv', 'r')\n",
			
 
				+    "ds = pd.read_csv(csv_fd, skiprows=2).to_xarray()"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 17,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:26.982561Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:26.981950Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:26.995034Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:26.994022Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:26.982505Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# set indices, this will lead to a MultiIndex'ed ds; we will convert to normal ds later\n",
			
 
				+    "ds = ds.set_index({'index': ['country', 'category', 'entity']})"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 25,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:07.159983Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:07.159792Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:07.165712Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:07.165271Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:07.159967Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# the 'version' column is always the same, so drop it\n",
			
 
				+    "del ds['version']\n",
			
 
				+    "\n",
			
 
				+    "# split unit information into own array, stack along date axis\n",
			
 
				+    "da_units = ds['unit']\n",
			
 
				+    "del ds['unit']\n",
			
 
				+    "da = ds.to_array('date')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 26,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:07.306345Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:07.306027Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:08.843706Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:08.843316Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:07.306316Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# normalize units\n",
			
 
				+    "\n",
			
 
				+    "entity_metadata_map = {}\n",
			
 
				+    "\n",
			
 
				+    "for entity in ('FGASES', 'KYOTOGHG', 'HFCS', 'OTHERHFCS', 'OTHERPFCS', 'PFCS'):\n",
			
 
				+    "    entity_metadata_map[entity] = {\n",
			
 
				+    "        'entity': entity,\n",
			
 
				+    "        'unit entity': 'CO2',\n",
			
 
				+    "        'gwp conversions': 'SARGWP100',\n",
			
 
				+    "    }\n",
			
 
				+    "    entity_metadata_map[f'{entity}AR4'] = {\n",
			
 
				+    "        'entity': entity,\n",
			
 
				+    "        'unit entity': 'CO2',\n",
			
 
				+    "        'gwp conversions': 'AR4GWP100',\n",
			
 
				+    "    }\n",
			
 
				+    "    entity_metadata_map[f'{entity}AR5'] = {\n",
			
 
				+    "        'entity': entity,\n",
			
 
				+    "        'unit entity': 'CO2',\n",
			
 
				+    "        'gwp conversions': 'AR5GWP100',\n",
			
 
				+    "    }\n",
			
 
				+    "    entity_metadata_map[f'{entity}AR5CCF'] = {\n",
			
 
				+    "        'entity': entity,\n",
			
 
				+    "        'unit entity': 'CO2',\n",
			
 
				+    "        'gwp conversions': 'AR5CCFGWP100',\n",
			
 
				+    "    }\n",
			
 
				+    "\n",
			
 
				+    "unit_pretranslation = {\n",
			
 
				+    "    'GgCO2eq': 'Gg',\n",
			
 
				+    "    'MtCO2eq': 'Mt',\n",
			
 
				+    "}\n",
			
 
				+    "\n",
			
 
				+    "preferred_units = {\n",
			
 
				+    "    'CO2': 'Gg CO2 / year'\n",
			
 
				+    "}\n",
			
 
				+    "\n",
			
 
				+    "# will be used later to set the metadata\n",
			
 
				+    "entity_metadata = {}\n",
			
 
				+    "\n",
			
 
				+    "# now convert each entity to a single unit, normalizing the units to scmdata names\n",
			
 
				+    "for entity in np.unique(da['entity']):\n",
			
 
				+    "    \n",
			
 
				+    "    metadata = entity_metadata_map.get(entity, {'entity': entity, 'unit entity': entity})\n",
			
 
				+    "    \n",
			
 
				+    "    # normalize all units to scmdata\n",
			
 
				+    "    # because scmdata contains the entity in the unit, this is kind of complicated\n",
			
 
				+    "    units = np.unique(da_units.loc[{'entity': entity}])\n",
			
 
				+    "    \n",
			
 
				+    "    # translate units\n",
			
 
				+    "    for unit in units:\n",
			
 
				+    "        tr_unit = unit_pretranslation.get(unit, unit)\n",
			
 
				+    "        \n",
			
 
				+    "        scm_unit = f'{tr_unit} {metadata[\"unit entity\"]} / year'\n",
			
 
				+    "        \n",
			
 
				+    "        da_units.loc[{'index': (da_units == unit) & (da['entity'] == entity)}] = scm_unit\n",
			
 
				+    "    \n",
			
 
				+    "    # convert all units to a single unit\n",
			
 
				+    "    units = sorted(np.unique(da_units.loc[{'entity': entity}]))\n",
			
 
				+    "    \n",
			
 
				+    "    normal_unit = preferred_units.get(metadata['unit entity'], units[0])\n",
			
 
				+    "    \n",
			
 
				+    "    for unit in units:\n",
			
 
				+    "        if unit == normal_unit:\n",
			
 
				+    "            continue\n",
			
 
				+    "        factor = ureg(unit).to(ureg(normal_unit)).magnitude\n",
			
 
				+    "        loc = {'index': (da_units == unit) & (da['entity'] == entity)}\n",
			
 
				+    "        da.loc[loc] *= factor\n",
			
 
				+    "        da_units.loc[loc] = normal_unit\n",
			
 
				+    "    \n",
			
 
				+    "    metadata['units'] = normal_unit\n",
			
 
				+    "    del metadata['unit entity']\n",
			
 
				+    "    entity_metadata[entity] = metadata"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 27,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:08.844610Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:08.844471Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:10.727622Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:10.726972Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:08.844594Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "da = da.unstack().dropna('date', 'all')\n",
			
 
				+    "da['date'] = pd.to_datetime(da['date'].values, format='%Y')\n",
			
 
				+    "ds = da.to_dataset('entity')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 28,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:10.728568Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:10.728384Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:10.733089Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:10.732491Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:10.728544Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "for entity in ds.keys():\n",
			
 
				+    "    ds[entity].attrs = entity_metadata[entity]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 29,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:10.734074Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:10.733864Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:12.170363Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:12.169960Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:10.734044Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "encoding = {x: compress_options for x in ds}\n",
			
 
				+    "ds.to_netcdf(outpath / 'primap-crf-2017v1.nc', encoding=encoding)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.8.5"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 4
			
 
				+}
			
--- a/code/read_2017v1.ipynb
+++ b/code/read_2017v1.ipynb
@@ -0,0 +1,297 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 12,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:12.688993Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:12.688809Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:12.691487Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:12.691055Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:12.688977Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "import pandas as pd\n",
			
 
				+    "import numpy as np\n",
			
 
				+    "import xarray as xr\n",
			
 
				+    "import pint_xarray\n",
			
 
				+    "import pathlib\n",
			
 
				+    "from openscm_units import unit_registry as ureg\n",
			
 
				+    "import os\n",
			
 
				+    "import tqdm\n",
			
 
				+    "import zipfile"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 13,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:13.035946Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:13.035581Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:13.040967Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:13.040022Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:13.035910Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "inpath = pathlib.Path('../inputs/')\n",
			
 
				+    "outpath = pathlib.Path('../outputs/')\n",
			
 
				+    "\n",
			
 
				+    "# compressing output files with zlib yields a file size less than 1/3 of an uncompressed file\n",
			
 
				+    "compress_options = dict(zlib=True, complevel=1)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 14,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:13.325976Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:13.325225Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:13.334801Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:13.333160Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:13.325906Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "inzip = zipfile.ZipFile(inpath / 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1.zip')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 15,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:13.815747Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:13.814988Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:14.120500Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:14.119710Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:13.815673Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# read via pandas\n",
			
 
				+    "csv_fd = inzip.open('Jeffery-et-al-2018-PRIMAP-crf-2017-v1/Jeffery-et-al-2018-PRIMAP-crf_2017-v1.csv', 'r')\n",
			
 
				+    "ds = pd.read_csv(csv_fd, skiprows=2).to_xarray()"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 17,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:16:26.982561Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:16:26.981950Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:16:26.995034Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:16:26.994022Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:16:26.982505Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# set indices, this will lead to a MultiIndex'ed ds; we will convert to normal ds later\n",
			
 
				+    "ds = ds.set_index({'index': ['country', 'category', 'entity']})"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 25,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:07.159983Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:07.159792Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:07.165712Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:07.165271Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:07.159967Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# the 'version' column is always the same, so drop it\n",
			
 
				+    "del ds['version']\n",
			
 
				+    "\n",
			
 
				+    "# split unit information into own array, stack along date axis\n",
			
 
				+    "da_units = ds['unit']\n",
			
 
				+    "del ds['unit']\n",
			
 
				+    "da = ds.to_array('date')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 26,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:07.306345Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:07.306027Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:08.843706Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:08.843316Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:07.306316Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "# normalize units\n",
			
 
				+    "\n",
			
 
				+    "entity_metadata_map = {}\n",
			
 
				+    "\n",
			
 
				+    "for entity in ('FGASES', 'KYOTOGHG', 'HFCS', 'OTHERHFCS', 'OTHERPFCS', 'PFCS'):\n",
			
 
				+    "    entity_metadata_map[entity] = {\n",
			
 
				+    "        'entity': entity,\n",
			
 
				+    "        'unit entity': 'CO2',\n",
			
 
				+    "        'gwp conversions': 'SARGWP100',\n",
			
 
				+    "    }\n",
			
 
				+    "    entity_metadata_map[f'{entity}AR4'] = {\n",
			
 
				+    "        'entity': entity,\n",
			
 
				+    "        'unit entity': 'CO2',\n",
			
 
				+    "        'gwp conversions': 'AR4GWP100',\n",
			
 
				+    "    }\n",
			
 
				+    "    entity_metadata_map[f'{entity}AR5'] = {\n",
			
 
				+    "        'entity': entity,\n",
			
 
				+    "        'unit entity': 'CO2',\n",
			
 
				+    "        'gwp conversions': 'AR5GWP100',\n",
			
 
				+    "    }\n",
			
 
				+    "    entity_metadata_map[f'{entity}AR5CCF'] = {\n",
			
 
				+    "        'entity': entity,\n",
			
 
				+    "        'unit entity': 'CO2',\n",
			
 
				+    "        'gwp conversions': 'AR5CCFGWP100',\n",
			
 
				+    "    }\n",
			
 
				+    "\n",
			
 
				+    "unit_pretranslation = {\n",
			
 
				+    "    'GgCO2eq': 'Gg',\n",
			
 
				+    "    'MtCO2eq': 'Mt',\n",
			
 
				+    "}\n",
			
 
				+    "\n",
			
 
				+    "preferred_units = {\n",
			
 
				+    "    'CO2': 'Gg CO2 / year'\n",
			
 
				+    "}\n",
			
 
				+    "\n",
			
 
				+    "# will be used later to set the metadata\n",
			
 
				+    "entity_metadata = {}\n",
			
 
				+    "\n",
			
 
				+    "# now convert each entity to a single unit, normalizing the units to scmdata names\n",
			
 
				+    "for entity in np.unique(da['entity']):\n",
			
 
				+    "    \n",
			
 
				+    "    metadata = entity_metadata_map.get(entity, {'entity': entity, 'unit entity': entity})\n",
			
 
				+    "    \n",
			
 
				+    "    # normalize all units to scmdata\n",
			
 
				+    "    # because scmdata contains the entity in the unit, this is kind of complicated\n",
			
 
				+    "    units = np.unique(da_units.loc[{'entity': entity}])\n",
			
 
				+    "    \n",
			
 
				+    "    # translate units\n",
			
 
				+    "    for unit in units:\n",
			
 
				+    "        tr_unit = unit_pretranslation.get(unit, unit)\n",
			
 
				+    "        \n",
			
 
				+    "        scm_unit = f'{tr_unit} {metadata[\"unit entity\"]} / year'\n",
			
 
				+    "        \n",
			
 
				+    "        da_units.loc[{'index': (da_units == unit) & (da['entity'] == entity)}] = scm_unit\n",
			
 
				+    "    \n",
			
 
				+    "    # convert all units to a single unit\n",
			
 
				+    "    units = sorted(np.unique(da_units.loc[{'entity': entity}]))\n",
			
 
				+    "    \n",
			
 
				+    "    normal_unit = preferred_units.get(metadata['unit entity'], units[0])\n",
			
 
				+    "    \n",
			
 
				+    "    for unit in units:\n",
			
 
				+    "        if unit == normal_unit:\n",
			
 
				+    "            continue\n",
			
 
				+    "        factor = ureg(unit).to(ureg(normal_unit)).magnitude\n",
			
 
				+    "        loc = {'index': (da_units == unit) & (da['entity'] == entity)}\n",
			
 
				+    "        da.loc[loc] *= factor\n",
			
 
				+    "        da_units.loc[loc] = normal_unit\n",
			
 
				+    "    \n",
			
 
				+    "    metadata['units'] = normal_unit\n",
			
 
				+    "    del metadata['unit entity']\n",
			
 
				+    "    entity_metadata[entity] = metadata"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 27,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:08.844610Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:08.844471Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:10.727622Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:10.726972Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:08.844594Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "da = da.unstack().dropna('date', 'all')\n",
			
 
				+    "da['date'] = pd.to_datetime(da['date'].values, format='%Y')\n",
			
 
				+    "ds = da.to_dataset('entity')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 28,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:10.728568Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:10.728384Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:10.733089Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:10.732491Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:10.728544Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "for entity in ds.keys():\n",
			
 
				+    "    ds[entity].attrs = entity_metadata[entity]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 29,
			
 
				+   "metadata": {
			
 
				+    "execution": {
			
 
				+     "iopub.execute_input": "2020-10-15T16:18:10.734074Z",
			
 
				+     "iopub.status.busy": "2020-10-15T16:18:10.733864Z",
			
 
				+     "iopub.status.idle": "2020-10-15T16:18:12.170363Z",
			
 
				+     "shell.execute_reply": "2020-10-15T16:18:12.169960Z",
			
 
				+     "shell.execute_reply.started": "2020-10-15T16:18:10.734044Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "encoding = {x: compress_options for x in ds}\n",
			
 
				+    "ds.to_netcdf(outpath / 'primap-crf-2017v1.nc', encoding=encoding)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.8.5"
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 4
			
 
				+}