{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Convert PRIMAP-crf 2017 v1 to netCDF\n",
    "\n",
    "Reads the CSV contained in `Jeffery-et-al-2018-PRIMAP-crf-2017-v1.zip`, converts the values of every entity to a single unit, and writes the result to `primap-crf-2017v1.nc`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import xarray as xr\n",
    "import pint_xarray\n",
    "import pathlib\n",
    "from openscm_units import unit_registry as ureg\n",
    "import os\n",
    "import tqdm\n",
    "import zipfile"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "inpath = pathlib.Path('../inputs/')\n",
    "outpath = pathlib.Path('../outputs/')\n",
    "\n",
    "# compressing output files with zlib yields a file size less than 1/3 of an uncompressed file\n",
    "compress_options = dict(zlib=True, complevel=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Read the input data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# archive to read and the CSV member inside it\n",
    "zip_path = inpath / 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1.zip'\n",
    "csv_member = 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1/Jeffery-et-al-2018-PRIMAP-crf_2017-v1.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read via pandas; the with-statement closes both the archive and the member stream\n",
    "with zipfile.ZipFile(zip_path) as inzip, inzip.open(csv_member, 'r') as csv_fd:\n",
    "    ds_raw = pd.read_csv(csv_fd, skiprows=2).to_xarray()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# set indices, this will lead to a MultiIndex'ed dataset; it is converted to a normal one later\n",
    "# a new name is used so that this cell can be re-run without failing\n",
    "ds_indexed = ds_raw.set_index({'index': ['country', 'category', 'entity']})"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 'version' is always the same, so it is dropped\n",
    "# split unit information into own array, stack the remaining variables along the date axis\n",
    "da_units_raw = ds_indexed['unit']\n",
    "da_raw = ds_indexed.drop_vars(['version', 'unit']).to_array('date')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Normalize units"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# normalize units\n",
    "\n",
    "# work on copies so that re-running this cell starts from the unmodified inputs\n",
    "da = da_raw.copy()\n",
    "da_units = da_units_raw.copy()\n",
    "\n",
    "# value of 'gwp conversions' for each entity-name suffix\n",
    "gwp_by_suffix = {\n",
    "    '': 'SARGWP100',\n",
    "    'AR4': 'AR4GWP100',\n",
    "    'AR5': 'AR5GWP100',\n",
    "    'AR5CCF': 'AR5CCFGWP100',\n",
    "}\n",
    "\n",
    "# metadata templates for the entities whose unit entity is CO2\n",
    "entity_metadata_map = {\n",
    "    f'{base_entity}{suffix}': {\n",
    "        'entity': base_entity,\n",
    "        'unit entity': 'CO2',\n",
    "        'gwp conversions': gwp,\n",
    "    }\n",
    "    for base_entity in ('FGASES', 'KYOTOGHG', 'HFCS', 'OTHERHFCS', 'OTHERPFCS', 'PFCS')\n",
    "    for suffix, gwp in gwp_by_suffix.items()\n",
    "}\n",
    "\n",
    "unit_pretranslation = {\n",
    "    'GgCO2eq': 'Gg',\n",
    "    'MtCO2eq': 'Mt',\n",
    "}\n",
    "\n",
    "preferred_units = {\n",
    "    'CO2': 'Gg CO2 / year'\n",
    "}\n",
    "\n",
    "# will be used later to set the metadata\n",
    "entity_metadata = {}\n",
    "\n",
    "# now convert each entity to a single unit, normalizing the units to scmdata names\n",
    "for entity in np.unique(da['entity']):\n",
    "    # copy the template so that the lookup table itself is never modified\n",
    "    metadata = dict(entity_metadata_map.get(entity, {'entity': entity, 'unit entity': entity}))\n",
    "    unit_entity = metadata.pop('unit entity')\n",
    "\n",
    "    # rows belonging to this entity; does not change inside the loops below\n",
    "    entity_mask = da['entity'] == entity\n",
    "\n",
    "    # normalize all units to scmdata\n",
    "    # because scmdata contains the entity in the unit, this is kind of complicated\n",
    "    for unit in np.unique(da_units.loc[{'entity': entity}]):\n",
    "        tr_unit = unit_pretranslation.get(unit, unit)\n",
    "        scm_unit = f'{tr_unit} {unit_entity} / year'\n",
    "        da_units.loc[{'index': (da_units == unit) & entity_mask}] = scm_unit\n",
    "\n",
    "    # convert all units to a single unit\n",
    "    units = sorted(np.unique(da_units.loc[{'entity': entity}]))\n",
    "    normal_unit = preferred_units.get(unit_entity, units[0])\n",
    "\n",
    "    for unit in units:\n",
    "        if unit == normal_unit:\n",
    "            continue\n",
    "        factor = ureg(unit).to(ureg(normal_unit)).magnitude\n",
    "        loc = {'index': (da_units == unit) & entity_mask}\n",
    "        da.loc[loc] *= factor\n",
    "        da_units.loc[loc] = normal_unit\n",
    "\n",
    "    metadata['units'] = normal_unit\n",
    "    entity_metadata[entity] = metadata"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Reshape and save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 'how' is passed by keyword because newer xarray versions no longer accept it positionally\n",
    "da_unstacked = da.unstack().dropna('date', how='all')\n",
    "da_unstacked['date'] = pd.to_datetime(da_unstacked['date'].values, format='%Y')\n",
    "ds = da_unstacked.to_dataset('entity')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for entity in ds.data_vars:\n",
    "    ds[entity].attrs = entity_metadata[entity]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# make sure the output directory exists before writing\n",
    "outpath.mkdir(parents=True, exist_ok=True)\n",
    "\n",
    "encoding = {x: compress_options for x in ds}\n",
    "ds.to_netcdf(outpath / 'primap-crf-2017v1.nc', encoding=encoding)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}