Mika Pflüger 4 года назад
Родитель
Коммит
d96c8b9e06
2 изменённых файла: 1 добавлено и 297 удалено
  1. 1 0
      .gitignore
  2. 0 297
      code/.ipynb_checkpoints/read_2017v1-checkpoint.ipynb

+ 1 - 0
.gitignore

@@ -0,0 +1 @@
+.ipynb_checkpoints/

+ 0 - 297
code/.ipynb_checkpoints/read_2017v1-checkpoint.ipynb

@@ -1,297 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:16:12.688993Z",
-     "iopub.status.busy": "2020-10-15T16:16:12.688809Z",
-     "iopub.status.idle": "2020-10-15T16:16:12.691487Z",
-     "shell.execute_reply": "2020-10-15T16:16:12.691055Z",
-     "shell.execute_reply.started": "2020-10-15T16:16:12.688977Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "import pandas as pd\n",
-    "import numpy as np\n",
-    "import xarray as xr\n",
-    "import pint_xarray\n",
-    "import pathlib\n",
-    "from openscm_units import unit_registry as ureg\n",
-    "import os\n",
-    "import tqdm\n",
-    "import zipfile"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 13,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:16:13.035946Z",
-     "iopub.status.busy": "2020-10-15T16:16:13.035581Z",
-     "iopub.status.idle": "2020-10-15T16:16:13.040967Z",
-     "shell.execute_reply": "2020-10-15T16:16:13.040022Z",
-     "shell.execute_reply.started": "2020-10-15T16:16:13.035910Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "inpath = pathlib.Path('../inputs/')\n",
-    "outpath = pathlib.Path('../outputs/')\n",
-    "\n",
-    "# compressing output files with gzip yields a file size less than 1/3 of an uncompressed file\n",
-    "compress_options = dict(zlib=True, complevel=1)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 14,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:16:13.325976Z",
-     "iopub.status.busy": "2020-10-15T16:16:13.325225Z",
-     "iopub.status.idle": "2020-10-15T16:16:13.334801Z",
-     "shell.execute_reply": "2020-10-15T16:16:13.333160Z",
-     "shell.execute_reply.started": "2020-10-15T16:16:13.325906Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "inzip = zipfile.ZipFile(inpath / 'Jeffery-et-al-2018-PRIMAP-crf-2017-v1.zip')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 15,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:16:13.815747Z",
-     "iopub.status.busy": "2020-10-15T16:16:13.814988Z",
-     "iopub.status.idle": "2020-10-15T16:16:14.120500Z",
-     "shell.execute_reply": "2020-10-15T16:16:14.119710Z",
-     "shell.execute_reply.started": "2020-10-15T16:16:13.815673Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# read via pandas\n",
-    "csv_fd = inzip.open('Jeffery-et-al-2018-PRIMAP-crf-2017-v1/Jeffery-et-al-2018-PRIMAP-crf_2017-v1.csv', 'r')\n",
-    "ds = pd.read_csv(csv_fd, skiprows=2).to_xarray()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 17,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:16:26.982561Z",
-     "iopub.status.busy": "2020-10-15T16:16:26.981950Z",
-     "iopub.status.idle": "2020-10-15T16:16:26.995034Z",
-     "shell.execute_reply": "2020-10-15T16:16:26.994022Z",
-     "shell.execute_reply.started": "2020-10-15T16:16:26.982505Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# set indices, this will lead to a MultiIndex'ed ds; we will convert to normal ds later\n",
-    "ds = ds.set_index({'index': ['country', 'category', 'entity']})"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 25,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:18:07.159983Z",
-     "iopub.status.busy": "2020-10-15T16:18:07.159792Z",
-     "iopub.status.idle": "2020-10-15T16:18:07.165712Z",
-     "shell.execute_reply": "2020-10-15T16:18:07.165271Z",
-     "shell.execute_reply.started": "2020-10-15T16:18:07.159967Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# always the same\n",
-    "del ds['version']\n",
-    "\n",
-    "# split unit information into own array, stack along date axis\n",
-    "da_units = ds['unit']\n",
-    "del ds['unit']\n",
-    "da = ds.to_array('date')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 26,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:18:07.306345Z",
-     "iopub.status.busy": "2020-10-15T16:18:07.306027Z",
-     "iopub.status.idle": "2020-10-15T16:18:08.843706Z",
-     "shell.execute_reply": "2020-10-15T16:18:08.843316Z",
-     "shell.execute_reply.started": "2020-10-15T16:18:07.306316Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "# normalize units\n",
-    "\n",
-    "entity_metadata_map = {}\n",
-    "\n",
-    "for entity in ('FGASES', 'KYOTOGHG', 'HFCS', 'OTHERHFCS', 'OTHERPFCS', 'PFCS'):\n",
-    "    entity_metadata_map[entity] = {\n",
-    "        'entity': entity,\n",
-    "        'unit entity': 'CO2',\n",
-    "        'gwp conversions': 'SARGWP100',\n",
-    "    }\n",
-    "    entity_metadata_map[f'{entity}AR4'] = {\n",
-    "        'entity': entity,\n",
-    "        'unit entity': 'CO2',\n",
-    "        'gwp conversions': 'AR4GWP100',\n",
-    "    }\n",
-    "    entity_metadata_map[f'{entity}AR5'] = {\n",
-    "        'entity': entity,\n",
-    "        'unit entity': 'CO2',\n",
-    "        'gwp conversions': 'AR5GWP100',\n",
-    "    }\n",
-    "    entity_metadata_map[f'{entity}AR5CCF'] = {\n",
-    "        'entity': entity,\n",
-    "        'unit entity': 'CO2',\n",
-    "        'gwp conversions': 'AR5CCFGWP100',\n",
-    "    }\n",
-    "\n",
-    "unit_pretranslation = {\n",
-    "    'GgCO2eq': 'Gg',\n",
-    "    'MtCO2eq': 'Mt',\n",
-    "}\n",
-    "\n",
-    "preferred_units = {\n",
-    "    'CO2': 'Gg CO2 / year'\n",
-    "}\n",
-    "\n",
-    "# will be used later to set the metadata\n",
-    "entity_metadata = {}\n",
-    "\n",
-    "# now convert each entity to a single unit, normalizing the units to scmdata names\n",
-    "for entity in np.unique(da['entity']):\n",
-    "    \n",
-    "    metadata = entity_metadata_map.get(entity, {'entity': entity, 'unit entity': entity})\n",
-    "    \n",
-    "    # normalize all units to scmdata\n",
-    "    # because scmdata contains the entity in the unit, this is kind of complicated\n",
-    "    units = np.unique(da_units.loc[{'entity': entity}])\n",
-    "    \n",
-    "    # translate units\n",
-    "    for unit in units:\n",
-    "        tr_unit = unit_pretranslation.get(unit, unit)\n",
-    "        \n",
-    "        scm_unit = f'{tr_unit} {metadata[\"unit entity\"]} / year'\n",
-    "        \n",
-    "        da_units.loc[{'index': (da_units == unit) & (da['entity'] == entity)}] = scm_unit\n",
-    "    \n",
-    "    # convert all units to a single unit\n",
-    "    units = sorted(np.unique(da_units.loc[{'entity': entity}]))\n",
-    "    \n",
-    "    normal_unit = preferred_units.get(metadata['unit entity'], units[0])\n",
-    "    \n",
-    "    for unit in units:\n",
-    "        if unit == normal_unit:\n",
-    "            continue\n",
-    "        factor = ureg(unit).to(ureg(normal_unit)).magnitude\n",
-    "        loc = {'index': (da_units == unit) & (da['entity'] == entity)}\n",
-    "        da.loc[loc] *= factor\n",
-    "        da_units.loc[loc] = normal_unit\n",
-    "    \n",
-    "    metadata['units'] = normal_unit\n",
-    "    del metadata['unit entity']\n",
-    "    entity_metadata[entity] = metadata"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 27,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:18:08.844610Z",
-     "iopub.status.busy": "2020-10-15T16:18:08.844471Z",
-     "iopub.status.idle": "2020-10-15T16:18:10.727622Z",
-     "shell.execute_reply": "2020-10-15T16:18:10.726972Z",
-     "shell.execute_reply.started": "2020-10-15T16:18:08.844594Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "da = da.unstack().dropna('date', 'all')\n",
-    "da['date'] = pd.to_datetime(da['date'].values, format='%Y')\n",
-    "ds = da.to_dataset('entity')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 28,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:18:10.728568Z",
-     "iopub.status.busy": "2020-10-15T16:18:10.728384Z",
-     "iopub.status.idle": "2020-10-15T16:18:10.733089Z",
-     "shell.execute_reply": "2020-10-15T16:18:10.732491Z",
-     "shell.execute_reply.started": "2020-10-15T16:18:10.728544Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "for entity in ds.keys():\n",
-    "    ds[entity].attrs = entity_metadata[entity]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 29,
-   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2020-10-15T16:18:10.734074Z",
-     "iopub.status.busy": "2020-10-15T16:18:10.733864Z",
-     "iopub.status.idle": "2020-10-15T16:18:12.170363Z",
-     "shell.execute_reply": "2020-10-15T16:18:12.169960Z",
-     "shell.execute_reply.started": "2020-10-15T16:18:10.734044Z"
-    }
-   },
-   "outputs": [],
-   "source": [
-    "encoding = {x: compress_options for x in ds}\n",
-    "ds.to_netcdf(outpath / 'primap-crf-2017v1.nc', encoding=encoding)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.8.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}