|
@@ -0,0 +1,2570 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "a8f3f028-ef62-4014-b911-7a61d24e3dae",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "### ToDos\n",
|
|
|
+ "- check if unit row lenght is correct"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 1,
|
|
|
+ "id": "461e34a0-47b1-44a7-ba1a-77db66ea783a",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# set environment variable (only for jupyter notebook)\n",
|
|
|
+ "import os\n",
|
|
|
+ "os.environ[\"UNFCCC_GHG_ROOT_PATH\"] = \"/Users/danielbusch/Documents/UNFCCC_non-AnnexI_data\""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 2,
|
|
|
+ "id": "83dd87db-4956-4bb1-937a-84629bfce95b",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import camelot\n",
|
|
|
+ "import primap2 as pm2\n",
|
|
|
+ "import pandas as pd\n",
|
|
|
+ "import numpy as np\n",
|
|
|
+ "from pathlib import Path\n",
|
|
|
+ "import warnings\n",
|
|
|
+ "warnings.filterwarnings(\"ignore\")\n",
|
|
|
+ "\n",
|
|
|
+ "from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 3,
|
|
|
+ "id": "c37d6d49-076c-4823-a486-83fbda3fa33f",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# ###\n",
|
|
|
+ "# configuration\n",
|
|
|
+ "# ###\n",
|
|
|
+ "\n",
|
|
|
+ "input_folder = downloaded_data_path / 'UNFCCC' / 'Guinea' / 'BUR1'\n",
|
|
|
+ "output_folder = extracted_data_path / 'UNFCCC' / 'Guinea'\n",
|
|
|
+ "if not output_folder.exists():\n",
|
|
|
+ " output_folder.mkdir()\n",
|
|
|
+ "\n",
|
|
|
+ "pdf_file = \"Rapport_IGES-Guinee-BUR1_VF.pdf\"\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 4,
|
|
|
+ "id": "87bf46ce-441e-4247-b62a-ce5ebcf26cb8",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# primap2 format conversion\n",
|
|
|
+ "coords_cols = {\n",
|
|
|
+ " \"category\": \"category\",\n",
|
|
|
+ " \"entity\": \"entity\",\n",
|
|
|
+ " \"unit\": \"unit\",\n",
|
|
|
+ "}\n",
|
|
|
+ "\n",
|
|
|
+ "coords_defaults = {\n",
|
|
|
+ " \"source\": \"GIN-GHG-Inventory\",\n",
|
|
|
+ " \"provenance\": \"measured\",\n",
|
|
|
+ " \"area\": \"GIN\",\n",
|
|
|
+ " \"scenario\": \"BUR1\",\n",
|
|
|
+ "}\n",
|
|
|
+ "\n",
|
|
|
+ "coords_terminologies = {\n",
|
|
|
+ " \"area\": \"ISO3\",\n",
|
|
|
+ " # TODO check if this is correct\n",
|
|
|
+ " \"category\": \"IPCC1996_2006_GIN_Inv\",\n",
|
|
|
+ " \"scenario\": \"PRIMAP\",\n",
|
|
|
+ "}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "23676d59-d7e9-455c-b713-7ce98b92d5d7",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "### Q: How to choose gwp_to_use?\n",
|
|
|
+ "### Q: 'unit' and 'category' are 'PRIMAP1'. Are there other options?\n",
|
|
|
+ "### Q: Why are we mapping 'NMVOCs': 'NMVOC', wouldn't it be easier to name it NMVOC in the first place?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 5,
|
|
|
+ "id": "953ddab6-07ee-4b60-82f0-f2e9ca76b1a6",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# Are we choosing this gwp\n",
|
|
|
+ "gwp_to_use = \"AR4GWP100\"\n",
|
|
|
+ "coords_value_mapping = {\n",
|
|
|
+ " 'main' : {\n",
|
|
|
+ " \"unit\": \"PRIMAP1\",\n",
|
|
|
+ " \"category\": \"PRIMAP1\",\n",
|
|
|
+ " \"entity\": {\n",
|
|
|
+ " 'HFCs': f\"HFCS ({gwp_to_use})\",\n",
|
|
|
+ " 'PFCs': f\"PFCS ({gwp_to_use})\",\n",
|
|
|
+ " 'SF6' : f\"SF6 ({gwp_to_use})\",\n",
|
|
|
+ " 'NMVOCs': 'NMVOC',\n",
|
|
|
+ " }\n",
|
|
|
+ " },\n",
|
|
|
+ " 'energy' : {\n",
|
|
|
+ " \"unit\": \"PRIMAP1\",\n",
|
|
|
+ " \"category\": \"PRIMAP1\",\n",
|
|
|
+ " \"entity\": {\n",
|
|
|
+ " 'NMVOCs': 'NMVOC',\n",
|
|
|
+ " }\n",
|
|
|
+ " },\n",
|
|
|
+ " 'lulucf' : {\n",
|
|
|
+ " \"unit\": \"PRIMAP1\",\n",
|
|
|
+ " \"category\": \"PRIMAP1\",\n",
|
|
|
+ " \"entity\": {\n",
|
|
|
+ " 'NMVOCs': 'NMVOC',\n",
|
|
|
+ " }\n",
|
|
|
+ " },\n",
|
|
|
+ " 'waste' : {\n",
|
|
|
+ " \"unit\": \"PRIMAP1\",\n",
|
|
|
+ " \"category\": \"PRIMAP1\",\n",
|
|
|
+ " \"entity\": {\n",
|
|
|
+ " 'NMVOCs': 'NMVOC',\n",
|
|
|
+ " }\n",
|
|
|
+ " },\n",
|
|
|
+ " 'trend' : {\n",
|
|
|
+ " \"unit\": \"PRIMAP1\",\n",
|
|
|
+ " \"category\": \"PRIMAP1\",\n",
|
|
|
+ " },\n",
|
|
|
+ " \n",
|
|
|
+ "}\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "filter_remove = {\n",
|
|
|
+ " 'f_memo': {\"category\": \"MEMO\"},\n",
|
|
|
+ "}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "ef888811-5803-4df7-8fd8-06830e6d9bce",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "### Q: What to put under references and rights?"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 9,
|
|
|
+ "id": "23b39c1a-700c-46f9-a3f5-33549658ad69",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "meta_data = {\n",
|
|
|
+ " \"references\": \"placeholder\",\n",
|
|
|
+ " \"rights\": \"\",\n",
|
|
|
+ " \"contact\": \"mail@johannes-guetschow.de\",\n",
|
|
|
+ " \"title\": \"Guinea. Biennial update report (BUR). BUR1\",\n",
|
|
|
+ " \"comment\": \"Read fom pdf by Daniel Busch\",\n",
|
|
|
+ " \"institution\": \"UNFCCC\",\n",
|
|
|
+ "}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 7,
|
|
|
+ "id": "2390fb91-d976-47f9-9236-a6c838e1fd56",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "page_def_templates = {\n",
|
|
|
+ " '110': {\n",
|
|
|
+ " \"area\": ['36,718,589,87'],\n",
|
|
|
+ " \"cols\": ['290,340,368,392,425,445,465,497,535,564'],\n",
|
|
|
+ " },\n",
|
|
|
+ " '111': {\n",
|
|
|
+ " \"area\": ['36,736,587,107'],\n",
|
|
|
+ " \"cols\": ['293,335,369,399,424,445,468,497,535,565'],\n",
|
|
|
+ " },\n",
|
|
|
+ " '112': {\n",
|
|
|
+ " \"area\": ['35,733,588,106'],\n",
|
|
|
+ " \"cols\": ['293,335,369,399,424,445,468,497,535,565'],\n",
|
|
|
+ " },\n",
|
|
|
+ " '113': {\n",
|
|
|
+ " \"area\": ['35,733,588,106'],\n",
|
|
|
+ " \"cols\": ['293,335,365,399,424,445,468,497,535,565'],\n",
|
|
|
+ " },\n",
|
|
|
+ " '131' : {\n",
|
|
|
+ " \"area\": ['36,718,590,83'],\n",
|
|
|
+ " \"cols\": ['293,332,370,406,442,480,516,554'],\n",
|
|
|
+ " },\n",
|
|
|
+ "}\n",
|
|
|
+ "\n",
|
|
|
+ "# for main table\n",
|
|
|
+ "header_inventory = ['Greenhouse gas source and sink categories',\n",
|
|
|
+ " 'CO2', 'CH4', \"N2O\", 'HFCs', 'PFCs', 'SF6', 'NOx', 'CO', 'NMVOCs','SO2'\n",
|
|
|
+ " ]\n",
|
|
|
+ "# TODO the extra '-' may be wrong here, check again!\n",
|
|
|
+ "unit_inventory = ['-'] + ['Gg'] * len(header_inventory) # one extra for the category columns\n",
|
|
|
+ "unit_inventory[4] = \"GgCO2eq\"\n",
|
|
|
+ "unit_inventory[5] = \"GgCO2eq\"\n",
|
|
|
+ "unit_inventory[6] = \"GgCO2eq\"\n",
|
|
|
+ "\n",
|
|
|
+ "# for energy tables\n",
|
|
|
+ "header_energy = ['Greenhouse gas source and sink categories',\n",
|
|
|
+ " 'CO2', 'CH4', \"N2O\", 'NOx', 'CO', 'NMVOCs','SO2'\n",
|
|
|
+ " ]\n",
|
|
|
+ "unit_energy = ['-'] + ['Gg'] * len(header_energy) # one extra for the category columns\n",
|
|
|
+ "\n",
|
|
|
+ "# for lulucf tables\n",
|
|
|
+ "header_lulucf = ['Greenhouse gas source and sink categories', 'CO2', 'CH4', \"N2O\", 'NOx', 'CO', 'NMVOCs']\n",
|
|
|
+ "unit_lulucf = ['-'] + ['Gg'] * (len(header_lulucf) - 1)\n",
|
|
|
+ "\n",
|
|
|
+ "# for waste table\n",
|
|
|
+ "header_waste = ['Greenhouse gas source and sink categories', 'CO2', 'CH4', \"N2O\", 'NOx', 'CO', 'NMVOCs', 'SO2']\n",
|
|
|
+ "unit_waste = ['-'] + ['Gg'] * (len(header_waste) - 1)\n",
|
|
|
+ "\n",
|
|
|
+ "# for trend table (unit is always Gg for this table)\n",
|
|
|
+ "header_trend = ['data1990', 'data1995', \"data2000\", 'data2005', 'data2010', 'data2015', 'data2018', 'data2019']\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "# define config dict\n",
|
|
|
+ "inv_conf = {\n",
|
|
|
+ " 'header': header_inventory,\n",
|
|
|
+ " 'unit': unit_inventory,\n",
|
|
|
+ " 'header_energy' : header_energy,\n",
|
|
|
+ " 'unit_energy' : unit_energy,\n",
|
|
|
+ " 'header_lulucf' : header_lulucf,\n",
|
|
|
+ " 'unit_lulucf' : unit_lulucf,\n",
|
|
|
+ " 'header_waste' : header_waste,\n",
|
|
|
+ " 'unit_waste' : unit_waste,\n",
|
|
|
+ " 'header_trend' : header_trend,\n",
|
|
|
+ " 'entity_row': 0,\n",
|
|
|
+ " 'unit_row': 1,\n",
|
|
|
+ " 'index_cols': \"Greenhouse gas source and sink categories\",\n",
|
|
|
+ " 'year': {'110' : 1990,\n",
|
|
|
+ " '111' : 2000,\n",
|
|
|
+ " '112' : 2010,\n",
|
|
|
+ " '113' : 2019,\n",
|
|
|
+ " '116' : 1990,\n",
|
|
|
+ " '117' : 2000,\n",
|
|
|
+ " '118' : 2010,\n",
|
|
|
+ " '119' : 2019,\n",
|
|
|
+ " '124' : 1990,\n",
|
|
|
+ " '125' : 2000,\n",
|
|
|
+ " '126' : 2010,\n",
|
|
|
+ " '127' : 2019,\n",
|
|
|
+ " },\n",
|
|
|
+ " 'header_long': [\"orig_cat_name\", \"entity\", \"unit\", \"time\", \"data\"],\n",
|
|
|
+ " \"cat_code_regexp\" : r'^(?P<code>[a-zA-Z0-9\\.]{1,11})[\\s\\.].*'\n",
|
|
|
+ "}"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "cd0b97f8-acbb-4df1-9764-b2d0f6af39ba",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## 1. Read main tables - pages 110, 111, 112, 113"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 19,
|
|
|
+ "id": "4357ddd0-e9ee-4b2b-a765-c36411df63e0",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 110.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 111.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 112.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 113.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "Converting to interchange format.\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "pages = ['110', '111', '112', '113']\n",
|
|
|
+ "df_all_dict = {}\n",
|
|
|
+ "for page in pages:\n",
|
|
|
+ " \n",
|
|
|
+ " print(\"-\"*45)\n",
|
|
|
+ " print(f\"Reading table from page {page}.\")\n",
|
|
|
+ " \n",
|
|
|
+ " tables_inventory_original = camelot.read_pdf(\n",
|
|
|
+ " str(input_folder / pdf_file),\n",
|
|
|
+ " pages=page,\n",
|
|
|
+ " table_areas=page_def_templates[page][\"area\"],\n",
|
|
|
+ " columns=page_def_templates[page][\"cols\"],\n",
|
|
|
+ " flavor=\"stream\",\n",
|
|
|
+ " split_text=True)\n",
|
|
|
+ " \n",
|
|
|
+ " print(\"Reading complete.\")\n",
|
|
|
+ " \n",
|
|
|
+ " df_inventory = tables_inventory_original[0].df.copy()\n",
|
|
|
+ "\n",
|
|
|
+ " # move broken text in correct row (page 113 is fine)\n",
|
|
|
+ " if page in ['110', '111', '112']:\n",
|
|
|
+ " df_inventory.at[4, 0] = \"1.A.1 - Industries énergétiques\"\n",
|
|
|
+ " df_inventory = df_inventory.drop(index=3)\n",
|
|
|
+ " df_inventory.at[8, 0] = \"1.A.4 - Autres secteurs\"\n",
|
|
|
+ " df_inventory = df_inventory.drop(index=7)\n",
|
|
|
+ "\n",
|
|
|
+ " # add header and unit\n",
|
|
|
+ " df_header = pd.DataFrame([inv_conf[\"header\"], inv_conf[\"unit\"]])\n",
|
|
|
+ " df_inventory = pd.concat([df_header, df_inventory], axis=0, join='outer').reset_index(drop=True)\n",
|
|
|
+ " df_inventory = pm2.pm2io.nir_add_unit_information(df_inventory,\n",
|
|
|
+ " unit_row=inv_conf[\"unit_row\"],\n",
|
|
|
+ " entity_row=inv_conf[\"entity_row\"],\n",
|
|
|
+ " regexp_entity=\".*\",\n",
|
|
|
+ " regexp_unit=\".*\",\n",
|
|
|
+ " default_unit=\"Gg\")\n",
|
|
|
+ " \n",
|
|
|
+ " print(\"Added unit information.\")\n",
|
|
|
+ " \n",
|
|
|
+ " # set index\n",
|
|
|
+ " df_inventory = df_inventory.set_index(inv_conf[\"index_cols\"])\n",
|
|
|
+ "\n",
|
|
|
+ " # convert to long format\n",
|
|
|
+ " df_inventory_long = pm2.pm2io.nir_convert_df_to_long(df_inventory, inv_conf[\"year\"][page],\n",
|
|
|
+ " inv_conf[\"header_long\"])\n",
|
|
|
+ "\n",
|
|
|
+ " # extract category from tuple\n",
|
|
|
+ " df_inventory_long[\"orig_cat_name\"] = df_inventory_long[\"orig_cat_name\"].str[0] \n",
|
|
|
+ "\n",
|
|
|
+ " # prep for conversion to PM2 IF and native format\n",
|
|
|
+ " # make a copy of the categories row\n",
|
|
|
+ " df_inventory_long[\"category\"] = df_inventory_long[\"orig_cat_name\"]\n",
|
|
|
+ "\n",
|
|
|
+ " # replace cat names by codes in col \"category\"\n",
|
|
|
+ " # first the manual replacements\n",
|
|
|
+ " # TODO: move this to config section\n",
|
|
|
+ " inv_conf[\"cat_codes_manual\"]['main'] = {\n",
|
|
|
+ " 'Éléments pour mémoire': 'MEMO',\n",
|
|
|
+ " 'Soutes internationales': 'M.BK',\n",
|
|
|
+ " '1.A.3.a.i - Aviation internationale (soutes internationales)': 'M.BK.A',\n",
|
|
|
+ " '1.A.3.d.i - Navigation internationale (soutes internationales)' : 'M.BK.M',\n",
|
|
|
+ " '1.A.5.c - Opérations multilatérales' : 'M.MULTIOP',\n",
|
|
|
+ " 'Total des émissions et absorptions nationales': \"0\",\n",
|
|
|
+ " '2A5: Autre': '2A5', \n",
|
|
|
+ " }\n",
|
|
|
+ " df_inventory_long[\"category\"] = \\\n",
|
|
|
+ " df_inventory_long[\"category\"].replace(inv_conf[\"cat_codes_manual\"]['main']) \n",
|
|
|
+ "\n",
|
|
|
+ " df_inventory_long[\"category\"] = df_inventory_long[\"category\"].str.replace(\".\", \"\")\n",
|
|
|
+ " \n",
|
|
|
+ " # then the regex replacements\n",
|
|
|
+ " repl = lambda m: m.group('code')\n",
|
|
|
+ " df_inventory_long[\"category\"] = \\\n",
|
|
|
+ " df_inventory_long[\"category\"].str.replace(inv_conf[\"cat_code_regexp\"], repl,\n",
|
|
|
+ " regex=True)\n",
|
|
|
+ "\n",
|
|
|
+ " df_inventory_long = df_inventory_long.reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ " \n",
|
|
|
+ " df_inventory_long[\"data\"] = df_inventory_long[\"data\"].str.replace(\",\", \".\")\n",
|
|
|
+ " df_inventory_long[\"data\"] = df_inventory_long[\"data\"].str.replace(\"NE1\", \"NE\")\n",
|
|
|
+ "\n",
|
|
|
+ " # make sure all col headers are str\n",
|
|
|
+ " df_inventory_long.columns = df_inventory_long.columns.map(str)\n",
|
|
|
+ " df_inventory_long = df_inventory_long.drop(columns=[\"orig_cat_name\"])\n",
|
|
|
+ " \n",
|
|
|
+ " df_all_dict[page] = df_inventory_long\n",
|
|
|
+ "\n",
|
|
|
+ "df_all = pd.concat([df_all_dict['110'], df_all_dict['111'], df_all_dict['112'], df_all_dict['113']],\n",
|
|
|
+ " axis=0,\n",
|
|
|
+ " join='outer').reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"Converting to interchange format.\")\n",
|
|
|
+ "df_all_IF = pm2.pm2io.convert_long_dataframe_if(\n",
|
|
|
+ " df_all,\n",
|
|
|
+ " coords_cols=coords_cols,\n",
|
|
|
+ " #add_coords_cols=add_coords_cols,\n",
|
|
|
+ " coords_defaults=coords_defaults,\n",
|
|
|
+ " coords_terminologies=coords_terminologies,\n",
|
|
|
+ " coords_value_mapping=coords_value_mapping['main'],\n",
|
|
|
+ " #coords_value_filling=coords_value_filling,\n",
|
|
|
+ " filter_remove=filter_remove,\n",
|
|
|
+ " #filter_keep=filter_keep,\n",
|
|
|
+ " meta_data=meta_data,\n",
|
|
|
+ " convert_str=True,\n",
|
|
|
+ " time_format=\"%Y\",\n",
|
|
|
+ " )"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 12,
|
|
|
+ "id": "f1a4535e-3abc-45d0-9309-fd7991b1cb95",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 1.A.1, CO2, 2010.\n",
|
|
|
+ "[422.474]\n",
|
|
|
+ "Value matches expected value.\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 2, SO2, 1990.\n",
|
|
|
+ "[0.097]\n",
|
|
|
+ "Value matches expected value.\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 1.A.3.a.i, N2O, 2000.\n",
|
|
|
+ "[6.e-05]\n",
|
|
|
+ "Value matches expected value.\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 2.H.2, NMVOC, 2019.\n",
|
|
|
+ "[2.506]\n",
|
|
|
+ "Value matches expected value.\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 1.A.1, CH4, 2019.\n",
|
|
|
+ "[0.0011]\n",
|
|
|
+ "Value matches expected value.\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "### Test individual values from the tables ###\n",
|
|
|
+ "# TODO and note: this function is work in progress\n",
|
|
|
+ "# Use assert statements and print error message\n",
|
|
|
+ "# with category, entity, year, expected value and actual value\n",
|
|
|
+ "\n",
|
|
|
+ "### Test individual values from the tables ###\n",
|
|
|
+ "def assert_individual_value(\n",
|
|
|
+ " df,\n",
|
|
|
+ " category_column,\n",
|
|
|
+ " entity_column,\n",
|
|
|
+ " category,\n",
|
|
|
+ " entity,\n",
|
|
|
+ " year,\n",
|
|
|
+ " expected_value\n",
|
|
|
+ "):\n",
|
|
|
+ " arr = df.loc[(df[category_column] == category) & (df[entity_column] == entity), year].values\n",
|
|
|
+ " print(arr)\n",
|
|
|
+ " if len(arr) > 1:\n",
|
|
|
+ " print(f\"More than one value found for {category}, {entity}, {year}!\")\n",
|
|
|
+ "\n",
|
|
|
+ " # TODO: It looks like this will be true when the value equals 0\n",
|
|
|
+ " if not arr:\n",
|
|
|
+ " print((f\"No value found for {category}, {entity}, {year}!\"))\n",
|
|
|
+ " \n",
|
|
|
+ " if not arr[0] == expected_value:\n",
|
|
|
+ " print(f\"Expected value {expected_value}, actual value is {arr[0]}\")\n",
|
|
|
+ "\n",
|
|
|
+ " if arr[0] == expected_value:\n",
|
|
|
+ " print(\"Value matches expected value.\")\n",
|
|
|
+ "\n",
|
|
|
+ " return\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "test_cases = {\n",
|
|
|
+ " \"1\" : {\n",
|
|
|
+ " \"category\" : \"1.A.1\",\n",
|
|
|
+ " 'entity' : \"CO2\",\n",
|
|
|
+ " \"year\" : \"2010\",\n",
|
|
|
+ " \"expected_value\" : 422.474,\n",
|
|
|
+ " },\n",
|
|
|
+ " \"2\" : {\n",
|
|
|
+ " \"category\" : \"2\",\n",
|
|
|
+ " 'entity' : \"SO2\",\n",
|
|
|
+ " \"year\" : \"1990\",\n",
|
|
|
+ " \"expected_value\" : 0.097,\n",
|
|
|
+ " },\n",
|
|
|
+ " \"3\" : {\n",
|
|
|
+ " \"category\" : \"1.A.3.a.i\",\n",
|
|
|
+ " 'entity' : \"N2O\",\n",
|
|
|
+ " \"year\" : \"2000\",\n",
|
|
|
+ " \"expected_value\" : 6e-5,\n",
|
|
|
+ " },\n",
|
|
|
+ " '4' : {\n",
|
|
|
+ " \"category\" : \"2.H.2\",\n",
|
|
|
+ " 'entity' : \"NMVOC\",\n",
|
|
|
+ " \"year\" : \"2019\",\n",
|
|
|
+ " \"expected_value\" : 2.506,\n",
|
|
|
+ " },\n",
|
|
|
+ " '5' : {\n",
|
|
|
+ " \"category\" : \"1.A.1\",\n",
|
|
|
+ " 'entity' : \"CH4\",\n",
|
|
|
+ " \"year\" : \"2019\",\n",
|
|
|
+ " \"expected_value\" : 0.0011,\n",
|
|
|
+ " }\n",
|
|
|
+ "}\n",
|
|
|
+ "\n",
|
|
|
+ "for key in test_cases.keys():\n",
|
|
|
+ " print(\"-\"*50)\n",
|
|
|
+ " print(f\"Testing combination {test_cases[key][\"category\"]}, {test_cases[key][\"entity\"]}, {test_cases[key][\"year\"]}.\")\n",
|
|
|
+ " assert_individual_value(\n",
|
|
|
+ " df = df_all_IF,\n",
|
|
|
+ " category_column = \"category (IPCC1996_2006_GIN_Inv)\",\n",
|
|
|
+ " entity_column = \"entity\",\n",
|
|
|
+ " category = test_cases[key][\"category\"],\n",
|
|
|
+ " entity = test_cases[key][\"entity\"],\n",
|
|
|
+ " year = test_cases[key][\"year\"],\n",
|
|
|
+ " expected_value = test_cases[key][\"expected_value\"])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 24,
|
|
|
+ "id": "23258414-84b2-4a99-8f48-f471f5ebf75a",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column source\n",
|
|
|
+ "['GIN-GHG-Inventory']\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column scenario (PRIMAP)\n",
|
|
|
+ "['BUR1']\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column provenance\n",
|
|
|
+ "['measured']\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column area (ISO3)\n",
|
|
|
+ "['GIN']\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column entity\n",
|
|
|
+ "['CH4' 'CO' 'CO2' 'HFCS (AR4GWP100)' 'N2O' 'NMVOC' 'NOx'\n",
|
|
|
+ " 'PFCS (AR4GWP100)' 'SF6' 'SO2']\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column unit\n",
|
|
|
+ "['Gg CH4 / yr' 'Gg CO / yr' 'Gg CO2 / yr' 'Gg N2O / yr' 'Gg NMVOC / yr'\n",
|
|
|
+ " 'Gg NOx / yr' 'Gg SF6 / yr' 'Gg SO2 / yr']\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column category (IPCC1996_2006_GIN_Inv)\n",
|
|
|
+ "['0' '1' '1.A' '1.A.1' '1.A.2' '1.A.3' '1.A.4' '1.A.5' '1.B' '1.C' '2'\n",
|
|
|
+ " '2.A' '2.A.1' '2.A.2' '2.A.3' '2.A.4' '2.A.5' '2.B' '2.C' '2.C.1' '2.C.2'\n",
|
|
|
+ " '2.C.3' '2.C.4' '2.C.5' '2.C.6' '2.C.7' '2.D' '2.D.1' '2.D.2' '2.D.3'\n",
|
|
|
+ " '2.D.4' '2.E' '2.F' '2.F.1' '2.F.2' '2.F.3' '2.F.4' '2.F.5' '2.F.6' '2.G'\n",
|
|
|
+ " '2.H' '2.H.1' '2.H.2' '2.H.3' '3' '3.A' '3.A.1' '3.A.2' '3.B' '3.B.1'\n",
|
|
|
+ " '3.B.2' '3.B.3' '3.B.4' '3.B.5' '3.B.6' '3.C' '3.C.1' '3.C.2' '3.C.3'\n",
|
|
|
+ " '3.C.4' '3.C.5' '3.C.6' '3.C.7' '3.C.8' '3.D' '3.D.1' '3.D.2' '4' '4.A'\n",
|
|
|
+ " '4.B' '4.C' '4.D' '4.E' '5' 'M.BK' 'M.BK.A' 'M.BK.M' 'M.MULTIOP']\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column 1990\n",
|
|
|
+ "[ 6.5202000e+01 6.4650000e+00 3.2000000e-02 6.0000000e-03\n",
|
|
|
+ " 2.5000000e-02 6.4020000e+00 0.0000000e+00 nan\n",
|
|
|
+ " 5.6987000e+01 5.5634000e+01 5.3796000e+01 1.8381000e+00\n",
|
|
|
+ " 1.3530000e+00 1.7500000e+00 1.0290000e+00 2.1700000e-01\n",
|
|
|
+ " 5.0400000e-01 5.0000000e-06 1.6241700e+02 1.2418200e+02\n",
|
|
|
+ " 1.5800000e-01 2.8000000e-02 6.2990000e+00 1.1769600e+02\n",
|
|
|
+ " 3.8236000e+01 -1.6177575e+04 1.3104990e+03 8.1425700e+02\n",
|
|
|
+ " 1.5300000e+02 3.2603400e+02 1.7208000e+01 1.2779000e+01\n",
|
|
|
+ " -1.7502977e+04 -1.7499771e+04 -1.7508456e+04 6.9024000e+01\n",
|
|
|
+ " -6.0339000e+01 2.2800000e-01 -3.4340000e+00 2.1240000e+00\n",
|
|
|
+ " 7.1900000e-01 2.5770000e+00 1.1800000e-01 1.0000000e-03\n",
|
|
|
+ " 3.0000000e-02 8.0000000e-02 1.1344000e+01 2.1900000e+00\n",
|
|
|
+ " 1.2400000e-01 2.0660000e+00 2.6800000e-01 3.4000000e-03\n",
|
|
|
+ " 2.6500000e-01 2.0000000e-05 1.4312200e+01 1.3193000e+01\n",
|
|
|
+ " 5.2800000e-02 1.0000000e-02 1.2329000e+00 1.1897200e+01\n",
|
|
|
+ " 1.1192000e+00 1.0920000e+01 8.6260000e+00 2.1130000e+00\n",
|
|
|
+ " 3.9800000e-01 3.8580000e+00 2.2570000e+00 2.2940000e+00\n",
|
|
|
+ " 9.7000000e-02]\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column 2000\n",
|
|
|
+ "[ 1.1998100e+02 6.4890000e+00 2.4000000e-02 1.8000000e-02\n",
|
|
|
+ " 1.5000000e-01 6.2970000e+00 0.0000000e+00 nan\n",
|
|
|
+ " 1.1056800e+02 1.0791100e+02 1.0429800e+02 3.6134500e+00\n",
|
|
|
+ " 2.6570000e+00 2.9250000e+00 2.0540000e+00 2.0900000e-01\n",
|
|
|
+ " 6.6200000e-01 1.5000000e-05 2.5243200e+02 1.7734200e+02\n",
|
|
|
+ " 1.1800000e-01 8.4000000e-02 3.6605000e+01 1.4053500e+02\n",
|
|
|
+ " 7.5090000e+01 -1.3893667e+04 1.8410300e+03 6.0736800e+02\n",
|
|
|
+ " 4.6044700e+02 7.6103000e+02 1.2185000e+01 1.5640000e+01\n",
|
|
|
+ " -1.5752375e+04 -1.5749970e+04 -1.5766453e+04 1.6484000e+01\n",
|
|
|
+ " 2.6800000e-01 -2.6730000e+00 2.0380000e+00 2.1580000e+00\n",
|
|
|
+ " 2.1500000e-01 4.7480000e+00 1.2500000e-01 5.0000000e-03\n",
|
|
|
+ " 4.0000000e-03 4.9000000e-02 6.8000000e-02 4.2690000e+00\n",
|
|
|
+ " 2.4300000e-01 4.0260000e+00 3.5400000e-01 3.0000000e-03\n",
|
|
|
+ " 3.5100000e-01 6.0000000e-05 1.8179000e+01 1.6697000e+01\n",
|
|
|
+ " 3.9000000e-02 3.0000000e-02 6.9480000e+00 9.6800000e+00\n",
|
|
|
+ " 1.4820000e+00 2.5060000e+00 1.7676000e+01 1.3170000e+01\n",
|
|
|
+ " 1.5740000e+00 1.1960000e+00 7.9620000e+00 2.4380000e+00\n",
|
|
|
+ " 4.5050000e+00 1.5600000e-01]\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column 2010\n",
|
|
|
+ "[ 1.9700000e+02 4.8490000e+00 1.6000000e-02 2.8000000e-02\n",
|
|
|
+ " 3.1600000e-01 4.4890000e+00 0.0000000e+00 nan\n",
|
|
|
+ " 1.8761700e+02 1.8676900e+02 1.8045400e+02 6.3150000e+00\n",
|
|
|
+ " 8.4800000e-01 4.5340000e+00 3.3230000e+00 3.2000000e-01\n",
|
|
|
+ " 8.9200000e-01 3.0000000e-04 1.9571300e+02 1.7174700e+02\n",
|
|
|
+ " 8.2000000e-02 1.3300000e-01 7.7000000e+01 9.4532000e+01\n",
|
|
|
+ " 2.3966000e+01 -1.0691033e+04 2.3437780e+03 4.2247400e+02\n",
|
|
|
+ " 7.0899600e+02 1.2051170e+03 7.1920000e+00 1.9142000e+01\n",
|
|
|
+ " -1.3057077e+04 -1.3052876e+04 -1.3040518e+04 8.9270000e+00\n",
|
|
|
+ " -2.1284000e+01 -4.5210000e+00 3.1240000e+00 3.6900000e+01\n",
|
|
|
+ " 3.0400000e+00 7.5620000e+00 1.1900000e-01 3.0000000e-03\n",
|
|
|
+ " 6.0000000e-03 6.0000000e-02 5.1000000e-02 6.9670000e+00\n",
|
|
|
+ " 7.7000000e-02 6.8900000e+00 4.7500000e-01 5.0000000e-03\n",
|
|
|
+ " 4.7000000e-01 1.0000000e-03 2.7269600e+01 2.1977000e+01\n",
|
|
|
+ " 2.7300000e-02 4.6100000e-02 1.4539500e+01 7.3641000e+00\n",
|
|
|
+ " 5.2926000e+00 3.3038000e+00 1.9888000e+00 1.7748000e+01\n",
|
|
|
+ " 1.6310000e+01 1.0920000e+00 1.8330000e+00 1.1701000e+01\n",
|
|
|
+ " 1.6840000e+00 1.4380000e+00 1.0400000e-01]\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Unique values in column 2019\n",
|
|
|
+ "[ 3.120340e+02 5.866000e+00 1.100000e-03 3.270000e-02 4.584000e-01\n",
|
|
|
+ " 5.374200e+00 0.000000e+00 nan 2.995030e+02 2.985330e+02\n",
|
|
|
+ " 2.882390e+02 1.029400e+01 9.700000e-01 6.665000e+00 5.170000e+00\n",
|
|
|
+ " 3.570000e-01 1.138000e+00 5.000000e-04 2.626700e+02 2.244300e+02\n",
|
|
|
+ " 5.400000e-03 1.391000e-01 1.121100e+02 1.121800e+02 3.823600e+01\n",
|
|
|
+ " -9.360370e+03 3.037736e+03 2.735500e+01 8.249530e+02 2.168661e+03\n",
|
|
|
+ " 1.676700e+01 1.114700e+02 6.984600e+01 2.097200e+01 2.065200e+01\n",
|
|
|
+ " -1.251307e+04 -1.251512e+04 -1.254581e+04 3.068900e+01 2.053000e+00\n",
|
|
|
+ " 3.491000e+00 6.619700e+01 6.141000e+00 1.211700e+01 1.770000e-01\n",
|
|
|
+ " 2.000000e-04 6.500000e-03 1.090000e-01 6.100000e-02 1.134400e+01\n",
|
|
|
+ " 8.900000e-02 1.125500e+01 5.970000e-01 6.000000e-03 5.910000e-01\n",
|
|
|
+ " 2.000000e-03 3.632300e+01 3.004400e+01 5.400000e-02 2.124900e+01\n",
|
|
|
+ " 8.739000e+00 6.279000e+00 3.773000e+00 2.506000e+00 2.822000e+01\n",
|
|
|
+ " 2.592000e+01 7.200000e-02 2.163000e+00 2.168000e+01 2.009000e+00\n",
|
|
|
+ " 2.294000e+00 3.280000e-01]\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "### check data for errors ###\n",
|
|
|
+ "# print a few things to see if it looks \"normal\"\n",
|
|
|
+ "for c in df_all_IF.columns:\n",
|
|
|
+ " print('-'*50)\n",
|
|
|
+ " print(f\"Unique values in column {c}\")\n",
|
|
|
+ " print(df_all_IF[c].unique())"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 25,
|
|
|
+ "id": "07812254-fb73-4cb5-ae45-a96a2f2273d4",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stderr",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\u001b[32m2024-03-21 16:58:31.197\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2.pm2io._interchange_format\u001b[0m:\u001b[36mfrom_interchange_format\u001b[0m:\u001b[36m320\u001b[0m - \u001b[34m\u001b[1mExpected array shapes: [[1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78], [1, 1, 1, 1, 10, 78]], resulting in size 7,800.\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-21 16:58:31.323\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprimap2._data_format\u001b[0m:\u001b[36mensure_valid_attributes\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mReference information is not a DOI: 'placeholder'\u001b[0m\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "### convert to primap2 format ###\n",
|
|
|
+ "data_pm2_main = pm2.pm2io.from_interchange_format(df_all_IF)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "88d4e68e-f1f4-4c7d-b710-c749296a16ca",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "## 2. Read in sector tables for energy - pages 116, 117, 118, 119"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 27,
|
|
|
+ "id": "251c3495-8506-4f43-9a97-094b5fb16947",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 116.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 117.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 118.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 119.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "Converting to interchange format.\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>source</th>\n",
|
|
|
+ " <th>scenario (PRIMAP)</th>\n",
|
|
|
+ " <th>provenance</th>\n",
|
|
|
+ " <th>area (ISO3)</th>\n",
|
|
|
+ " <th>entity</th>\n",
|
|
|
+ " <th>unit</th>\n",
|
|
|
+ " <th>category (IPCC1996_2006_GIN_Inv)</th>\n",
|
|
|
+ " <th>1990</th>\n",
|
|
|
+ " <th>2000</th>\n",
|
|
|
+ " <th>2010</th>\n",
|
|
|
+ " <th>2019</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>0</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>6.465</td>\n",
|
|
|
+ " <td>6.489</td>\n",
|
|
|
+ " <td>4.849</td>\n",
|
|
|
+ " <td>5.821</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>1</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1.A</td>\n",
|
|
|
+ " <td>6.465</td>\n",
|
|
|
+ " <td>6.489</td>\n",
|
|
|
+ " <td>4.849</td>\n",
|
|
|
+ " <td>5.821</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1.A.1</td>\n",
|
|
|
+ " <td>0.032</td>\n",
|
|
|
+ " <td>0.024</td>\n",
|
|
|
+ " <td>0.016</td>\n",
|
|
|
+ " <td>0.001</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>3</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1.A.1.a</td>\n",
|
|
|
+ " <td>0.032</td>\n",
|
|
|
+ " <td>0.024</td>\n",
|
|
|
+ " <td>0.016</td>\n",
|
|
|
+ " <td>0.001</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>4</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1.A.1.a.i</td>\n",
|
|
|
+ " <td>0.032</td>\n",
|
|
|
+ " <td>0.024</td>\n",
|
|
|
+ " <td>0.016</td>\n",
|
|
|
+ " <td>0.001</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>...</th>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>373</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>1.A.5.b.iii</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>374</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>1.A.5.c</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>375</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>1.B</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>376</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>M.BK.M</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>377</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>M.MULTIOP</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "<p>378 rows × 11 columns</p>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " source scenario (PRIMAP) provenance area (ISO3) entity \\\n",
|
|
|
+ "0 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "1 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "2 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "3 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "4 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ ".. ... ... ... ... ... \n",
|
|
|
+ "373 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "374 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "375 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "376 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "377 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "\n",
|
|
|
+ " unit category (IPCC1996_2006_GIN_Inv) 1990 2000 2010 2019 \n",
|
|
|
+ "0 Gg CH4 / yr 1 6.465 6.489 4.849 5.821 \n",
|
|
|
+ "1 Gg CH4 / yr 1.A 6.465 6.489 4.849 5.821 \n",
|
|
|
+ "2 Gg CH4 / yr 1.A.1 0.032 0.024 0.016 0.001 \n",
|
|
|
+ "3 Gg CH4 / yr 1.A.1.a 0.032 0.024 0.016 0.001 \n",
|
|
|
+ "4 Gg CH4 / yr 1.A.1.a.i 0.032 0.024 0.016 0.001 \n",
|
|
|
+ ".. ... ... ... ... ... ... \n",
|
|
|
+ "373 Gg SO2 / yr 1.A.5.b.iii NaN NaN NaN NaN \n",
|
|
|
+ "374 Gg SO2 / yr 1.A.5.c NaN NaN NaN NaN \n",
|
|
|
+ "375 Gg SO2 / yr 1.B NaN NaN NaN NaN \n",
|
|
|
+ "376 Gg SO2 / yr M.BK.M NaN NaN NaN NaN \n",
|
|
|
+ "377 Gg SO2 / yr M.MULTIOP NaN NaN NaN NaN \n",
|
|
|
+ "\n",
|
|
|
+ "[378 rows x 11 columns]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 27,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "pages = ['116', '117', '118', '119']\n",
|
|
|
+ "df_energy_dict = {}\n",
|
|
|
+ "for page in pages:\n",
|
|
|
+ " print(\"-\"*45)\n",
|
|
|
+ " print(f\"Reading table from page {page}.\")\n",
|
|
|
+ " \n",
|
|
|
+ " tables_inventory_original = camelot.read_pdf(\n",
|
|
|
+ " str(input_folder / pdf_file),\n",
|
|
|
+ " pages=page,\n",
|
|
|
+ " flavor=\"lattice\",\n",
|
|
|
+ " split_text=True\n",
|
|
|
+ " )\n",
|
|
|
+ " \n",
|
|
|
+ " print(\"Reading complete.\")\n",
|
|
|
+ "\n",
|
|
|
+ " # cut last two lines of second table to ignore additional information regarding biomass for energy production \n",
|
|
|
+ " df_energy_year = pd.concat([tables_inventory_original[0].df[2:],\n",
|
|
|
+ " tables_inventory_original[1].df[3:-2]],\n",
|
|
|
+ " axis=0,\n",
|
|
|
+ " join='outer').reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ " \n",
|
|
|
+ " # drop duplicate lines - 1.A.3.d.i / 1.A.3.a.i / 1.A.5.c\n",
|
|
|
+ " # TODO: better to find the index of the line and then drop it by the index\n",
|
|
|
+ " df_energy_year = df_energy_year.drop(index=[27, 32, 50]) \n",
|
|
|
+ " \n",
|
|
|
+ " # add header and unit\n",
|
|
|
+ " df_header = pd.DataFrame([inv_conf[\"header_energy\"], inv_conf[\"unit_energy\"]])\n",
|
|
|
+ "\n",
|
|
|
+ " df_energy_year = pd.concat([df_header, df_energy_year], axis=0, join='outer').reset_index(drop=True)\n",
|
|
|
+ " \n",
|
|
|
+ " df_energy_year = pm2.pm2io.nir_add_unit_information(df_energy_year,\n",
|
|
|
+ " unit_row=inv_conf[\"unit_row\"],\n",
|
|
|
+ " entity_row=inv_conf[\"entity_row\"],\n",
|
|
|
+ " regexp_entity=\".*\",\n",
|
|
|
+ " regexp_unit=\".*\",\n",
|
|
|
+ " default_unit=\"Gg\")\n",
|
|
|
+ " \n",
|
|
|
+ " print(\"Added unit information.\")\n",
|
|
|
+ " # set index\n",
|
|
|
+ " df_energy_year = df_energy_year.set_index(inv_conf[\"index_cols\"])\n",
|
|
|
+ "\n",
|
|
|
+ " # convert to long format\n",
|
|
|
+ " df_energy_year_long = pm2.pm2io.nir_convert_df_to_long(df_energy_year, inv_conf[\"year\"][page],\n",
|
|
|
+ " inv_conf[\"header_long\"])\n",
|
|
|
+ " \n",
|
|
|
+ " # extract from tuple\n",
|
|
|
+ " df_energy_year_long[\"orig_cat_name\"] = df_energy_year_long[\"orig_cat_name\"].str[0] \n",
|
|
|
+ "\n",
|
|
|
+ " # prep for conversion to PM2 IF and native format\n",
|
|
|
+ " # make a copy of the categories row\n",
|
|
|
+ " df_energy_year_long[\"category\"] = df_energy_year_long[\"orig_cat_name\"]\n",
|
|
|
+ "\n",
|
|
|
+ " # replace individual categories\n",
|
|
|
+ " # TODO: move to config section\n",
|
|
|
+ " inv_conf[\"cat_codes_manual\"]['energy'] = {\n",
|
|
|
+ " 'International Bunkers': 'MEMO',\n",
|
|
|
+ " '1.A.3.a.i - Aviation internationale (soutes internationales)': 'M.BK.A',\n",
|
|
|
+ " '1.A.3.d.i - Navigation internationale (soutes internationales)' : 'M.BK.M',\n",
|
|
|
+ " '1.A.5.c - Opérations multilatérales' : 'M.MULTIOP',\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " # replace cat names by codes in col \"category\"\n",
|
|
|
+ " # first the manual replacements\n",
|
|
|
+ " df_energy_year_long[\"category\"] = df_energy_year_long[\"category\"].str.replace('\\n' ,'')\n",
|
|
|
+ " df_energy_year_long[\"category\"] = \\\n",
|
|
|
+ " df_energy_year_long[\"category\"].replace(inv_conf[\"cat_codes_manual\"]['energy'])\n",
|
|
|
+ "\n",
|
|
|
+ " df_energy_year_long[\"category\"] = df_energy_year_long[\"category\"].str.replace(\".\", \"\")\n",
|
|
|
+ " \n",
|
|
|
+ " inv_conf[\"cat_code_regexp\"] = r'^(?P<code>[a-zA-Z0-9\\.]{1,11})[\\s\\.].*'\n",
|
|
|
+ "\n",
|
|
|
+ " # then the regex replacements\n",
|
|
|
+ " repl = lambda m: m.group('code')\n",
|
|
|
+ " df_energy_year_long[\"category\"] = \\\n",
|
|
|
+ " df_energy_year_long[\"category\"].str.replace(inv_conf[\"cat_code_regexp\"], repl,\n",
|
|
|
+ " regex=True)\n",
|
|
|
+ "\n",
|
|
|
+ " df_energy_year_long = df_energy_year_long.reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ " \n",
|
|
|
+ " df_energy_year_long[\"data\"] = df_energy_year_long[\"data\"].str.replace(\",\", \".\")\n",
|
|
|
+ " df_energy_year_long[\"data\"] = df_energy_year_long[\"data\"].str.replace(\"NE1\", \"NE\")\n",
|
|
|
+ "\n",
|
|
|
+ " # make sure all col headers are str\n",
|
|
|
+ " df_energy_year_long.columns = df_energy_year_long.columns.map(str)\n",
|
|
|
+ " df_energy_year_long = df_energy_year_long.drop(columns=[\"orig_cat_name\"])\n",
|
|
|
+ " \n",
|
|
|
+ " df_energy_dict[page] = df_energy_year_long\n",
|
|
|
+ "\n",
|
|
|
+ "df_energy = pd.concat([df_energy_dict['116'], df_energy_dict['117'], df_energy_dict['118'], df_energy_dict['119']],\n",
|
|
|
+ " axis=0,\n",
|
|
|
+ " join='outer').reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"Converting to interchange format.\")\n",
|
|
|
+ "df_energy_IF = pm2.pm2io.convert_long_dataframe_if(\n",
|
|
|
+ " df_energy,\n",
|
|
|
+ " coords_cols=coords_cols,\n",
|
|
|
+ " #add_coords_cols=add_coords_cols,\n",
|
|
|
+ " coords_defaults=coords_defaults,\n",
|
|
|
+ " coords_terminologies=coords_terminologies,\n",
|
|
|
+ " coords_value_mapping=coords_value_mapping['energy'],\n",
|
|
|
+ " #coords_value_filling=coords_value_filling,\n",
|
|
|
+ " filter_remove=filter_remove,\n",
|
|
|
+ " #filter_keep=filter_keep,\n",
|
|
|
+ " meta_data=meta_data,\n",
|
|
|
+ " convert_str=True,\n",
|
|
|
+ " time_format=\"%Y\",\n",
|
|
|
+ " )\n",
|
|
|
+ " \n",
|
|
|
+ "df_energy_IF"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 30,
|
|
|
+ "id": "64fa29dc-f62b-4010-bfed-8cd588675475",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 1.A.2.k, CH4, 1990.\n",
|
|
|
+ "[3.e-05]\n",
|
|
|
+ "Value matches expected value.\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 1.A.4.c.i, CO, 1990.\n",
|
|
|
+ "[0.0016]\n",
|
|
|
+ "Value matches expected value.\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 1.A.3.a.i, NMVOC, 2000.\n",
|
|
|
+ "[0.0002]\n",
|
|
|
+ "Value matches expected value.\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 1, SO2, 2010.\n",
|
|
|
+ "[0.]\n",
|
|
|
+ "No value found for 1, SO2, 2010!\n",
|
|
|
+ "Value matches expected value.\n",
|
|
|
+ "--------------------------------------------------\n",
|
|
|
+ "Testing combination 1.A.2.k, N2O, 2019.\n",
|
|
|
+ "[7.e-06]\n",
|
|
|
+ "Value matches expected value.\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "test_cases = {\n",
|
|
|
+ " \"1\" : {\n",
|
|
|
+ " \"category\" : \"1.A.2.k\",\n",
|
|
|
+ " 'entity' : \"CH4\",\n",
|
|
|
+ " \"year\" : \"1990\",\n",
|
|
|
+ " \"expected_value\" : 3e-05,\n",
|
|
|
+ " },\n",
|
|
|
+ " \"2\" : {\n",
|
|
|
+ " \"category\" : \"1.A.4.c.i\",\n",
|
|
|
+ " 'entity' : \"CO\",\n",
|
|
|
+ " \"year\" : \"1990\",\n",
|
|
|
+ " \"expected_value\" : 0.0016,\n",
|
|
|
+ " },\n",
|
|
|
+ " \"3\" : {\n",
|
|
|
+ " \"category\" : \"1.A.3.a.i\",\n",
|
|
|
+ " 'entity' : \"NMVOC\",\n",
|
|
|
+ " \"year\" : \"2000\",\n",
|
|
|
+ " \"expected_value\" : 0.0002,\n",
|
|
|
+ " },\n",
|
|
|
+ " '4' : {\n",
|
|
|
+ " \"category\" : \"1\",\n",
|
|
|
+ " 'entity' : \"SO2\",\n",
|
|
|
+ " \"year\" : \"2010\",\n",
|
|
|
+ " \"expected_value\" : 0,\n",
|
|
|
+ " },\n",
|
|
|
+ " '5' : {\n",
|
|
|
+ " \"category\" : \"1.A.2.k\",\n",
|
|
|
+ " 'entity' : \"N2O\",\n",
|
|
|
+ " \"year\" : \"2019\",\n",
|
|
|
+ " \"expected_value\" : 7e-06,\n",
|
|
|
+ " }\n",
|
|
|
+ "}\n",
|
|
|
+ "\n",
|
|
|
+ "for key in test_cases.keys():\n",
|
|
|
+ " print(\"-\"*50)\n",
|
|
|
+ " print(f\"Testing combination {test_cases[key][\"category\"]}, {test_cases[key][\"entity\"]}, {test_cases[key][\"year\"]}.\")\n",
|
|
|
+ " assert_individual_value(\n",
|
|
|
+ " df = df_energy_IF,\n",
|
|
|
+ " category_column = \"category (IPCC1996_2006_GIN_Inv)\",\n",
|
|
|
+ " entity_column = \"entity\",\n",
|
|
|
+ " category = test_cases[key][\"category\"],\n",
|
|
|
+ " entity = test_cases[key][\"entity\"],\n",
|
|
|
+ " year = test_cases[key][\"year\"],\n",
|
|
|
+ " expected_value = test_cases[key][\"expected_value\"])"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 31,
|
|
|
+ "id": "bcf727f7-3474-4f2e-9bcb-ebdd140a14c1",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stderr",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\u001b[32m2024-03-21 17:25:29.863\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2.pm2io._interchange_format\u001b[0m:\u001b[36mfrom_interchange_format\u001b[0m:\u001b[36m320\u001b[0m - \u001b[34m\u001b[1mExpected array shapes: [[1, 1, 1, 1, 7, 54], [1, 1, 1, 1, 7, 54], [1, 1, 1, 1, 7, 54], [1, 1, 1, 1, 7, 54], [1, 1, 1, 1, 7, 54], [1, 1, 1, 1, 7, 54], [1, 1, 1, 1, 7, 54]], resulting in size 2,646.\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-21 17:25:29.940\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprimap2._data_format\u001b[0m:\u001b[36mensure_valid_attributes\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mReference information is not a DOI: 'placeholder'\u001b[0m\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "### convert to primap2 format ###\n",
|
|
|
+ "data_pm2_energy = pm2.pm2io.from_interchange_format(df_energy_IF)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "d557a318-ea3f-44ec-9187-c05da423fbca",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# 3. Read in LULUCF table - pages 124, 125, 126, 127"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 50,
|
|
|
+ "id": "b4d117f0-6bfc-468f-b9f2-f66d5eaf8f1a",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 124.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 125.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 126.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table from page 127.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "Converting to interchange format.\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>source</th>\n",
|
|
|
+ " <th>scenario (PRIMAP)</th>\n",
|
|
|
+ " <th>provenance</th>\n",
|
|
|
+ " <th>area (ISO3)</th>\n",
|
|
|
+ " <th>entity</th>\n",
|
|
|
+ " <th>unit</th>\n",
|
|
|
+ " <th>category (IPCC1996_2006_GIN_Inv)</th>\n",
|
|
|
+ " <th>1990</th>\n",
|
|
|
+ " <th>2000</th>\n",
|
|
|
+ " <th>2010</th>\n",
|
|
|
+ " <th>2019</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>0</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>3</td>\n",
|
|
|
+ " <td>56.987</td>\n",
|
|
|
+ " <td>110.568</td>\n",
|
|
|
+ " <td>187.617</td>\n",
|
|
|
+ " <td>299.503</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>1</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>3.A</td>\n",
|
|
|
+ " <td>55.634</td>\n",
|
|
|
+ " <td>107.911</td>\n",
|
|
|
+ " <td>186.769</td>\n",
|
|
|
+ " <td>298.533</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>3.A.1</td>\n",
|
|
|
+ " <td>53.796</td>\n",
|
|
|
+ " <td>104.298</td>\n",
|
|
|
+ " <td>180.454</td>\n",
|
|
|
+ " <td>288.239</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>3</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>3.A.1.a</td>\n",
|
|
|
+ " <td>49.050</td>\n",
|
|
|
+ " <td>94.967</td>\n",
|
|
|
+ " <td>161.753</td>\n",
|
|
|
+ " <td>256.319</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>4</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>3.A.1.a.i</td>\n",
|
|
|
+ " <td>10.488</td>\n",
|
|
|
+ " <td>17.802</td>\n",
|
|
|
+ " <td>27.091</td>\n",
|
|
|
+ " <td>31.905</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>...</th>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>469</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>NOx</td>\n",
|
|
|
+ " <td>Gg NOx / yr</td>\n",
|
|
|
+ " <td>3.C.7</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>470</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>NOx</td>\n",
|
|
|
+ " <td>Gg NOx / yr</td>\n",
|
|
|
+ " <td>3.C.8</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>471</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>NOx</td>\n",
|
|
|
+ " <td>Gg NOx / yr</td>\n",
|
|
|
+ " <td>3.D</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>472</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>NOx</td>\n",
|
|
|
+ " <td>Gg NOx / yr</td>\n",
|
|
|
+ " <td>3.D.1</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>473</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>NOx</td>\n",
|
|
|
+ " <td>Gg NOx / yr</td>\n",
|
|
|
+ " <td>3.D.2</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "<p>474 rows × 11 columns</p>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " source scenario (PRIMAP) provenance area (ISO3) entity \\\n",
|
|
|
+ "0 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "1 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "2 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "3 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "4 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ ".. ... ... ... ... ... \n",
|
|
|
+ "469 GIN-GHG-Inventory BUR1 measured GIN NOx \n",
|
|
|
+ "470 GIN-GHG-Inventory BUR1 measured GIN NOx \n",
|
|
|
+ "471 GIN-GHG-Inventory BUR1 measured GIN NOx \n",
|
|
|
+ "472 GIN-GHG-Inventory BUR1 measured GIN NOx \n",
|
|
|
+ "473 GIN-GHG-Inventory BUR1 measured GIN NOx \n",
|
|
|
+ "\n",
|
|
|
+ " unit category (IPCC1996_2006_GIN_Inv) 1990 2000 2010 \\\n",
|
|
|
+ "0 Gg CH4 / yr 3 56.987 110.568 187.617 \n",
|
|
|
+ "1 Gg CH4 / yr 3.A 55.634 107.911 186.769 \n",
|
|
|
+ "2 Gg CH4 / yr 3.A.1 53.796 104.298 180.454 \n",
|
|
|
+ "3 Gg CH4 / yr 3.A.1.a 49.050 94.967 161.753 \n",
|
|
|
+ "4 Gg CH4 / yr 3.A.1.a.i 10.488 17.802 27.091 \n",
|
|
|
+ ".. ... ... ... ... ... \n",
|
|
|
+ "469 Gg NOx / yr 3.C.7 0.000 0.000 0.000 \n",
|
|
|
+ "470 Gg NOx / yr 3.C.8 0.000 0.000 0.000 \n",
|
|
|
+ "471 Gg NOx / yr 3.D 0.000 0.000 0.000 \n",
|
|
|
+ "472 Gg NOx / yr 3.D.1 0.000 0.000 0.000 \n",
|
|
|
+ "473 Gg NOx / yr 3.D.2 0.000 0.000 0.000 \n",
|
|
|
+ "\n",
|
|
|
+ " 2019 \n",
|
|
|
+ "0 299.503 \n",
|
|
|
+ "1 298.533 \n",
|
|
|
+ "2 288.239 \n",
|
|
|
+ "3 256.319 \n",
|
|
|
+ "4 31.905 \n",
|
|
|
+ ".. ... \n",
|
|
|
+ "469 0.000 \n",
|
|
|
+ "470 0.000 \n",
|
|
|
+ "471 0.000 \n",
|
|
|
+ "472 0.000 \n",
|
|
|
+ "473 0.000 \n",
|
|
|
+ "\n",
|
|
|
+ "[474 rows x 11 columns]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 50,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "pages = ['124', '125', '126', '127']\n",
|
|
|
+ "df_lulucf_dict = {}\n",
|
|
|
+ "for page in pages:\n",
|
|
|
+ " print(\"-\"*45)\n",
|
|
|
+ " print(f\"Reading table from page {page}.\")\n",
|
|
|
+ " \n",
|
|
|
+ " tables_inventory_original = camelot.read_pdf(\n",
|
|
|
+ " str(input_folder / pdf_file),\n",
|
|
|
+ " pages=page,\n",
|
|
|
+ " flavor=\"lattice\",\n",
|
|
|
+ " split_text=True\n",
|
|
|
+ " )\n",
|
|
|
+ " print(\"Reading complete.\")\n",
|
|
|
+ "\n",
|
|
|
+ " if page == '127':\n",
|
|
|
+ " # table on page 127 has one extra row at the top\n",
|
|
|
+ " # and one extra category 3.A.1.j\n",
|
|
|
+ " df_lulucf_year = tables_inventory_original[0].df[3:]\n",
|
|
|
+ " # rename duplicate categories in tables\n",
|
|
|
+ " # TODO move to config section\n",
|
|
|
+ " replace_categories = [(19, \"3.A.2.a.i - Vaches laitières\"),\n",
|
|
|
+ " (20, \"3.A.2.a.ii - Autres bovins\"),\n",
|
|
|
+ " (21, \"3.A.2.b - Buffle\"),\n",
|
|
|
+ " (22, \"3.A.2.c - Ovins\"),\n",
|
|
|
+ " (23, \"3.A.2.d - Caprins\"),\n",
|
|
|
+ " (24, \"3.A.2.e - Chameaux\"),\n",
|
|
|
+ " (25, \"3.A.2.f - Chevaux\"),\n",
|
|
|
+ " (26, \"3.A.2.g - Mules et ânes\"),\n",
|
|
|
+ " (27, \"3.A.2.h - Porcins\"),\n",
|
|
|
+ " (28, \"3.A.2.i - Volailles\"),\n",
|
|
|
+ " (29, \"3.A.2.j - Autres (préciser)\"),]\n",
|
|
|
+ " for index, category_name in replace_categories:\n",
|
|
|
+ " df_lulucf_year.at[index, 0] = category_name\n",
|
|
|
+ " else:\n",
|
|
|
+ " # cut first two lines\n",
|
|
|
+ " df_lulucf_year = tables_inventory_original[0].df[2:] \n",
|
|
|
+ "\n",
|
|
|
+ " # TODO move to config section\n",
|
|
|
+ " replace_categories = [(17, \"3.A.2.a.i - Vaches laitières\"),\n",
|
|
|
+ " (18, \"3.A.2.a.ii - Autres bovins\"),\n",
|
|
|
+ " (19, \"3.A.2.b - Buffle\"),\n",
|
|
|
+ " (20, \"3.A.2.c - Ovins\"),\n",
|
|
|
+ " (21, \"3.A.2.d - Caprins\"),\n",
|
|
|
+ " (22, \"3.A.2.e - Chameaux\"),\n",
|
|
|
+ " (23, \"3.A.2.f - Chevaux\"),\n",
|
|
|
+ " (24, \"3.A.2.g - Mules et ânes\"),\n",
|
|
|
+ " (25, \"3.A.2.h - Porcins\"),\n",
|
|
|
+ " (26, \"3.A.2.i - Volailles\"),]\n",
|
|
|
+ " for index, category_name in replace_categories:\n",
|
|
|
+ " df_lulucf_year.at[index, 0] = category_name\n",
|
|
|
+ " \n",
|
|
|
+ " # add header and unit\n",
|
|
|
+ " df_header = pd.DataFrame([inv_conf[\"header_lulucf\"], inv_conf[\"unit_lulucf\"]])\n",
|
|
|
+ "\n",
|
|
|
+ " df_lulucf_year = pd.concat([df_header, df_lulucf_year], axis=0, join='outer').reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ " df_lulucf_year = pm2.pm2io.nir_add_unit_information(df_lulucf_year,\n",
|
|
|
+ " unit_row=inv_conf[\"unit_row\"],\n",
|
|
|
+ " entity_row=inv_conf[\"entity_row\"],\n",
|
|
|
+ " regexp_entity=\".*\",\n",
|
|
|
+ " regexp_unit=\".*\",\n",
|
|
|
+ " default_unit=\"Gg\")\n",
|
|
|
+ "\n",
|
|
|
+ " print(\"Added unit information.\")\n",
|
|
|
+ " \n",
|
|
|
+ " # set index\n",
|
|
|
+ " df_lulucf_year = df_lulucf_year.set_index(inv_conf[\"index_cols\"])\n",
|
|
|
+ "\n",
|
|
|
+ " # convert to long format\n",
|
|
|
+ " df_lulucf_year_long = pm2.pm2io.nir_convert_df_to_long(df_lulucf_year, inv_conf[\"year\"][page],\n",
|
|
|
+ " inv_conf[\"header_long\"])\n",
|
|
|
+ " \n",
|
|
|
+ " df_lulucf_year_long[\"orig_cat_name\"] = df_lulucf_year_long[\"orig_cat_name\"].str[0] # extract from tuple\n",
|
|
|
+ "\n",
|
|
|
+ " # prep for conversion to PM2 IF and native format\n",
|
|
|
+ " # make a copy of the categories row\n",
|
|
|
+ " df_lulucf_year_long[\"category\"] = df_lulucf_year_long[\"orig_cat_name\"]\n",
|
|
|
+ " \n",
|
|
|
+ " # regex replacements\n",
|
|
|
+ " repl = lambda m: m.group('code')\n",
|
|
|
+ " df_lulucf_year_long[\"category\"] = \\\n",
|
|
|
+ " df_lulucf_year_long[\"category\"].str.replace(inv_conf[\"cat_code_regexp\"], repl,\n",
|
|
|
+ " regex=True)\n",
|
|
|
+ " \n",
|
|
|
+ " df_lulucf_year_long = df_lulucf_year_long.reset_index(drop=True)\n",
|
|
|
+ " \n",
|
|
|
+ " df_lulucf_year_long[\"data\"] = df_lulucf_year_long[\"data\"].str.replace(\",\", \".\")\n",
|
|
|
+ " df_lulucf_year_long[\"data\"] = df_lulucf_year_long[\"data\"].str.replace(\"NE1\", \"NE\")\n",
|
|
|
+ "\n",
|
|
|
+ " # make sure all col headers are str\n",
|
|
|
+ " df_lulucf_year_long.columns = df_lulucf_year_long.columns.map(str)\n",
|
|
|
+ " df_lulucf_year_long = df_lulucf_year_long.drop(columns=[\"orig_cat_name\"])\n",
|
|
|
+ " \n",
|
|
|
+ " df_lulucf_dict[page] = df_lulucf_year_long\n",
|
|
|
+ "\n",
|
|
|
+ "df_lulucf = pd.concat([df_lulucf_dict['124'], df_lulucf_dict['125'], df_lulucf_dict['126'], df_lulucf_dict['127']],\n",
|
|
|
+ " axis=0,\n",
|
|
|
+ " join='outer').reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"Converting to interchange format.\")\n",
|
|
|
+ "df_lulucf_IF = pm2.pm2io.convert_long_dataframe_if(\n",
|
|
|
+ " df_lulucf,\n",
|
|
|
+ " coords_cols=coords_cols,\n",
|
|
|
+ " #add_coords_cols=add_coords_cols,\n",
|
|
|
+ " coords_defaults=coords_defaults,\n",
|
|
|
+ " coords_terminologies=coords_terminologies,\n",
|
|
|
+ " coords_value_mapping=coords_value_mapping['lulucf'],\n",
|
|
|
+ " #coords_value_filling=coords_value_filling,\n",
|
|
|
+ " filter_remove=filter_remove,\n",
|
|
|
+ " #filter_keep=filter_keep,\n",
|
|
|
+ " meta_data=meta_data,\n",
|
|
|
+ " convert_str=True,\n",
|
|
|
+ " time_format=\"%Y\",\n",
|
|
|
+ " )\n",
|
|
|
+ " \n",
|
|
|
+ "df_lulucf_IF"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 51,
|
|
|
+ "id": "8d132ea2-655a-4363-9171-b81904a7d6d1",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stderr",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\u001b[32m2024-03-22 09:22:15.333\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2.pm2io._interchange_format\u001b[0m:\u001b[36mfrom_interchange_format\u001b[0m:\u001b[36m320\u001b[0m - \u001b[34m\u001b[1mExpected array shapes: [[1, 1, 1, 1, 6, 79], [1, 1, 1, 1, 6, 79], [1, 1, 1, 1, 6, 79], [1, 1, 1, 1, 6, 79], [1, 1, 1, 1, 6, 79], [1, 1, 1, 1, 6, 79]], resulting in size 2,844.\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-22 09:22:15.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprimap2._data_format\u001b[0m:\u001b[36mensure_valid_attributes\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mReference information is not a DOI: 'placeholder'\u001b[0m\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "### convert to primap2 format ###\n",
|
|
|
+ "data_pm2_lulucf = pm2.pm2io.from_interchange_format(df_lulucf_IF)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "a99c689e-1f26-42d5-8974-194373ce26f6",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# 3. Read in Waste tables - pages 128, 130"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 52,
|
|
|
+ "id": "fcf17dba-6af4-400f-9ec3-b5dd5b1b0a82",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# There are three tables for three years on page 128\n",
|
|
|
+ "# and another tabel on page 130\n",
|
|
|
+ "\n",
|
|
|
+ "# read three tables\n",
|
|
|
+ "page = '128'\n",
|
|
|
+ "tables_inventory_original_128 = camelot.read_pdf(\n",
|
|
|
+ " str(input_folder / pdf_file),\n",
|
|
|
+ " pages=page,\n",
|
|
|
+ " flavor=\"lattice\",\n",
|
|
|
+ " split_text=True\n",
|
|
|
+ ")\n",
|
|
|
+ "\n",
|
|
|
+ "# read last table\n",
|
|
|
+ "page = '130'\n",
|
|
|
+ "tables_inventory_original_130 = camelot.read_pdf(\n",
|
|
|
+ " str(input_folder / pdf_file),\n",
|
|
|
+ " pages=page,\n",
|
|
|
+ " flavor=\"lattice\",\n",
|
|
|
+ " split_text=True\n",
|
|
|
+ ")\n",
|
|
|
+ "\n",
|
|
|
+ "# save to dict\n",
|
|
|
+ "df_waste_years = {\n",
|
|
|
+ " '1990' : tables_inventory_original_128[0].df,\n",
|
|
|
+ " '2000' : tables_inventory_original_128[1].df,\n",
|
|
|
+ " '2010' : tables_inventory_original_128[2].df,\n",
|
|
|
+ " '2019' : tables_inventory_original_130[0].df,\n",
|
|
|
+ "}\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 57,
|
|
|
+ "id": "4e0afb6e-db8b-41ae-b02d-e4a5d54ea5ed",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Processing table for 1990.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Processing table for 2000.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Processing table for 2010.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Processing table for 2019.\n",
|
|
|
+ "Added unit information.\n",
|
|
|
+ "Converting to interchange format.\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>source</th>\n",
|
|
|
+ " <th>scenario (PRIMAP)</th>\n",
|
|
|
+ " <th>provenance</th>\n",
|
|
|
+ " <th>area (ISO3)</th>\n",
|
|
|
+ " <th>entity</th>\n",
|
|
|
+ " <th>unit</th>\n",
|
|
|
+ " <th>category (IPCC1996_2006_GIN_Inv)</th>\n",
|
|
|
+ " <th>1990</th>\n",
|
|
|
+ " <th>2000</th>\n",
|
|
|
+ " <th>2010</th>\n",
|
|
|
+ " <th>2019</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>0</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>4</td>\n",
|
|
|
+ " <td>1.750</td>\n",
|
|
|
+ " <td>2.925</td>\n",
|
|
|
+ " <td>4.534</td>\n",
|
|
|
+ " <td>6.665</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>1</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>4.A</td>\n",
|
|
|
+ " <td>1.029</td>\n",
|
|
|
+ " <td>2.054</td>\n",
|
|
|
+ " <td>3.323</td>\n",
|
|
|
+ " <td>5.170</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>4.A.1</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>3</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>4.A.2</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>4</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>4.A.3</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>...</th>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>86</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>4.C.2</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>87</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>4.D</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>88</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>4.D.1</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>89</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>4.D.2</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>90</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>SO2</td>\n",
|
|
|
+ " <td>Gg SO2 / yr</td>\n",
|
|
|
+ " <td>4.E</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "<p>91 rows × 11 columns</p>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " source scenario (PRIMAP) provenance area (ISO3) entity \\\n",
|
|
|
+ "0 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "1 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "2 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "3 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "4 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ ".. ... ... ... ... ... \n",
|
|
|
+ "86 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "87 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "88 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "89 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "90 GIN-GHG-Inventory BUR1 measured GIN SO2 \n",
|
|
|
+ "\n",
|
|
|
+ " unit category (IPCC1996_2006_GIN_Inv) 1990 2000 2010 2019 \n",
|
|
|
+ "0 Gg CH4 / yr 4 1.750 2.925 4.534 6.665 \n",
|
|
|
+ "1 Gg CH4 / yr 4.A 1.029 2.054 3.323 5.170 \n",
|
|
|
+ "2 Gg CH4 / yr 4.A.1 NaN NaN NaN NaN \n",
|
|
|
+ "3 Gg CH4 / yr 4.A.2 NaN NaN NaN NaN \n",
|
|
|
+ "4 Gg CH4 / yr 4.A.3 NaN NaN NaN NaN \n",
|
|
|
+ ".. ... ... ... ... ... ... \n",
|
|
|
+ "86 Gg SO2 / yr 4.C.2 0.000 0.000 0.000 0.000 \n",
|
|
|
+ "87 Gg SO2 / yr 4.D 0.000 0.000 0.000 0.000 \n",
|
|
|
+ "88 Gg SO2 / yr 4.D.1 0.000 0.000 0.000 0.000 \n",
|
|
|
+ "89 Gg SO2 / yr 4.D.2 0.000 0.000 0.000 0.000 \n",
|
|
|
+ "90 Gg SO2 / yr 4.E 0.000 0.000 0.000 0.000 \n",
|
|
|
+ "\n",
|
|
|
+ "[91 rows x 11 columns]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 57,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "df_waste_dict = {}\n",
|
|
|
+ "for year in df_waste_years.keys():\n",
|
|
|
+ " print(\"-\"*45)\n",
|
|
|
+ " print(f\"Processing table for {year}.\")\n",
|
|
|
+ "\n",
|
|
|
+ " df_waste_year = df_waste_years[year][2:]\n",
|
|
|
+ " \n",
|
|
|
+ " # add header and unit\n",
|
|
|
+ " df_header = pd.DataFrame([inv_conf[\"header_waste\"], inv_conf[\"unit_waste\"]])\n",
|
|
|
+ "\n",
|
|
|
+ " df_waste_year = pd.concat([df_header, df_waste_year], axis=0, join='outer').reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ " df_waste_year = pm2.pm2io.nir_add_unit_information(df_waste_year,\n",
|
|
|
+ " unit_row=inv_conf[\"unit_row\"],\n",
|
|
|
+ " entity_row=inv_conf[\"entity_row\"],\n",
|
|
|
+ " regexp_entity=\".*\",\n",
|
|
|
+ " regexp_unit=\".*\",\n",
|
|
|
+ " default_unit=\"Gg\")\n",
|
|
|
+ "\n",
|
|
|
+ " print(\"Added unit information.\")\n",
|
|
|
+ " \n",
|
|
|
+ " # set index\n",
|
|
|
+ " df_waste_year = df_waste_year.set_index(inv_conf[\"index_cols\"])\n",
|
|
|
+ "\n",
|
|
|
+ " # convert to long format\n",
|
|
|
+ " df_waste_year_long = pm2.pm2io.nir_convert_df_to_long(df_waste_year, year,\n",
|
|
|
+ " inv_conf[\"header_long\"])\n",
|
|
|
+ " \n",
|
|
|
+ " df_waste_year_long[\"orig_cat_name\"] = df_waste_year_long[\"orig_cat_name\"].str[0]\n",
|
|
|
+ "\n",
|
|
|
+ " # prep for conversion to PM2 IF and native format\n",
|
|
|
+ " # make a copy of the categories row\n",
|
|
|
+ " df_waste_year_long[\"category\"] = df_waste_year_long[\"orig_cat_name\"]\n",
|
|
|
+ "\n",
|
|
|
+ " # regex replacements\n",
|
|
|
+ " repl = lambda m: m.group('code')\n",
|
|
|
+ " df_waste_year_long[\"category\"] = \\\n",
|
|
|
+ " df_waste_year_long[\"category\"].str.replace(inv_conf[\"cat_code_regexp\"], repl,\n",
|
|
|
+ " regex=True)\n",
|
|
|
+ " \n",
|
|
|
+ " df_waste_year_long = df_waste_year_long.reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ " df_waste_year_long[\"category\"] = df_waste_year_long[\"category\"].str.replace(\".\", \"\")\n",
|
|
|
+ " df_waste_year_long[\"data\"] = df_waste_year_long[\"data\"].str.replace(\",\", \".\")\n",
|
|
|
+ " df_waste_year_long[\"data\"] = df_waste_year_long[\"data\"].str.replace(\"NE1\", \"NE\")\n",
|
|
|
+ "\n",
|
|
|
+ " # make sure all col headers are str\n",
|
|
|
+ " df_waste_year_long.columns = df_waste_year_long.columns.map(str)\n",
|
|
|
+ " df_waste_year_long = df_waste_year_long.drop(columns=[\"orig_cat_name\"])\n",
|
|
|
+ " \n",
|
|
|
+ " df_waste_dict[year] = df_waste_year_long\n",
|
|
|
+ "\n",
|
|
|
+ "df_waste = pd.concat([df_waste_dict['1990'], df_waste_dict['2000'], df_waste_dict['2010'], df_waste_dict['2019']],\n",
|
|
|
+ " axis=0,\n",
|
|
|
+ " join='outer').reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"Converting to interchange format.\")\n",
|
|
|
+ "df_waste_IF = pm2.pm2io.convert_long_dataframe_if(\n",
|
|
|
+ " df_waste,\n",
|
|
|
+ " coords_cols=coords_cols,\n",
|
|
|
+ " #add_coords_cols=add_coords_cols,\n",
|
|
|
+ " coords_defaults=coords_defaults,\n",
|
|
|
+ " coords_terminologies=coords_terminologies,\n",
|
|
|
+ " coords_value_mapping=coords_value_mapping['waste'],\n",
|
|
|
+ " #coords_value_filling=coords_value_filling,\n",
|
|
|
+ " filter_remove=filter_remove,\n",
|
|
|
+ " #filter_keep=filter_keep,\n",
|
|
|
+ " meta_data=meta_data,\n",
|
|
|
+ " convert_str=True,\n",
|
|
|
+ " time_format=\"%Y\",\n",
|
|
|
+ " )\n",
|
|
|
+ " \n",
|
|
|
+ "df_waste_IF"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 58,
|
|
|
+ "id": "6628eacb-8a24-415b-a42e-04e929976f83",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stderr",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\u001b[32m2024-03-22 09:27:11.859\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2.pm2io._interchange_format\u001b[0m:\u001b[36mfrom_interchange_format\u001b[0m:\u001b[36m320\u001b[0m - \u001b[34m\u001b[1mExpected array shapes: [[1, 1, 1, 1, 7, 13], [1, 1, 1, 1, 7, 13], [1, 1, 1, 1, 7, 13], [1, 1, 1, 1, 7, 13], [1, 1, 1, 1, 7, 13], [1, 1, 1, 1, 7, 13], [1, 1, 1, 1, 7, 13]], resulting in size 637.\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-22 09:27:11.898\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprimap2._data_format\u001b[0m:\u001b[36mensure_valid_attributes\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mReference information is not a DOI: 'placeholder'\u001b[0m\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "### convert to primap2 format ###\n",
|
|
|
+ "data_pm2_waste = pm2.pm2io.from_interchange_format(df_waste_IF)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "ba512153-1c65-4568-9bae-817fbf9cc9b3",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# 4. Read in trend tables - pages 131 - 137"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 67,
|
|
|
+ "id": "0e71c7b2-c301-4048-8b92-c9fc58a2501f",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table for page 131 and entity CO2.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Created category codes.\n",
|
|
|
+ "Converted to long format.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table for page 132 and entity CH4.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Created category codes.\n",
|
|
|
+ "Converted to long format.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table for page 133 and entity N2O.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Created category codes.\n",
|
|
|
+ "Converted to long format.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table for page 134 and entity NOx.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Created category codes.\n",
|
|
|
+ "Converted to long format.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table for page 135 and entity CO.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Created category codes.\n",
|
|
|
+ "Converted to long format.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table for page 136 and entity NMVOCs.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Created category codes.\n",
|
|
|
+ "Converted to long format.\n",
|
|
|
+ "---------------------------------------------\n",
|
|
|
+ "Reading table for page 137 and entity SO2.\n",
|
|
|
+ "Reading complete.\n",
|
|
|
+ "Created category codes.\n",
|
|
|
+ "Converted to long format.\n",
|
|
|
+ "Converting to interchange format.\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>source</th>\n",
|
|
|
+ " <th>scenario (PRIMAP)</th>\n",
|
|
|
+ " <th>provenance</th>\n",
|
|
|
+ " <th>area (ISO3)</th>\n",
|
|
|
+ " <th>entity</th>\n",
|
|
|
+ " <th>unit</th>\n",
|
|
|
+ " <th>category (IPCC1996_2006_GIN_Inv)</th>\n",
|
|
|
+ " <th>1990</th>\n",
|
|
|
+ " <th>1995</th>\n",
|
|
|
+ " <th>2000</th>\n",
|
|
|
+ " <th>2005</th>\n",
|
|
|
+ " <th>2010</th>\n",
|
|
|
+ " <th>2015</th>\n",
|
|
|
+ " <th>2018</th>\n",
|
|
|
+ " <th>2019</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>0</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>65.202</td>\n",
|
|
|
+ " <td>93.368</td>\n",
|
|
|
+ " <td>119.981</td>\n",
|
|
|
+ " <td>152.272</td>\n",
|
|
|
+ " <td>196.057</td>\n",
|
|
|
+ " <td>253.025</td>\n",
|
|
|
+ " <td>296.416</td>\n",
|
|
|
+ " <td>312.034</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>1</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>6.465</td>\n",
|
|
|
+ " <td>7.066</td>\n",
|
|
|
+ " <td>6.489</td>\n",
|
|
|
+ " <td>5.984</td>\n",
|
|
|
+ " <td>4.849</td>\n",
|
|
|
+ " <td>5.360</td>\n",
|
|
|
+ " <td>5.931</td>\n",
|
|
|
+ " <td>5.866</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1.A</td>\n",
|
|
|
+ " <td>6.465</td>\n",
|
|
|
+ " <td>7.066</td>\n",
|
|
|
+ " <td>6.489</td>\n",
|
|
|
+ " <td>5.984</td>\n",
|
|
|
+ " <td>4.849</td>\n",
|
|
|
+ " <td>5.360</td>\n",
|
|
|
+ " <td>5.931</td>\n",
|
|
|
+ " <td>5.866</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>3</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1.A.1</td>\n",
|
|
|
+ " <td>0.032</td>\n",
|
|
|
+ " <td>0.027</td>\n",
|
|
|
+ " <td>0.024</td>\n",
|
|
|
+ " <td>0.020</td>\n",
|
|
|
+ " <td>0.016</td>\n",
|
|
|
+ " <td>0.002</td>\n",
|
|
|
+ " <td>0.005</td>\n",
|
|
|
+ " <td>0.001</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>4</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CH4</td>\n",
|
|
|
+ " <td>Gg CH4 / yr</td>\n",
|
|
|
+ " <td>1.A.2</td>\n",
|
|
|
+ " <td>0.006</td>\n",
|
|
|
+ " <td>0.012</td>\n",
|
|
|
+ " <td>0.018</td>\n",
|
|
|
+ " <td>0.023</td>\n",
|
|
|
+ " <td>0.028</td>\n",
|
|
|
+ " <td>0.024</td>\n",
|
|
|
+ " <td>0.026</td>\n",
|
|
|
+ " <td>0.033</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>...</th>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " <td>...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>151</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CO2</td>\n",
|
|
|
+ " <td>Gg CO2 / yr</td>\n",
|
|
|
+ " <td>5</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>152</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CO2</td>\n",
|
|
|
+ " <td>Gg CO2 / yr</td>\n",
|
|
|
+ " <td>M.BK</td>\n",
|
|
|
+ " <td>0.719</td>\n",
|
|
|
+ " <td>1.438</td>\n",
|
|
|
+ " <td>2.158</td>\n",
|
|
|
+ " <td>19.529</td>\n",
|
|
|
+ " <td>36.900</td>\n",
|
|
|
+ " <td>21.840</td>\n",
|
|
|
+ " <td>51.718</td>\n",
|
|
|
+ " <td>66.197</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>153</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CO2</td>\n",
|
|
|
+ " <td>Gg CO2 / yr</td>\n",
|
|
|
+ " <td>M.BK.A</td>\n",
|
|
|
+ " <td>0.719</td>\n",
|
|
|
+ " <td>1.438</td>\n",
|
|
|
+ " <td>2.158</td>\n",
|
|
|
+ " <td>19.529</td>\n",
|
|
|
+ " <td>36.900</td>\n",
|
|
|
+ " <td>21.840</td>\n",
|
|
|
+ " <td>51.718</td>\n",
|
|
|
+ " <td>66.197</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>154</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CO2</td>\n",
|
|
|
+ " <td>Gg CO2 / yr</td>\n",
|
|
|
+ " <td>M.BK.M</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " <td>NaN</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>155</th>\n",
|
|
|
+ " <td>GIN-GHG-Inventory</td>\n",
|
|
|
+ " <td>BUR1</td>\n",
|
|
|
+ " <td>measured</td>\n",
|
|
|
+ " <td>GIN</td>\n",
|
|
|
+ " <td>CO2</td>\n",
|
|
|
+ " <td>Gg CO2 / yr</td>\n",
|
|
|
+ " <td>M.MULTIOP</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " <td>0.000</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "<p>156 rows × 15 columns</p>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " source scenario (PRIMAP) provenance area (ISO3) entity \\\n",
|
|
|
+ "0 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "1 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "2 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "3 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ "4 GIN-GHG-Inventory BUR1 measured GIN CH4 \n",
|
|
|
+ ".. ... ... ... ... ... \n",
|
|
|
+ "151 GIN-GHG-Inventory BUR1 measured GIN CO2 \n",
|
|
|
+ "152 GIN-GHG-Inventory BUR1 measured GIN CO2 \n",
|
|
|
+ "153 GIN-GHG-Inventory BUR1 measured GIN CO2 \n",
|
|
|
+ "154 GIN-GHG-Inventory BUR1 measured GIN CO2 \n",
|
|
|
+ "155 GIN-GHG-Inventory BUR1 measured GIN CO2 \n",
|
|
|
+ "\n",
|
|
|
+ " unit category (IPCC1996_2006_GIN_Inv) 1990 1995 2000 \\\n",
|
|
|
+ "0 Gg CH4 / yr 0 65.202 93.368 119.981 \n",
|
|
|
+ "1 Gg CH4 / yr 1 6.465 7.066 6.489 \n",
|
|
|
+ "2 Gg CH4 / yr 1.A 6.465 7.066 6.489 \n",
|
|
|
+ "3 Gg CH4 / yr 1.A.1 0.032 0.027 0.024 \n",
|
|
|
+ "4 Gg CH4 / yr 1.A.2 0.006 0.012 0.018 \n",
|
|
|
+ ".. ... ... ... ... ... \n",
|
|
|
+ "151 Gg CO2 / yr 5 NaN NaN NaN \n",
|
|
|
+ "152 Gg CO2 / yr M.BK 0.719 1.438 2.158 \n",
|
|
|
+ "153 Gg CO2 / yr M.BK.A 0.719 1.438 2.158 \n",
|
|
|
+ "154 Gg CO2 / yr M.BK.M NaN NaN NaN \n",
|
|
|
+ "155 Gg CO2 / yr M.MULTIOP 0.000 0.000 0.000 \n",
|
|
|
+ "\n",
|
|
|
+ " 2005 2010 2015 2018 2019 \n",
|
|
|
+ "0 152.272 196.057 253.025 296.416 312.034 \n",
|
|
|
+ "1 5.984 4.849 5.360 5.931 5.866 \n",
|
|
|
+ "2 5.984 4.849 5.360 5.931 5.866 \n",
|
|
|
+ "3 0.020 0.016 0.002 0.005 0.001 \n",
|
|
|
+ "4 0.023 0.028 0.024 0.026 0.033 \n",
|
|
|
+ ".. ... ... ... ... ... \n",
|
|
|
+ "151 NaN NaN NaN NaN NaN \n",
|
|
|
+ "152 19.529 36.900 21.840 51.718 66.197 \n",
|
|
|
+ "153 19.529 36.900 21.840 51.718 66.197 \n",
|
|
|
+ "154 NaN NaN NaN NaN NaN \n",
|
|
|
+ "155 0.000 0.000 0.000 0.000 0.000 \n",
|
|
|
+ "\n",
|
|
|
+ "[156 rows x 15 columns]"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 67,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "#%matplotlib widget \n",
|
|
|
+ "#camelot.plot(tables_inventory_original[0], kind='text')\n",
|
|
|
+ "\n",
|
|
|
+ "df_main_dict = {}\n",
|
|
|
+ "pages = ['131', '132', '133', '134', '135', '136', '137']\n",
|
|
|
+ "entities = ['CO2', 'CH4', 'N2O', 'NOx', 'CO', 'NMVOCs', 'SO2']\n",
|
|
|
+ "\n",
|
|
|
+ "# for this set of tables every page is a different entity\n",
|
|
|
+ "for page, entity in zip(pages, entities):\n",
|
|
|
+ "\n",
|
|
|
+ " print(\"-\"*45)\n",
|
|
|
+ " print(f\"Reading table for page {page} and entity {entity}.\")\n",
|
|
|
+ " \n",
|
|
|
+ " # first table needs to be read in with flavor=\"stream\"\n",
|
|
|
+ " # flavor=\"lattice\" raises an error, maybe camelot issue\n",
|
|
|
+ " # see https://github.com/atlanhq/camelot/issues/306\n",
|
|
|
+ " # or because characters in first row almost reach\n",
|
|
|
+ " # the table grid \n",
|
|
|
+ " if page == '131':\n",
|
|
|
+ " tables_inventory_original = camelot.read_pdf(\n",
|
|
|
+ " str(input_folder / pdf_file),\n",
|
|
|
+ " pages=page,\n",
|
|
|
+ " table_areas=page_def_templates[page][\"area\"],\n",
|
|
|
+ " columns=page_def_templates[page][\"cols\"],\n",
|
|
|
+ " flavor=\"stream\",\n",
|
|
|
+ " split_text=True\n",
|
|
|
+ " )\n",
|
|
|
+ " \n",
|
|
|
+ " df_trend_entity = tables_inventory_original[0].df[1:]\n",
|
|
|
+ " else:\n",
|
|
|
+ " tables_inventory_original = camelot.read_pdf(\n",
|
|
|
+ " str(input_folder / pdf_file),\n",
|
|
|
+ " pages=page,\n",
|
|
|
+ " flavor=\"lattice\",\n",
|
|
|
+ " split_text=True)\n",
|
|
|
+ " df_trend_entity = tables_inventory_original[0].df[3:]\n",
|
|
|
+ "\n",
|
|
|
+ " print(f\"Reading complete.\")\n",
|
|
|
+ "\n",
|
|
|
+ " # add columns\n",
|
|
|
+ " # 'data' prefix is needed for pd.wide_to_long() later\n",
|
|
|
+ " columns_years = ['data1990', 'data1995', \"data2000\", 'data2005', 'data2010', 'data2015', 'data2018', 'data2019']\n",
|
|
|
+ " df_trend_entity.columns = ['orig_cat_name'] + columns_years\n",
|
|
|
+ " \n",
|
|
|
+ " # unit is always Gg\n",
|
|
|
+ " df_trend_entity['unit'] = 'Gg'\n",
|
|
|
+ " \n",
|
|
|
+ " # only one entity per table\n",
|
|
|
+ " df_trend_entity['entity'] = entity\n",
|
|
|
+ " \n",
|
|
|
+ " df_trend_entity[\"category\"] = df_trend_entity[\"orig_cat_name\"]\n",
|
|
|
+ "\n",
|
|
|
+ " # delete rows that are just a headline or empty\n",
|
|
|
+ " #row_to_delete = df_trend_entity.index[df_trend_entity['category'] == 'Éléments pour mémoire'][0]\n",
|
|
|
+ " #df_trend_entity = df_trend_entity.drop(index = row_to_delete)\n",
|
|
|
+ "\n",
|
|
|
+ " # in the first table there is no empty line\n",
|
|
|
+ " if page != '131':\n",
|
|
|
+ " row_to_delete = df_trend_entity.index[df_trend_entity['category'] == ''][0]\n",
|
|
|
+ " df_trend_entity = df_trend_entity.drop(index = row_to_delete)\n",
|
|
|
+ " \n",
|
|
|
+ " inv_conf[\"cat_code_regexp\"] = r'^(?P<code>[a-zA-Z0-9\\.]{1,11})[\\s\\.].*'\n",
|
|
|
+ "\n",
|
|
|
+ " df_trend_entity[\"category\"] = df_trend_entity[\"category\"].replace(\n",
|
|
|
+ " {\n",
|
|
|
+ " 'Total des émissions et absorptions nationales': \"0\",\n",
|
|
|
+ " '2A5: Autre' : '2A5',\n",
|
|
|
+ " 'Éléments pour mémoire': 'MEMO',\n",
|
|
|
+ " 'Soutes internationales' : 'M.BK',\n",
|
|
|
+ " '1.A.3.a.i - Aviation internationale (soutes internationales)' : 'M.BK.A',\n",
|
|
|
+ " '1.A.3.d.i - Navigation internationale (soutes internationales)' : 'M.BK.M',\n",
|
|
|
+ " '1.A.5.c - Opérations multilatérales' : 'M.MULTIOP',\n",
|
|
|
+ " })\n",
|
|
|
+ "\n",
|
|
|
+ " df_trend_entity[\"category\"] = df_trend_entity[\"category\"].str.replace(\".\", \"\")\n",
|
|
|
+ " df_trend_entity[\"category\"] = df_trend_entity[\"category\"].str.replace(\"\\n\", \"\")\n",
|
|
|
+ " \n",
|
|
|
+ " \n",
|
|
|
+ " repl = lambda m: m.group('code')\n",
|
|
|
+ " df_trend_entity[\"category\"] = \\\n",
|
|
|
+ " df_trend_entity[\"category\"].str.replace(inv_conf[\"cat_code_regexp\"], repl,\n",
|
|
|
+ " regex=True)\n",
|
|
|
+ " \n",
|
|
|
+ " df_trend_entity = df_trend_entity.reset_index(drop=True)\n",
|
|
|
+ " \n",
|
|
|
+ " print(f\"Created category codes.\")\n",
|
|
|
+ " \n",
|
|
|
+ " for year in columns_years:\n",
|
|
|
+ " df_trend_entity[year] = df_trend_entity[year].str.replace(\",\", \".\")\n",
|
|
|
+ " df_trend_entity[year] = df_trend_entity[year].str.replace(\"NE1\", \"NE\")\n",
|
|
|
+ " \n",
|
|
|
+ " # make sure all col headers are str\n",
|
|
|
+ " df_trend_entity.columns = df_trend_entity.columns.map(str)\n",
|
|
|
+ " \n",
|
|
|
+ " df_trend_entity = df_trend_entity.drop(columns=[\"orig_cat_name\"])\n",
|
|
|
+ " \n",
|
|
|
+ " df_trend_entity_long = pd.wide_to_long(df_trend_entity, stubnames='data', i='category', j='time')\n",
|
|
|
+ " \n",
|
|
|
+ " print(f\"Converted to long format.\")\n",
|
|
|
+ " \n",
|
|
|
+ " df_trend_entity_long = df_trend_entity_long.reset_index()\n",
|
|
|
+ " \n",
|
|
|
+ " df_main_dict[page] = df_trend_entity_long\n",
|
|
|
+ "\n",
|
|
|
+ "print(\"Converting to interchange format.\")\n",
|
|
|
+ "\n",
|
|
|
+ "df_trend_all = pd.concat([df_main_dict['131'], df_main_dict['132']], axis=0, join='outer').reset_index(drop=True)\n",
|
|
|
+ "\n",
|
|
|
+ "df_trend_IF = pm2.pm2io.convert_long_dataframe_if(\n",
|
|
|
+ " df_trend_all,\n",
|
|
|
+ " coords_cols=coords_cols,\n",
|
|
|
+ " #add_coords_cols=add_coords_cols,\n",
|
|
|
+ " coords_defaults=coords_defaults,\n",
|
|
|
+ " coords_terminologies=coords_terminologies,\n",
|
|
|
+ " coords_value_mapping=coords_value_mapping['trend'],\n",
|
|
|
+ " #coords_value_filling=coords_value_filling,\n",
|
|
|
+ " filter_remove=filter_remove,\n",
|
|
|
+ " #filter_keep=filter_keep,\n",
|
|
|
+ " meta_data=meta_data,\n",
|
|
|
+ " convert_str=True,\n",
|
|
|
+ " time_format=\"%Y\",\n",
|
|
|
+ " )\n",
|
|
|
+ " \n",
|
|
|
+ "df_trend_IF\n",
|
|
|
+ " "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 68,
|
|
|
+ "id": "05e1ad4f-c35c-460c-8546-5e493f363739",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stderr",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\u001b[32m2024-03-22 09:52:43.765\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2.pm2io._interchange_format\u001b[0m:\u001b[36mfrom_interchange_format\u001b[0m:\u001b[36m320\u001b[0m - \u001b[34m\u001b[1mExpected array shapes: [[1, 1, 1, 1, 2, 78], [1, 1, 1, 1, 2, 78]], resulting in size 312.\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-22 09:52:43.826\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mprimap2._data_format\u001b[0m:\u001b[36mensure_valid_attributes\u001b[0m:\u001b[36m292\u001b[0m - \u001b[1mReference information is not a DOI: 'placeholder'\u001b[0m\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "### convert to primap2 format ###\n",
|
|
|
+ "data_pm2_trend = pm2.pm2io.from_interchange_format(df_trend_IF)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "id": "c3b65227-b7c4-4d18-89ef-af927c9a81b5",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Combine tables and save to IF and native format"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 72,
|
|
|
+ "id": "960117b6-28fc-45ba-a768-16f63e428875",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stderr",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\u001b[32m2024-03-22 10:09:36.801\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2._merge\u001b[0m:\u001b[36mmerge\u001b[0m:\u001b[36m230\u001b[0m - \u001b[34m\u001b[1mmerging for CH4\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-22 10:09:37.026\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2._merge\u001b[0m:\u001b[36mmerge\u001b[0m:\u001b[36m230\u001b[0m - \u001b[34m\u001b[1mmerging for CO2\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-22 10:09:37.187\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2._merge\u001b[0m:\u001b[36mmerge\u001b[0m:\u001b[36m230\u001b[0m - \u001b[34m\u001b[1mmerging for N2O\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-22 10:09:37.351\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2._merge\u001b[0m:\u001b[36mmerge\u001b[0m:\u001b[36m230\u001b[0m - \u001b[34m\u001b[1mmerging for SO2\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-22 10:09:37.448\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mprimap2._merge\u001b[0m:\u001b[36mmerge\u001b[0m:\u001b[36m230\u001b[0m - \u001b[34m\u001b[1mmerging for NMVOC\u001b[0m\n",
|
|
|
+ "\u001b[32m2024-03-22 10:09:37.533\u001b[0m | \u001b[31m\u001b[1mERROR \u001b[0m | \u001b[36mprimap2._merge\u001b[0m:\u001b[36mmerge_with_tolerance_core\u001b[0m:\u001b[36m74\u001b[0m - \u001b[31m\u001b[1mpr.merge error: found discrepancies larger than tolerance (11.00%) for source=GIN-GHG-Inventory, scenario (PRIMAP)=BUR1, provenance=measured, area (ISO3)=GIN, category (IPCC1996_2006_GIN_Inv)=1.A.2:\n",
|
|
|
+ "shown are relative discrepancies.\n",
|
|
|
+ " NMVOC\n",
|
|
|
+ "time \n",
|
|
|
+ "1990-01-01 0.800000\n",
|
|
|
+ "2000-01-01 0.800000\n",
|
|
|
+ "2010-01-01 0.869848\u001b[0m\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "ename": "MergeError",
|
|
|
+ "evalue": "pr.merge error: found discrepancies larger than tolerance (11.00%) for source=GIN-GHG-Inventory, scenario (PRIMAP)=BUR1, provenance=measured, area (ISO3)=GIN, category (IPCC1996_2006_GIN_Inv)=1.A.2:\nshown are relative discrepancies.\n NMVOC\ntime \n1990-01-01 0.800000\n2000-01-01 0.800000\n2010-01-01 0.869848",
|
|
|
+ "output_type": "error",
|
|
|
+ "traceback": [
|
|
|
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|
|
+ "\u001b[0;31mMergeError\u001b[0m Traceback (most recent call last)",
|
|
|
+ "Cell \u001b[0;32mIn[72], line 10\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#### combine\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m#data_pm2_main\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 8\u001b[0m \n\u001b[1;32m 9\u001b[0m \u001b[38;5;66;03m# tolerance needs to be high as rounding in trend tables leads to inconsistent data\u001b[39;00m\n\u001b[0;32m---> 10\u001b[0m data_pm2 \u001b[38;5;241m=\u001b[39m \u001b[43mdata_pm2_main\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmerge\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata_pm2_energy\u001b[49m\u001b[43m,\u001b[49m\u001b[43mtolerance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m0.11\u001b[39;49m\u001b[43m)\u001b[49m\n",
|
|
|
+ "File \u001b[0;32m~/Documents/UNFCCC_non-AnnexI_data/venv/lib/python3.12/site-packages/primap2/_merge.py:231\u001b[0m, in \u001b[0;36mDatasetMergeAccessor.merge\u001b[0;34m(self, ds_merge, tolerance, error_on_discrepancy, combine_attrs)\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m var \u001b[38;5;129;01min\u001b[39;00m vars_common:\n\u001b[1;32m 230\u001b[0m logger\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmerging for \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mvar\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 231\u001b[0m ds_result_new \u001b[38;5;241m=\u001b[39m \u001b[43mmerge_with_tolerance_core\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43mda_start\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds_start\u001b[49m\u001b[43m[\u001b[49m\u001b[43mvar\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43mda_merge\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mds_merge\u001b[49m\u001b[43m[\u001b[49m\u001b[43mvar\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mtolerance\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtolerance\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43merror_on_discrepancy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merror_on_discrepancy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 237\u001b[0m ds_result \u001b[38;5;241m=\u001b[39m xr\u001b[38;5;241m.\u001b[39mmerge([ds_result, ds_result_new], combine_attrs\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124moverride\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 238\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m ds_result\n",
|
|
|
+ "File \u001b[0;32m~/Documents/UNFCCC_non-AnnexI_data/venv/lib/python3.12/site-packages/primap2/_merge.py:75\u001b[0m, in \u001b[0;36mmerge_with_tolerance_core\u001b[0;34m(da_start, da_merge, tolerance, error_on_discrepancy)\u001b[0m\n\u001b[1;32m 73\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m error_on_discrepancy:\n\u001b[1;32m 74\u001b[0m logger\u001b[38;5;241m.\u001b[39merror(log_message)\n\u001b[0;32m---> 75\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m xr\u001b[38;5;241m.\u001b[39mMergeError(log_message)\n\u001b[1;32m 76\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 77\u001b[0m \u001b[38;5;66;03m# log warning, continue with merging\u001b[39;00m\n\u001b[1;32m 78\u001b[0m logger\u001b[38;5;241m.\u001b[39mwarning(log_message)\n",
|
|
|
+ "\u001b[0;31mMergeError\u001b[0m: pr.merge error: found discrepancies larger than tolerance (11.00%) for source=GIN-GHG-Inventory, scenario (PRIMAP)=BUR1, provenance=measured, area (ISO3)=GIN, category (IPCC1996_2006_GIN_Inv)=1.A.2:\nshown are relative discrepancies.\n NMVOC\ntime \n1990-01-01 0.800000\n2000-01-01 0.800000\n2010-01-01 0.869848"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "#### combine\n",
|
|
|
+ "\n",
|
|
|
+ "#data_pm2_main\n",
|
|
|
+ "#data_pm2_trend\n",
|
|
|
+ "#data_pm2_energy\n",
|
|
|
+ "#data_pm2_lulucf\n",
|
|
|
+ "#data_pm2_waste\n",
|
|
|
+ "\n",
|
|
|
+ "# tolerance needs to be high as rounding in trend tables leads to inconsistent data\n",
|
|
|
+ "data_pm2 = data_pm2_main.pr.merge(data_pm2_energy,tolerance=0.11)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "id": "3cb74c9e-b400-454b-848a-28091b832016",
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "# convert back to IF to have units in the fixed format\n",
|
|
|
+ "data_if = data_pm2.pr.to_interchange_format()\n",
|
|
|
+ "\n",
|
|
|
+ "# ###\n",
|
|
|
+ "# save data to IF and native format\n",
|
|
|
+ "# ###\n",
|
|
|
+ "pm2.pm2io.write_interchange_format(\n",
|
|
|
+ " output_folder / (output_filename + coords_terminologies[\"category\"] + \"_raw\"), data_if)\n",
|
|
|
+ "\n",
|
|
|
+ "encoding = {var: compression for var in data_pm2.data_vars}\n",
|
|
|
+ "data_pm2.pr.to_netcdf(\n",
|
|
|
+ " output_folder / (output_filename + coords_terminologies[\"category\"] + \"_raw.nc\"),\n",
|
|
|
+ " encoding=encoding)"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3 (ipykernel)",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.12.2"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 5
|
|
|
+}
|