|
@@ -4,8 +4,11 @@
|
|
|
import camelot
|
|
|
import primap2 as pm2
|
|
|
import pandas as pd
|
|
|
-import numpy as np
|
|
|
+import copy
|
|
|
from pathlib import Path
|
|
|
+from config_MAR_BUR3 import zero_cats, cat_mapping, aggregate_cats, remove_cats, \
|
|
|
+ table_defs, header_defs
|
|
|
+from primap2.pm2io._data_reading import matches_time_format, filter_data
|
|
|
|
|
|
# ###
|
|
|
# configuration
|
|
@@ -15,12 +18,8 @@ root_path = root_path.resolve()
|
|
|
downloaded_data_path = root_path / "downloaded_data"
|
|
|
extracted_data_path = root_path / "extracted_data"
|
|
|
|
|
|
-
|
|
|
input_folder = downloaded_data_path / 'UNFCCC' / 'Morocco' / 'BUR3'
|
|
|
output_folder = extracted_data_path / 'UNFCCC' / 'Morocco'
|
|
|
-if not output_folder.exists():
|
|
|
- output_folder.mkdir()
|
|
|
-
|
|
|
output_filename = 'MAR_BUR3_2022_'
|
|
|
|
|
|
inventory_file = 'Morocco_BUR3_Fr.pdf'
|
|
@@ -33,79 +32,29 @@ pages_to_read = range(104, 138)
|
|
|
|
|
|
compression = dict(zlib=True, complevel=9)
|
|
|
|
|
|
-header_defs = {
|
|
|
- 'Energy': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
|
|
|
- ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg']],
|
|
|
- 'Agriculture': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
|
|
|
- ['', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg', 'Gg']],
|
|
|
- 'IPPU': [['Catégories', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6', 'NOx', 'CO', 'COVNM', 'SO2'],
|
|
|
- ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
|
|
|
- 'LULUCF': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
|
|
|
- ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
|
|
|
- 'Waste': [['Catégories', 'CO2', 'CH4', 'N2O', 'NOx', 'CO', 'COVNM', 'SO2'],
|
|
|
- ['', 'GgCO2eq', 'GgCO2eq', 'GgCO2eq', 'Gg', 'Gg', 'Gg', 'Gg']],
|
|
|
-}
|
|
|
-
|
|
|
-# define which raw tables to combine
|
|
|
-table_defs = {
|
|
|
- 2010: {
|
|
|
- 'Energy': [0, 1],
|
|
|
- 'Agriculture': [10],
|
|
|
- 'IPPU': [15, 16, 17],
|
|
|
- 'LULUCF': [30],
|
|
|
- 'Waste': [35],
|
|
|
- },
|
|
|
- 2012: {
|
|
|
- 'Energy': [2, 3],
|
|
|
- 'Agriculture': [11],
|
|
|
- 'IPPU': [18, 19, 20],
|
|
|
- 'LULUCF': [31],
|
|
|
- 'Waste': [36],
|
|
|
- },
|
|
|
- 2014: {
|
|
|
- 'Energy': [4, 5],
|
|
|
- 'Agriculture': [10],
|
|
|
- 'IPPU': [21, 22, 23],
|
|
|
- 'LULUCF': [32],
|
|
|
- 'Waste': [37],
|
|
|
- },
|
|
|
- 2016: {
|
|
|
- 'Energy': [6, 7],
|
|
|
- 'Agriculture': [10],
|
|
|
- 'IPPU': [24, 25, 26],
|
|
|
- 'LULUCF': [33],
|
|
|
- 'Waste': [38],
|
|
|
- },
|
|
|
- 2018: {
|
|
|
- 'Energy': [8, 9],
|
|
|
- 'Agriculture': [14],
|
|
|
- 'IPPU': [27, 28, 29],
|
|
|
- 'LULUCF': [34],
|
|
|
- 'Waste': [39],
|
|
|
- },
|
|
|
-}
|
|
|
-
|
|
|
# special header as category code and name in one column
|
|
|
header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
|
|
|
|
|
|
index_cols = ['Catégories']
|
|
|
|
|
|
# rows to remove
|
|
|
-cats_remove = []
|
|
|
+cats_remove = [
|
|
|
+ 'Agriculture' # always empty
|
|
|
+]
|
|
|
|
|
|
# manual category codes
|
|
|
cat_codes_manual = {
|
|
|
'1.A.2.e -Industries agro-alimentaires et du tabac': '1.A.2.e',
|
|
|
'1.A.2.f -Industries des minéraux non- métalliques': '1.A.2.f',
|
|
|
- 'Agriculture': 'M.AG',
|
|
|
+ #'Agriculture': 'M.AG',
|
|
|
'2. PIUP': '2',
|
|
|
'UTCATF': 'M.LULUCF',
|
|
|
- '3.B.1 Terres forestières': '3.B.1',
|
|
|
- '3.B.2 Terres cultivées': '3.B.2',
|
|
|
- '3.B.3 Prairies': '3.B.3',
|
|
|
- '3.B.4 Terres humides': '3.B.4',
|
|
|
- '3.B.5 Etablissements': '3.B.5',
|
|
|
- '3.B.6 Autres terres': '3.B.6',
|
|
|
+ '3.B.1 Terres forestières': 'LU.3.B.1',
|
|
|
+ '3.B.2 Terres cultivées': 'LU.3.B.2',
|
|
|
+ '3.B.3 Prairies': 'LU.3.B.3',
|
|
|
+ '3.B.4 Terres humides': 'LU.3.B.4',
|
|
|
+ '3.B.5 Etablissements': 'LU.3.B.5',
|
|
|
+ '3.B.6 Autres terres': 'LU.3.B.6',
|
|
|
'1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
|
|
|
}
|
|
|
|
|
@@ -113,7 +62,7 @@ cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
|
|
|
|
|
|
coords_terminologies = {
|
|
|
"area": "ISO3",
|
|
|
- "category": "IPCC2006_PRIMAP",
|
|
|
+ "category": "IPCC1996_2006_MAR_Inv",
|
|
|
"scenario": "PRIMAP",
|
|
|
}
|
|
|
|
|
@@ -140,9 +89,9 @@ coords_cols = {
|
|
|
"unit": "unit"
|
|
|
}
|
|
|
|
|
|
-add_coords_cols = {
|
|
|
- "orig_cat_name": ["orig_cat_name", "category"],
|
|
|
-}
|
|
|
+#add_coords_cols = {
|
|
|
+# "orig_cat_name": ["orig_cat_name", "category"],
|
|
|
+#}
|
|
|
|
|
|
filter_remove = {
|
|
|
"f1": {
|
|
@@ -184,6 +133,13 @@ for year in table_defs.keys():
|
|
|
df_this_table = df_this_table.drop(df_this_table.iloc[0:2].index)
|
|
|
df_this_table.columns = header_defs[sector]
|
|
|
|
|
|
+ # fix 2018 agriculture table: shift the values of the first 25 rows down by one row
|
|
|
+ if (year == 2018) & (sector == "Agriculture"):
|
|
|
+ last_shift_row = 25
|
|
|
+ df_temp = df_this_table.iloc[0: last_shift_row, 1:].copy()
|
|
|
+ df_this_table.iloc[0, 1:] = ''
|
|
|
+ df_this_table.iloc[1: last_shift_row + 1, 1:] = df_temp
|
|
|
+
|
|
|
# replace line breaks, long hyphens, and double and triple spaces in category names
|
|
|
df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace("\n", " ")
|
|
|
df_this_table.iloc[:, 0] = df_this_table.iloc[:, 0].str.replace(" ", " ")
|
|
@@ -221,7 +177,7 @@ df_all = df_all.reset_index(drop=True)
|
|
|
|
|
|
# prepare numbers for pd.to_numeric
|
|
|
df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(' ', '')
|
|
|
-repl = lambda m: m.group('part1') + m.group('part2')
|
|
|
+repl = lambda m: m.group('part1') + '.' + m.group('part2')
|
|
|
df_all.loc[:, 'data'] = df_all.loc[:, 'data'].str.replace(
|
|
|
'(?P<part1>[0-9]+),(?P<part2>[0-9\.]+)$', repl, regex=True)
|
|
|
df_all['data'][df_all['data'].isnull()] = 'NaN'
|
|
@@ -231,6 +187,9 @@ for entity in df_all["entity"].unique():
|
|
|
df_all["entity"][(df_all["entity"] == entity) & (
|
|
|
df_all["unit"] == "GgCO2eq")] = f"{entity} ({gwp_to_use})"
|
|
|
|
|
|
+# drop "orig_cat_name" as it has non-unique values per category
|
|
|
+df_all = df_all.drop(columns="orig_cat_name")
|
|
|
+
|
|
|
data_if = pm2.pm2io.convert_long_dataframe_if(
|
|
|
df_all,
|
|
|
coords_cols=coords_cols,
|
|
@@ -265,9 +224,94 @@ data_pm2 = data_pm2.drop_vars(entities_to_convert)
|
|
|
# convert back to IF to have units in the fixed format
|
|
|
data_if = data_pm2.pr.to_interchange_format()
|
|
|
|
|
|
-##### save data to IF and native format ####
|
|
|
+# ###
|
|
|
+# convert to IPCC2006 categories
|
|
|
+# ###
|
|
|
+data_if_2006 = copy.deepcopy(data_if)
|
|
|
+data_if_2006.attrs = copy.deepcopy(data_if.attrs)
|
|
|
+
|
|
|
+filter_remove_cats = {
|
|
|
+ "cat": {
|
|
|
+ f"category ({coords_terminologies['category']})":
|
|
|
+ remove_cats
|
|
|
+ },
|
|
|
+}
|
|
|
+
|
|
|
+filter_data(data_if_2006, filter_remove=filter_remove_cats)
|
|
|
+
|
|
|
+# map categories
|
|
|
+data_if_2006 = data_if_2006.replace(
|
|
|
+ {f"category ({coords_terminologies['category']})": cat_mapping})
|
|
|
+data_if_2006[f"category ({coords_terminologies['category']})"].unique()
|
|
|
+
|
|
|
+# rename the category col
|
|
|
+data_if_2006.rename(columns={
|
|
|
+ f"category ({coords_terminologies['category']})": 'category (IPCC2006_PRIMAP)'},
|
|
|
+ inplace=True)
|
|
|
+data_if_2006.attrs['attrs']['cat'] = 'category (IPCC2006_PRIMAP)'
|
|
|
+data_if_2006.attrs['dimensions']['*'] = [
|
|
|
+ 'category (IPCC2006_PRIMAP)' if item == f"category ({coords_terminologies['category']})"
|
|
|
+ else item for item in data_if_2006.attrs['dimensions']['*']]
|
|
|
+# aggregate categories
|
|
|
+time_format = '%Y'
|
|
|
+time_columns = [
|
|
|
+ col
|
|
|
+ for col in data_if_2006.columns.values
|
|
|
+ if matches_time_format(col, time_format)
|
|
|
+]
|
|
|
+
|
|
|
+for cat_to_agg in aggregate_cats:
|
|
|
+ mask = data_if_2006["category (IPCC2006_PRIMAP)"].isin(
|
|
|
+ aggregate_cats[cat_to_agg]["sources"])
|
|
|
+ df_test = data_if_2006[mask]
|
|
|
+ # print(df_test)
|
|
|
+
|
|
|
+ if len(df_test) > 0:
|
|
|
+ print(f"Aggregating category {cat_to_agg}")
|
|
|
+ df_combine = df_test.copy(deep=True)
|
|
|
+
|
|
|
+ for col in time_columns:
|
|
|
+ df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
|
|
|
+
|
|
|
+ df_combine = df_combine.groupby(
|
|
|
+ by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
|
|
|
+ 'unit']).sum(min_count=1)
|
|
|
+
|
|
|
+ df_combine.insert(0, "category (IPCC2006_PRIMAP)", cat_to_agg)
|
|
|
+ # df_combine.insert(1, "cat_name_translation", aggregate_cats[cat_to_agg]["name"])
|
|
|
+ # df_combine.insert(2, "orig_cat_name", "computed")
|
|
|
+
|
|
|
+ df_combine = df_combine.reset_index()
|
|
|
+
|
|
|
+ data_if_2006 = pd.concat([data_if_2006, df_combine], axis=0, join='outer')
|
|
|
+ data_if_2006 = data_if_2006.reset_index(drop=True)
|
|
|
+ else:
|
|
|
+ print(f"no data to aggregate category {cat_to_agg}")
|
|
|
+
|
|
|
+for cat in zero_cats:
|
|
|
+ entities = data_if_2006["entity"].unique()
|
|
|
+ data_zero = data_if_2006[data_if_2006["category (IPCC2006_PRIMAP)"]=="1"].copy(
|
|
|
+ deep=True)
|
|
|
+ data_zero["category (IPCC2006_PRIMAP)"] = cat
|
|
|
+ for col in time_columns:
|
|
|
+ data_zero[col] = 0
|
|
|
+
|
|
|
+ data_if_2006 = pd.concat([data_if_2006, data_zero])
|
|
|
+
|
|
|
+# conversion to PRIMAP2 native format
|
|
|
+data_pm2_2006 = pm2.pm2io.from_interchange_format(data_if_2006)
|
|
|
+
|
|
|
+# convert back to IF to have units in the fixed format
|
|
|
+data_if_2006 = data_pm2_2006.pr.to_interchange_format()
|
|
|
+
|
|
|
+
|
|
|
+# ###
|
|
|
+# save data to IF and native format
|
|
|
+# ###
|
|
|
if not output_folder.exists():
|
|
|
output_folder.mkdir()
|
|
|
+
|
|
|
+# data in original categories
|
|
|
pm2.pm2io.write_interchange_format(
|
|
|
output_folder / (output_filename + coords_terminologies["category"]), data_if)
|
|
|
|
|
@@ -276,3 +320,10 @@ data_pm2.pr.to_netcdf(
|
|
|
output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
|
|
|
encoding=encoding)
|
|
|
|
|
|
+# data in 2006 categories
|
|
|
+pm2.pm2io.write_interchange_format(
|
|
|
+ output_folder / (output_filename + "IPCC2006_PRIMAP"), data_if_2006)
|
|
|
+
|
|
|
+encoding = {var: compression for var in data_pm2_2006.data_vars}
|
|
|
+data_pm2_2006.pr.to_netcdf(
|
|
|
+ output_folder / (output_filename + "IPCC2006_PRIMAP" + ".nc"), encoding=encoding)
|