Browse Source

[DATALAD] Recorded changes

Johannes Gütschow 2 years ago
parent
commit
50b989e70c

+ 3 - 0
code/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py

@@ -12,6 +12,9 @@ from pathlib import Path
 # configuration
 # ###
 
+# TODO: lots of empty lines are written to the csv file. Check if solved with new
+#  PRIMAP2 version
+
 # folders and files
 root_path = Path(__file__).parents[3].absolute()
 root_path = root_path.resolve()

+ 27 - 5
code/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py

@@ -5,11 +5,9 @@
 import pandas as pd
 import primap2 as pm2
 from pathlib import Path
+from primap2.pm2io._data_reading import matches_time_format
 
 
-
-from primap2.pm2io._data_reading import filter_data
-
 # ###
 # configuration
 # ###
@@ -213,10 +211,34 @@ data_if = pm2.pm2io.convert_long_dataframe_if(
     )
 
 
+# combine CO2 emissions and absorptions
+data_CO2 = data_if[data_if[f"entity"].isin([
+    'CO2 Absorptions', 'CO2 Emissions'])]
+
+time_format = '%Y'
+time_columns = [
+    col
+    for col in data_CO2.columns.values
+    if matches_time_format(col, time_format)
+]
+
+for col in time_columns:
+    data_CO2[col] = pd.to_numeric(data_CO2[col], errors="coerce")
+
+data_CO2 = data_CO2.groupby(
+    by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)',
+        f"category ({coords_terminologies['category']})",
+        'unit']).sum(min_count = 1)
+
+data_CO2.insert(0, 'entity', 'CO2')
+data_CO2 = data_CO2.reset_index()
+
+data_if = pd.concat([data_if, data_CO2])
+
+
+
 data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
-# combine CO2 emissions and absorptions
-data_pm2["CO2"] = data_pm2['CO2 Absorptions'] + data_pm2['CO2 Emissions']
 
 # convert back to IF to have units in the fixed format
 data_if = data_pm2.pr.to_interchange_format()

+ 111 - 18
code/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py

@@ -45,9 +45,10 @@ header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 # manual category codes
 cat_codes_manual = {
     'Total National Emissions and Removals': '0',
-    'Peat Decomposition': 'M.PD',
-    'Peat Fire': 'M.PF',
+    'Peat Decomposition': 'M.3.B.4.APD',
+    'Peat Fire': 'M.3.B.4.APF',
     '4A1.2 Industrial Solid Waste Disposal': 'M.4.A.Ind',
+    #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
 }
 
 cat_code_regexp = r'(?P<code>^[a-zA-Z0-9]{1,4})\s.*'
@@ -58,6 +59,9 @@ coords_cols = {
     "unit": "unit",
 }
 
+add_coords_cols = {
+    "orig_cat_name": ["orig_cat_name", "category"],
+}
 
 coords_terminologies = {
     "area": "ISO3",
@@ -143,6 +147,15 @@ aggregate_cats = {
     '4.A': {'sources': ['4.A.2', 'M.4.A.Ind'], 'name': 'Solid Waste Disposal (calculated)'},
 }
 
+aggregate_cats_N2O = {
+    '3.A.2': {'sources': ['3.A.2.b'], 'name': '3A2 Manure Management'},
+    '3.A': {'sources': ['3.A.2'], 'name': '3A Livestock'},
+}
+
+aggregate_cats_CO2CH4N2O = {
+    '3.A.2': {'sources': ['3.A.2', '3.A.2.b'], 'name': '3A2 Manure Management'},
+}
+
 df_all = None
 
 for page in pages_to_read:
@@ -198,17 +211,18 @@ df_all = df_all.reset_index(drop=True)
 # replace "," with "" in data
 df_all.loc[:, "data"] = df_all.loc[:, "data"].str.replace(',','', regex=False)
 
-
 # make sure all col headers are str
 df_all.columns = df_all.columns.map(str)
 
+
+
 # ###
 # convert to PRIMAP2 interchange format
 # ###
 data_if = pm2.pm2io.convert_long_dataframe_if(
     df_all,
     coords_cols=coords_cols,
-    #add_coords_cols=add_coords_cols,
+    add_coords_cols=add_coords_cols,
     coords_defaults=coords_defaults,
     coords_terminologies=coords_terminologies,
     coords_value_mapping=coords_value_mapping,
@@ -246,7 +260,7 @@ for cat_to_agg in aggregate_cats:
 
         df_combine = df_combine.groupby(
             by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
-                'unit']).sum()
+                'unit']).sum(min_count=1)
 
         df_combine.insert(0, cat_label, cat_to_agg)
         df_combine.insert(1, "orig_cat_name", aggregate_cats[cat_to_agg]["name"])
@@ -256,23 +270,102 @@ for cat_to_agg in aggregate_cats:
         data_if = pd.concat([data_if, df_combine])
     else:
         print(f"no data to aggregate category {cat_to_agg}")
-data_if.attrs = attrs
 
-data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
-# convert to mass units from CO2eq
-entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in
-                       entities_to_convert_to_mass]
+# delete cat 3 for N2O as it's wrong
+index_3A_N2O = data_if[(data_if[cat_label] == '3') &
+                       (data_if['entity'] == 'N2O')].index
+data_if = data_if.drop(index_3A_N2O)
+
+# aggregate cats 3.A.2 and 3.A for N2O
+for cat_to_agg in aggregate_cats_N2O:
+    mask = data_if[cat_label].isin(aggregate_cats_N2O[cat_to_agg]["sources"])
+    df_test = data_if[mask]
+    df_test = df_test[df_test["entity"] == "N2O"]
 
-for entity in entities_to_convert:
-    converted = data_pm2[entity].pr.convert_to_mass()
-    basic_entity = entity.split(" ")[0]
-    converted = converted.to_dataset(name=basic_entity)
-    data_pm2 = data_pm2.pr.merge(converted)
-    data_pm2[basic_entity].attrs["entity"] = basic_entity
+    if len(df_test) > 0:
+        print(f"Aggregating category {cat_to_agg}")
+        df_combine = df_test.copy(deep=True)
+
+        time_format = '%Y'
+        time_columns = [
+            col
+            for col in df_combine.columns.values
+            if matches_time_format(col, time_format)
+        ]
+
+        for col in time_columns:
+            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
+
+        df_combine = df_combine.groupby(
+            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
+                'unit']).sum(min_count=1)
+
+        df_combine.insert(0, cat_label, cat_to_agg)
+        df_combine.insert(1, "orig_cat_name", aggregate_cats_N2O[cat_to_agg]["name"])
+
+        df_combine = df_combine.reset_index()
+
+        data_if = pd.concat([data_if, df_combine])
+    else:
+        print(f"no data to aggregate category {cat_to_agg}")
+
+# delete cat 3.A.2 for CO2CH4N2O as it's wrong
+index_3A2_CO2CH4N2O = data_if[(data_if[cat_label] == '3.A.2') &
+                       (data_if['entity'] == 'CH4CO2N2O (SARGWP100)')].index
+data_if = data_if.drop(index_3A2_CO2CH4N2O)
+
+# aggregate cat 3.A.2 for CO2CH4N2O
+for cat_to_agg in aggregate_cats_CO2CH4N2O:
+    mask = data_if[cat_label].isin(aggregate_cats_CO2CH4N2O[cat_to_agg]["sources"])
+    df_test = data_if[mask]
+    df_test = df_test[df_test["entity"] == "CO2CH4N2O (SARGWP100)"]
+
+    if len(df_test) > 0:
+        print(f"Aggregating category {cat_to_agg}")
+        df_combine = df_test.copy(deep=True)
+
+        time_format = '%Y'
+        time_columns = [
+            col
+            for col in df_combine.columns.values
+            if matches_time_format(col, time_format)
+        ]
+
+        for col in time_columns:
+            df_combine[col] = pd.to_numeric(df_combine[col], errors="coerce")
+
+        df_combine = df_combine.groupby(
+            by=['source', 'scenario (PRIMAP)', 'provenance', 'area (ISO3)', 'entity',
+                'unit']).sum(min_count=1)
+
+        df_combine.insert(0, cat_label, cat_to_agg)
+        df_combine.insert(1, "orig_cat_name", aggregate_cats_CO2CH4N2O[cat_to_agg]["name"])
+
+        df_combine = df_combine.reset_index()
+
+        data_if = pd.concat([data_if, df_combine])
+    else:
+        print(f"no data to aggregate category {cat_to_agg}")
+
+
+data_if.attrs = attrs
+
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
-# drop the GWP data
-data_pm2 = data_pm2.drop_vars(entities_to_convert)
+# # convert to mass units from CO2eq
+# entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in
+#                        entities_to_convert_to_mass]
+#
+# for entity in entities_to_convert:
+#     converted = data_pm2[entity].pr.convert_to_mass()
+#     basic_entity = entity.split(" ")[0]
+#     converted = converted.to_dataset(name=basic_entity)
+#     data_pm2 = data_pm2.pr.merge(converted)
+#     data_pm2[basic_entity].attrs["entity"] = basic_entity
+#
+# # drop the GWP data
+# data_pm2 = data_pm2.drop_vars(entities_to_convert)
 
 # convert back to IF to have units in the fixed format
 data_if = data_pm2.pr.to_interchange_format()

+ 4 - 1
code/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py

@@ -61,6 +61,9 @@ coords_cols = {
     "unit": "unit",
 }
 
+add_coords_cols = {
+    "orig_cat_name": ["orig_cat_name", "category"],
+}
 
 coords_terminologies = {
     "area": "ISO3",
@@ -182,7 +185,7 @@ df_all.columns = df_all.columns.map(str)
 data_if = pm2.pm2io.convert_long_dataframe_if(
     df_all,
     coords_cols=coords_cols,
-    #add_coords_cols=add_coords_cols,
+    add_coords_cols=add_coords_cols,
     coords_defaults=coords_defaults,
     coords_terminologies=coords_terminologies,
     coords_value_mapping=coords_value_mapping,

+ 5 - 1
code/UNFCCC_reader/Montenegro/config_MNE_BUR3.py

@@ -53,11 +53,15 @@ cat_mapping = {
 aggregate_cats = {
     '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
     '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.4', '3.B.5', '3.B.6'], 'name': 'Land'},
+    '3.C.1.AG': {'sources': ['3.C.1.c', '3.C.1.b'], 'name': 'Emissions from Biomass '
+                                                          'Burning (Agriculture)'},
     '3.C.1': {'sources': ['3.C.1.c', '3.C.1.b'], 'name': 'Emissions from Biomass Burning'},
     '3.C': {'sources': ['3.C.1', '3.C.3', 'M.3.C.45AG', '3.C.7'],
             'name': 'Aggregate sources and non-CO2 emissions sources on land'},
-    'M.3.C.AG': {'sources': ['3.C.1', '3.C.3', 'M.3.C.45AG', '3.C.7'],
+    'M.3.C.AG': {'sources': ['3.C.1.AG', '3.C.3', 'M.3.C.45AG', '3.C.7'],
             'name': 'Aggregate sources and non-CO2 emissions sources on land (Agriculture)'},
+    '3.D': {'sources': ['3.D.1'], 'name': 'Other'},
     '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
     'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
+    '0': {'sources': ['1', '2', '3', '4', '5']},
 }

+ 17 - 9
code/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -78,6 +78,14 @@ coords_value_mapping = {
     },
 }
 
+coords_value_filling = {
+    "category": {
+        "orig_cat_name": {
+            'International Bunkers': 'M.BK',
+        },
+    },
+}
+
 coords_cols = {
     "category": "category",
     "entity": "entity",
@@ -184,15 +192,15 @@ for i, table in enumerate(tables):
 entities_to_convert = ['N2O', 'SF6', 'CH4']
 entities_to_convert = [f"{entity} ({gwp_to_use})" for entity in entities_to_convert]
 
-for entity in entities_to_convert:
-    converted = data_all[entity].pr.convert_to_mass()
-    basic_entity = entity.split(" ")[0]
-    converted = converted.to_dataset(name=basic_entity)
-    data_all = data_all.pr.merge(converted)
-    data_all[basic_entity].attrs["entity"] = basic_entity
-
-# drop the GWP data
-data_all = data_all.drop_vars(entities_to_convert)
+# for entity in entities_to_convert:
+#     converted = data_all[entity].pr.convert_to_mass()
+#     basic_entity = entity.split(" ")[0]
+#     converted = converted.to_dataset(name=basic_entity)
+#     data_all = data_all.pr.merge(converted)
+#     data_all[basic_entity].attrs["entity"] = basic_entity
+#
+# # drop the GWP data
+# data_all = data_all.drop_vars(entities_to_convert)
 
 # convert back to IF
 data_if = data_all.pr.to_interchange_format()

+ 2 - 0
code/UNFCCC_reader/Morocco/config_MAR_BUR3.py

@@ -133,6 +133,8 @@ aggregate_cats = {
     '3': {'sources': ['M.AG', 'M.LULUCF'], 'name': 'AFOLU'},
     'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock emissions'},
     '4': {'sources': ['4.A', '4.D'], 'name': 'Waste'},
+    '0': {'sources': ['1', '2', '3', '4']},
+    'M.0.EL': {'sources': ['1', '2', 'M.AG', '4']},
 }
 
 zero_cats = ['1.B.2.a.i', '1.B.2.a.ii'] # venting and flaring with 0 for oil as

+ 2 - 1
code/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py

@@ -207,7 +207,8 @@ df_all.columns = df_all.columns.map(str)
 # conversion to PRIMAP2 native format
 data_pm2 = pm2.pm2io.from_interchange_format(data_if)
 
-entities_to_convert = ['N2O', 'SF6', 'CO2', 'CH4']
+entities_to_convert = ['CO2'] #['N2O', 'SF6', 'CO2', 'CH4'] # CO2 is not converted on
+# conversion to IF as data with and without GWP exists. needs to be fixed in primap2
 entities_to_convert = [f"{entity} (AR4GWP100)" for entity in entities_to_convert]
 
 # convert GWP units to mass units

+ 8 - 2
code/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py

@@ -395,13 +395,19 @@ aggregate_after_mapping = {
     '2.A': {'sources': ['2.A.1', '2.A.2', '2.A.4', '2.A.5', '2.A.6'],
             'name': 'Mineral Industry'},
     '2.B': {'sources': ['2.B', '2.B.7', '2.B.9'], 'name': 'Chemical Industry'},
-    '2.D': {'sources': ['2006.2.D.4'], 'name': 'Other'},
+    '2.D': {'sources': ['2.D.4'], 'name': 'Other'},
     '2.E': {'sources': ['2.E_1', '2.E_2'], 'name': 'Electronics Industry'},
     '2.F': {'sources': ['2.F.1', '2.F.2', '2.F.3', '2.F.4', '2.F.5'],
             'name': 'Product uses as Substitutes for Ozone Depleting Substances'},
     '2.G': {'sources': ['2.G.1', '2.G.2'], 'name': 'Other Product Manufacture and Use'},
     '3.A': {'sources': ['3.A.1', '3.A.2'], 'name': 'Livestock'},
-    'M.3.C.AG': {'sources': ['3.C.4', '3.C.5', '3.C.7'], 'name': 'Livestock'},
+    '3.C': {'sources': ['3.C.4', '3.C.5', '3.C.7'],
+                 'name': 'Aggregate sources and non-CO2 emissions sources on land'},
+    'M.3.C.AG': {'sources': ['3.C.4', '3.C.5', '3.C.7'],
+                 'name': 'Aggregate sources and non-CO2 emissions sources on land ('
+                         'Agriculture)'},
+    'M.AG.ELV': {'sources': ['M.3.C.AG'], 'name': 'Agriculture excluding livestock'},
+    '4.C': {'sources': ['4.C.1'], 'name': 'Incineration and Open Burning of Waste'},
 }
 
 coords_terminologies_2006 = {

+ 1 - 1
downloaded_data/UNFCCC/submissions-annexI_2022.csv

@@ -1 +1 @@
-../../.git/annex/objects/q8/6G/MD5E-s28864--b15316f801cfaacc706b982a5d34af9d.csv/MD5E-s28864--b15316f801cfaacc706b982a5d34af9d.csv
+../../.git/annex/objects/fp/g4/MD5E-s28973--20d894ec140f55bed9c9182bb516fdb1.csv/MD5E-s28973--20d894ec140f55bed9c9182bb516fdb1.csv