Johannes 11 miesięcy temu
rodzic
commit
1aef3f8112

+ 291 - 0
UNFCCC_GHG_data/UNFCCC_reader/Argentina/config_ARG_BUR5.py

@@ -0,0 +1,291 @@
+### config for reading and conversion to primap2 format
+time_format = "%Y"
+
+coords_cols = {
+    "category": "id_ipcc",
+    "entity": "tipo_de_gas",
+    "time": "año",
+    "data": "valor_en_toneladas_de_gas",
+}
+
+add_coords_cols = {}
+
+coords_terminologies = {
+    "area": "ISO3",
+    "category": "IPCC2006_PRIMAP",
+    "scenario": "PRIMAP",
+}
+
+coords_defaults = {
+    "source": "ARG-GHG-Inventory",
+    "provenance": "measured",
+    "area": "ARG",
+    "scenario": "BUR5",
+    #"unit": "tonnes" # this might not work as the entity has to be specified
+}
+
+unit = 't'
+
+coords_value_mapping = {
+    "category": "PRIMAP1",
+    "unit": "PRIMAP1",
+    "entity": {
+        'HFC_23': 'HFC23',
+        'HFC_32': 'HFC32',
+        'HFC_125': 'HFC125',
+        'HFC_134a': 'HFC134a',
+        'HFC_152a': 'HFC152a',
+        'HFC_143a': 'HFC143a',
+        'HFC_227ea': 'HFC227ea',
+        'HFC_236fa': 'HFC236fa',
+        'HFC_365mfc': 'HFC365mfc',
+        'HFC_245fa': 'HFC245fa',
+        'PFC_143_CF4': 'CF4',
+        'PFC_116_C2F6': 'C2F6',
+    },
+}
+
+coords_value_filling = {
+}
+
+filter_remove = {
+}
+
+filter_keep = {}
+
+meta_data = {
+    "ref": "https://unfccc.int/documents/634953",
+    "ref2": "https://ciam.ambiente.gob.ar/repositorio.php?tid=9&stid=36&did=394#",
+    "rights": "",
+    "contact": "mail@johannes-guetschow.de",
+    "title": "",
+    "comment": "Read fom pcsv file by Johannes Gütschow",
+    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
+}
+
+
+ ### config for processing
+
+# many custom categories which are not in climate categories, so automatic
+# aggregation would be a lot of coding work
+cats_to_agg = { # name is just for readability, not used
+    '1.A.1.c': {'sources': ['1.A.1.c.ii'],
+                'name': 'Manufacture of Solid Fuels and Other Energy Industries'},
+    '1.A.1': {'sources': ['1.A.1.a', '1.A.1.b', '1.A.1.c'],
+              'name': 'Energy Industries'},
+    '1.A.2': {'sources': ['1.A.2.a', '1.A.2.b', '1.A.2.c', '1.A.2.d',
+                          '1.A.2.e', '1.A.2.f', '1.A.2.g', '1.A.2.j',
+                          '1.A.2.l', '1.A.2.m'],
+              'name': 'Manufacturing Industries and Construction'},
+    '1.A.3.a': {'sources': ['1.A.3.a.ii'],
+                'name': 'Civil Aviation'},
+    '1.A.3.b': {'sources': ['1.A.3.b.iii', '1.A.3.b.vii'],
+                'name': 'Road Transportation'},
+    '1.A.3.d': {'sources': ['1.A.3.d.ii'],
+                'name': 'Water-Borne Navigation'},
+    '1.A.3.e': {'sources': ['1.A.3.e.i'],
+                'name': 'Other Transportation'},
+    '1.A.3': {'sources': ['1.A.3.a', '1.A.3.b', '1.A.3.c', '1.A.3.d',
+                          '1.A.3.e'],
+              'name': 'Transport'},
+    '1.A.4.a': {'sources': ['1.A.4.a.i', '1.A.4.a.ii', '1.A.4.a.iii'],
+                'name': 'Commercial/Institutional'},
+    '1.A.4': {'sources': ['1.A.4.a', '1.A.4.b', '1.A.4.c'],
+              'name': 'Other Sectors'},
+    '1.A': {'sources': ['1.A.1', '1.A.2', '1.A.3', '1.A.4'],
+            'name': 'Fuel Combustion Activities'},
+    '1.B.1.a.i': {'sources': ['1.B.1.a.i.1', '1.B.1.a.i.2'],
+                  'name': 'Underground mines'},
+    '1.B.1.a': {'sources': ['1.B.1.a.i'],
+                'name': 'Coal Mining and Handling'},
+    '1.B.1.c': {'sources': ['1.B.1.c.i'],
+                'name': 'Solid Fuel Transformation'},
+    '1.B.1': {'sources': ['1.B.1.a', '1.B.1.c'],
+              'name': 'Solid Fuels'},
+    '1.B.2.a': {'sources': ['1.B.2.a.i', '1.B.2.a.ii', '1.B.2.a.iii',
+                            '1.B.2.a.iv'],
+                'name': 'Oil'},
+    '1.B.2.b': {'sources': ['1.B.2.b.i', '1.B.2.b.ii', '1.B.2.b.iii',
+                            '1.B.2.b.iv', '1.B.2.b.v', '1.B.2.b.vi'],
+                'name': 'Natural Gas'},
+    '1.B.2': {'sources': ['1.B.2.a', '1.B.2.b'],
+              'name': 'Oil and Natural Gas'},
+    '1.B': {'sources': ['1.B.1', '1.B.2'],
+            'name': 'Fugitive Emissions from Fuels'},
+    '1': {'sources': ['1.A', '1.B'],
+          'name': 'Energy'},
+    '2.A.4': {'sources': ['2.A.4.a', '2.A.4.b', '2.A.4.d'],
+              'name': 'Other Process Uses of Carbonates'},
+    '2.A': {'sources': ['2.A.1', '2.A.2', '2.A.4'],
+            'name': 'Mineral Industry'},
+    '2.B.8': {'sources': ['2.B.8.a', '2.B.8.b', '2.B.8.c', '2.B.8.f'],
+              'name': 'Petrochemical and Carbon Black Production'},
+    '2.B.9': {'sources': ['2.B.9.a'],
+              'name': 'Fluorochemical Production'},
+    '2.B': {'sources': ['2.B.1', '2.B.2', '2.B.5', '2.B.7', '2.B.8', '2.B.9'],
+            'name': 'Chemical Industry'},
+    '2.C': {'sources': ['2.C.1', '2.C.2', '2.C.3', '2.C.6'],
+            'name': 'Metal Industry'},
+    '2.D': {'sources': ['2.D.1', '2.D.2'],
+            'name': 'Non-Energy Products from Fuels and Solvent Use'},
+    '2.F.1': {'sources': ['2.F.1.a', '2.F.1.b'],
+              'name': 'Refrigeration and Air Conditioning'},
+    '2.F': {'sources': ['2.F.1', '2.F.2', '2.F.3', '2.F.4'],
+            'name': 'Product Uses as Substitutes for Ozone Depleting Substances'},
+    '2': {'sources': ['2.A', '2.B', '2.C', '2.D', '2.F'],
+          'name': 'IPPU'},
+    # AFOLU
+    # 3.A - Livestock
+    '3.A.1.a': {'sources': ['3.A.1.a.i', '3.A.1.a.ii'],
+                'name': 'Cattle'},
+    '3.A.1': {'sources': ['3.A.1.a',  '3.A.1.b', '3.A.1.c', '3.A.1.d',
+                          '3.A.1.e', '3.A.1.f', '3.A.1.g', '3.A.1.h'],
+              'name': 'Enteric Fermentation'},
+    '3.A.2.a': {'sources': ['3.A.2.a.i', '3.A.2.a.ii'],
+                'name': 'Cattle'},
+    '3.A.2': {'sources': ['3.A.2.a', '3.A.2.b', '3.A.2.c', '3.A.2.d',
+                          '3.A.2.e', '3.A.2.f', '3.A.2.g', '3.A.2.h',
+                          '3.A.2.i'],
+              'name': 'Enteric Fermentation'},
+    '3.A': {'sources': ['3.A.1', '3.A.2'],
+            'name': 'Livestock'},
+    # 3.B - Land
+    '3.B.1.a.i': {'sources': ['3.B.1.a.i.1', '3.B.1.a.i.2'],
+                  'name': ''}, # no name, not the normal IPCC category
+    '3.B.1.a.ii': {'sources': ['3.B.1.a.ii.1', '3.B.1.a.ii.2'],
+                   'name': ''}, # no name, not the normal IPCC category
+    '3.B.1.a': {'sources': ['3.B.1.a.i', '3.B.1.a.ii'],
+                'name': 'Forest Land Remaining Forest Land'},
+    # '3.B.1.b': {'sources': ['3.B.1.b.i', '3.B.1.b.ii'],
+    #             'name': 'Land Converted to Forest Land'},
+    '3.B.1': {'sources': ['3.B.1.a'],#, '3.B.1.b'],
+              'name': 'Forest Land'},
+    '3.B.2.b': {'sources': ['3.B.2.b.i', '3.B.2.b.ii'],
+                'name': 'Land Converted to Cropland'},
+    '3.B.2': {'sources': ['3.B.2.b'],
+              'name': 'Cropland'},
+    '3.B.3.b': {'sources': ['3.B.3.b.i', '3.B.3.b.ii'],
+                'name': 'Land Converted to Grassland'},
+    '3.B.3': {'sources': ['3.B.3.b'],
+              'name': 'Grassland'},
+    '3.B': {'sources': ['3.B.1', '3.B.2', '3.B.3', '3.B.7'],
+            'name': 'Land'},
+    # 3.C - Aggregate Sources and Non-CO2 Emissions Sources on Land
+    '3.C.1.a': {'sources': ['3.C.1.a.i', '3.C.1.a.ii'],
+                'name': 'Biomass Burning in Forest Lands'},
+    '3.C.1.b': {'sources': ['3.C.1.b.i', '3.C.1.b.ii'],
+                'name': 'Biomass Burning in Croplands'},
+    'M.3.C.1.b.AG': {'sources': ['3.C.1.b.i'],
+                     'name': 'Biomass Burning in Croplands - Agriculture'},
+    'M.3.C.1.b.LU': {'sources': ['3.C.1.b.ii'],
+                     'name': 'Biomass Burning in Croplands - LULUCF'},
+    '3.C.1.c': {'sources': ['3.C.1.c.i', '3.C.1.c.ii'],
+                'name': 'Biomass Burning in Grasslands'},
+    'M.3.C.1.c.AG': {'sources': ['3.C.1.c.i'],
+                     'name': 'Biomass Burning in Grasslands - Agriculture'},
+    'M.3.C.1.c.LU': {'sources': ['3.C.1.c.ii'],
+                     'name': 'Biomass Burning in Grasslands - LULUCF'},
+    '3.C.1': {'sources': ['3.C.1.a', '3.C.1.b', '3.C.1.c'],
+              'name': 'Biomass Burning'},
+    'M.3.C.1.AG': {'sources': ['M.3.C.1.b.AG', 'M.3.C.1.c.AG'],
+                   'name': 'Biomass Burning - Agriculture'},
+    'M.3.C.1.LU': {'sources': ['3.C.1.a', 'M.3.C.1.b.LU', 'M.3.C.1.c.LU'],
+                   'name': 'Biomass Burning'},
+    '3.C.4.d': {'sources': ['3.C.4.d.i', '3.C.4.d.ii', '3.C.4.d.iii',
+                            '3.C.4.d.iv', '3.C.4.d.v', '3.C.4.d.vi',
+                            '3.C.4.d.vii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.4.g': {'sources': ['3.C.4.g.i', '3.C.4.g.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.4': {'sources': ['3.C.4.a', '3.C.4.b', '3.C.4.c', '3.C.4.d',
+                          '3.C.4.e', '3.C.4.f', '3.C.4.g', '3.C.4.n',
+                          '3.C.4.o'],
+              'name': 'Direct N2O Emissions from Managed Soils'},
+    '3.C.5.a': {'sources': ['3.C.5.a.i', '3.C.5.a.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.5.b': {'sources': ['3.C.5.b.i', '3.C.5.b.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.5.c': {'sources': ['3.C.5.c.i', '3.C.5.c.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.5.d.i': {'sources': ['3.C.5.d.i.1', '3.C.5.d.i.2'],
+                  'name': ''}, # not standard IPCC2006
+    '3.C.5.d.ii': {'sources': ['3.C.5.d.ii.1', '3.C.5.d.ii.2'],
+                   'name': ''}, # not standard IPCC2006
+    '3.C.5.d.iii': {'sources': ['3.C.5.d.iii.1', '3.C.5.d.iii.2'],
+                    'name': ''}, # not standard IPCC2006
+    '3.C.5.d.iv': {'sources': ['3.C.5.d.iv.1', '3.C.5.d.iv.2'],
+                   'name': ''}, # not standard IPCC2006
+    '3.C.5.d.v': {'sources': ['3.C.5.d.v.1', '3.C.5.d.v.2'],
+                  'name': ''}, # not standard IPCC2006
+    '3.C.5.d.vi': {'sources': ['3.C.5.d.vi.1', '3.C.5.d.vi.2'],
+                   'name': ''}, # not standard IPCC2006
+    '3.C.5.d.vii': {'sources': ['3.C.5.d.vii.1', '3.C.5.d.vii.2'],
+                    'name': ''}, # not standard IPCC2006
+    '3.C.5.d': {'sources': ['3.C.5.d.i', '3.C.5.d.ii', '3.C.5.d.iii',
+                            '3.C.5.d.iv', '3.C.5.d.v', '3.C.5.d.vi',
+                            '3.C.5.d.vii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.5.f': {'sources': ['3.C.5.f.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.5.g.i': {'sources': ['3.C.5.g.i.1', '3.C.5.g.i.2'],
+                  'name': ''}, # not standard IPCC2006
+    '3.C.5.g.ii': {'sources': ['3.C.5.g.ii.1', '3.C.5.g.ii.2'],
+                   'name': ''}, # not standard IPCC2006
+    '3.C.5.g': {'sources': ['3.C.5.g.i', '3.C.5.g.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.5.n': {'sources': ['3.C.5.n.i', '3.C.5.n.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.5.o': {'sources': ['3.C.5.o.i', '3.C.5.o.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.5': {'sources': ['3.C.5.a', '3.C.5.b', '3.C.5.c', '3.C.5.d',
+                          '3.C.5.e', '3.C.5.f', '3.C.5.g', '3.C.5.n',
+                          '3.C.5.o'],
+              'name': 'Indirect N2O Emissions from Managed Soils'},
+    '3.C.6.a.i': {'sources': ['3.C.6.a.i.1'],
+                  'name': ''}, # not standard IPCC2006
+    '3.C.6.a.ii': {'sources': ['3.C.6.a.ii.1', '3.C.6.a.ii.2'],
+                   'name': ''}, # not standard IPCC2006
+    '3.C.6.a': {'sources': ['3.C.6.a.i', '3.C.6.a.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.6.h': {'sources': ['3.C.6.h.i', '3.C.6.h.ii'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.6.i': {'sources': ['3.C.6.i.i'],
+                'name': ''}, # not standard IPCC2006
+    '3.C.6': {'sources': ['3.C.6.a', '3.C.6.h', '3.C.6.i'],
+              'name': 'Indirect N2O Emissions from Manure Management'},
+    '3.C': {'sources': ['3.C.1', '3.C.3', '3.C.4', '3.C.5', '3.C.6', '3.C.7'],
+            'name': 'Emissions from Biomass Burning'},
+    'M.3.C.AG': {'sources': ['M.3.C.1.AG', '3.C.3', '3.C.4', '3.C.5', '3.C.6',
+                             '3.C.7'],
+                 'name': 'Emissions from Biomass Burning - Agriculture'},
+    'M.AG.ELV': {'sources': ['M.3.C.AG'],
+                 'name': 'Agriculture Excluding Livestock'},
+    'M.3.C.LU': {'sources': ['M.3.C.1.LU'],
+                 'name': 'Emissions from Biomass Burning - LULUCF'},
+    '3.D': {'sources': ['3.D.1'],
+            'name': 'Other'},
+    'M.3.D.LU': {'sources': ['3.D.1'],
+                 'name': 'Other - LULUCF'},
+    '3': {'sources': ['3.A', '3.B', '3.C', '3.D'],
+          'name': 'AFOLU'},
+    'M.AG': {'sources': ['3.A', 'M.3.C.AG'],
+             'name': 'Agriculture'},
+    'M.LULUCF': {'sources': ['3.B', 'M.3.C.LU', '3.D'],
+                 'name': 'LULUCF'},
+    # waste
+    '4.A': {'sources': ['4.A.1', '4.A.3'],
+            'name': 'Solid Waste Disposal'},
+    '4.C': {'sources': ['4.C.1'],
+            'name': 'Incineration and Open Burning of Waste'},
+    '4.D.2': {'sources': ['4.D.2.a', '4.D.2.b', '4.D.2.c', '4.D.2.d', '4.D.2.e'],
+              'name': 'Industrial Wastewater Treatment and Discharge'},
+    '4.D': {'sources': ['4.D.1', '4.D.2'],
+            'name': 'Wastewater Treatment and Discharge'},
+    '4': {'sources': ['4.A', '4.B', '4.C', '4.D'],
+          'name': 'Waste'},
+    # national totals
+    '0': {'sources': ['1', '2', '3', '4'],
+          'name': 'National Total'},
+    'M.0.EL': {'sources': ['1', '2', 'M.AG', '4'],
+               'name': 'National Total Excluding LULUCF'},
+}

+ 60 - 470
UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR5_from_csv.py

@@ -3,20 +3,22 @@
 # Data is read from the csv file available for download at the above URL
 # license probably CC-BY 4.0 (see https://datos.gob.ar/dataset/ambiente-emisiones-gases-efecto-invernadero-gei)
 
-import sys
-import camelot
+import pandas as pd
 import primap2 as pm2
-from primap2.pm2io._conversion import convert_ipcc_code_primap_to_primap2
 from UNFCCC_GHG_data.helper import downloaded_data_path, extracted_data_path, \
     process_data_for_country
-from UNFCCC_GHG_data.UNFCCC_DI_reader.UNFCCC_DI_reader_config import gas_baskets
+from UNFCCC_GHG_data.helper import gas_baskets, compression
 
+from config_ARG_BUR5 import unit, time_format, filter_keep, filter_remove
+from config_ARG_BUR5 import (coords_cols, coords_defaults, coords_terminologies,
+                              coords_value_mapping, coords_value_filling)
+from config_ARG_BUR5 import cats_to_agg, meta_data
 
-# TODO
+# INFO
 # data is in long format. Columns needed are
 # 'año' 'id_ipcc' 'tipo_de_gas' 'valor_en_toneladas_de_gas'
 # columns to ignore are
-columns_to_ignore = ['sector', 'actividad', 'subactividad', 'categoria', 'valor_en_toneladas_de_co2e']
+# columns_to_ignore = ['sector', 'actividad', 'subactividad', 'categoria', 'valor_en_toneladas_de_co2e']
 # sector codes are in primap1 format (no dots), reading should be possible directly from CSV into interchange format
 # postprocessing needed is aggregation of gas baskets and categories as only the highest detail categories are present
 
@@ -25,9 +27,6 @@ columns_to_ignore = ['sector', 'actividad', 'subactividad', 'categoria', 'valor_
 # configuration
 # ###
 
-# TODO: lot's of empty lines are written in csv file. check if solved with new
-#  PRIMAP2 version
-
 # folders and files
 input_folder = downloaded_data_path / 'UNFCCC' / 'Argentina' / \
                'BUR5'
@@ -40,488 +39,79 @@ output_filename = 'ARG_BUR5_2023_'
 csv_file = 'emisiones_gei_inventario_datos_totales_1990_2020.csv'
 
 
-# cats = ['1A1a', '1A1b', '1A1cii', '1A2a', '1A2b', '1A2c', '1A2d', '1A2e',
-#        '1A2f', '1A2g', '1A2j', '1A2l', '1A2m', '1A3aii', '1A3biii',
-#        '1A3bvii', '1A3c', '1A3dii', '1A3ei', '1A4ai', '1A4aii', '1A4aiii',
-#        '1A4b', '1A4c', '1B1ai1', '1B1ai2', '1B1ci', '1B2ai', '1B2aii',
-#        '1B2aiii', '1B2aiv', '1B2bi', '1B2bii', '1B2biii', '1B2biv',
-#        '1B2bv', '1B2bvi', '2A1', '2A2', '2A4a', '2A4b', '2A4d', '2B1',
-#        '2B2', '2B5', '2B7', '2B8a', '2B8b', '2B8c', '2B8f', '2B9a', '2C1',
-#        '2C2', '2C3', '2C6', '2D1', '2D2', '2F1a', '2F1b', '2F2', '2F3',
-#        '2F4', '3A1ai', '3A1aii', '3A1b', '3A1c', '3A1d', '3A1e', '3A1f',
-#        '3A1g', '3A1h', '3A2ai', '3A2aii', '3A2b', '3A2c', '3A2d', '3A2e',
-#        '3A2f', '3A2g', '3A2h', '3A2i', '3B1ai1', '3B1ai2', '3B1aii1',
-#        '3B1aii2', '3B2bi', '3B2bii', '3B3bi', '3B3bii', '3B7', '3C1ai',
-#        '3C1aii', '3C1bi', '3C1bii', '3C1ci', '3C1cii', '3C3', '3C4a',
-#        '3C4b', '3C4c', '3C4di', '3C4dii', '3C4diii', '3C4div', '3C4dv',
-#        '3C4dvi', '3C4dvii', '3C4e', '3C4f', '3C4gi', '3C4gii', '3C4n',
-#        '3C4o', '3C5ai', '3C5aii', '3C5bi', '3C5bii', '3C5ci', '3C5cii',
-#        '3C5di1', '3C5di2', '3C5dii1', '3C5dii2', '3C5diii1', '3C5diii2',
-#        '3C5div1', '3C5div2', '3C5dv1', '3C5dv2', '3C5dvi1', '3C5dvi2',
-#        '3C5dvii1', '3C5dvii2', '3C5e', '3C5fii', '3C5gi1', '3C5gi2',
-#        '3C5gii1', '3C5gii2', '3C5ni', '3C5nii', '3C5oi', '3C5oii',
-#        '3C6ai1', '3C6aii1', '3C6aii2', '3C6hi', '3C6hii', '3C6ii', '3C7',
-#        '3D1', '4A1', '4A3', '4B', '4C1', '4D1', '4D2a', '4D2b', '4D2c',
-#        '4D2d', '4D2e']
-
 # read the data
+data_pd = pd.read_csv(
+    input_folder / csv_file,
+    sep=';',
+    parse_dates=[coords_cols["time"]],
+    usecols=list(coords_cols.values()),
+)
 
-######
-cat_codes_manual = {  # conversion to PRIMAP1 format
-    '1A6': 'MBIO',
-    '1A3di': 'MBKM',
-    '1A3ai': 'MBKA',
-    '1A3di Navegación marítima y fluvial internacional': 'MBKM',
-    'S/N': 'MMULTIOP',
-}
-
-cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[A-Z0-9]{1,8}).*'
-
-time_format = "%Y"
-
-coords_cols = {
-    "category": "category",
-    "entity": "entity",
-    "unit": "unit",
-}
-
-add_coords_cols = {
-    "orig_cat_name": ["orig_cat_name", "category"],
-}
-
-coords_terminologies = {
-    "area": "ISO3",
-    "category": "IPCC2006_PRIMAP",
-    "scenario": "PRIMAP",
-}
-
-coords_defaults = {
-    "source": "ARG-GHG-Inventory",
-    "provenance": "measured",
-    "area": "ARG",
-    "scenario": "BUR5",
-    "unit": "tonnes" # this might not work as he entity has to be specified
-}
-
-coords_value_mapping = {
-    "category": "PRIMAP1",
-    "entity": {
-        'HFC_23': 'HFC23',
-        'HFC_32': 'HFC32',
-        'HFC_125': 'HFC125',
-        'HFC_134a': 'HFC134a',
-        'HFC_152a': 'HFC152a',
-        'HFC_143a': 'HFC143a',
-        'HFC_227ea': 'HFC227ea',
-        'HFC_236fa': 'HFC236fa',
-        'HFC_365mfc': 'HFC365mfc',
-        'HFC_245fa': 'HFC245fa',
-        'PFC_143_CF4': 'CF4',
-        'PFC_116-C2F6': 'C2F6',
-    },
-}
-
-coords_value_filling = {
-}
-
-filter_remove = {
-}
-
-filter_keep = {}
-
-meta_data = {
-    "references": "",
-    "rights": "",
-    "contact": "mail@johannes-guetschow.de",
-    "title": "",
-    "comment": "Read fom pcsv file by Johannes Gütschow",
-    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
-}
-#####
-
-data_IF = pm2.pm2io.read_long_csv_file_if(
-    csv_file,
+data_pd["unit"] = unit
+coords_cols["unit"] = "unit"
 
-    #coords_value_filling=coords_value_filling,
+data_if = pm2.pm2io.convert_long_dataframe_if(
+    data_pd,
+    coords_cols=coords_cols,
+    coords_defaults=coords_defaults,
+    coords_value_mapping=coords_value_mapping,
+    coords_value_filling=coords_value_filling,
+    coords_terminologies=coords_terminologies,
     filter_remove=filter_remove,
     filter_keep=filter_keep,
-
+    meta_data=meta_data,
+    time_format=time_format,
 )
 
-
-
-
-# definitions part 1: reading data from pdf and preprocessing for conversion to PRIMAP2 format
-# part 1.1 KyotoGHG, CO2, CH4, N2O tables
-#
-pages_to_read = range(232, 244)
-data_start_keyword = "Id#"
-data_end_keyword = "Fuente: Elaboración propia"
-index_cols = ['Id#', 'Nombre']
-col_rename = {
-    index_cols[0]: "category",
-    index_cols[1]: "orig_cat_name"
-}
-metadata = {
-    "entity": [0, 1],
-    "unit": [0, 2]
-}
-
-rows_to_drop = [0]
-
-metadata_mapping = {
-    'unit': {
-        '(GgCO2e)': 'GgCO2e',
-        '(GgCO2)': 'Gg',
-        '(GgN2O)': 'Gg',
-        '(GgCH4)': 'Gg',
-        '(GgGas)': 'Gg',
-    }
-}
-
-# part 1.2: fgases table
-# the f-gases table is in wide format with no sectoral resolution and gases as row header
-pages_to_read_fgases = range(244, 247)
-data_start_keyword_fgases = "Gas"
-index_cols_fgases = ['Gas']
-cols_to_drop_fgases = ["Nombre"]
-metadata_fgases = {
-    "unit": [0, 2],
-    "category": '2',
-    "orig_cat_name": "PROCESOS INDUSTRIALES Y USO DE PRODUCTOS",
-}
-col_rename_fgases = {
-    index_cols_fgases[0]: "entity",
-}
-
-## definitions for conversion to PRIMAP2 format
-# rows to remove
-cats_remove = ["Information Items", "Memo Items (3)"]
-# manual category codes
-cat_codes_manual = {  # conversion to PRIMAP1 format
-    '1A6': 'MBIO',
-    '1A3di': 'MBKM',
-    '1A3ai': 'MBKA',
-    '1A3di Navegación marítima y fluvial internacional': 'MBKM',
-    'S/N': 'MMULTIOP',
-}
-
-cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[A-Z0-9]{1,8}).*'
-
-time_format = "%Y"
-
-coords_cols = {
-    "category": "category",
-    "entity": "entity",
-    "unit": "unit",
-}
-
-add_coords_cols = {
-    "orig_cat_name": ["orig_cat_name", "category"],
-}
-
-coords_terminologies = {
-    "area": "ISO3",
-    "category": "IPCC2006_PRIMAP",
-    "scenario": "PRIMAP",
-}
-
-coords_defaults = {
-    "source": "ARG-GHG-Inventory",
-    "provenance": "measured",
-    "area": "ARG",
-    "scenario": "BUR4",
-}
-
-coords_value_mapping = {
-    #    "category": "PRIMAP1",
-    "entity": {
-        'HFC-23': 'HFC23',
-        'HFC-32': 'HFC32',
-        'HFC-41': 'HFC41',
-        'HFC-43-10mee': 'HFC4310mee',
-        'HFC-125': 'HFC125',
-        'HFC-134': 'HFC134',
-        'HFC-134a': 'HFC134a',
-        'HFC-152a': 'HFC152a',
-        'HFC-143': 'HFC143',
-        'HFC-143a': 'HFC143a',
-        'HFC-227ea': 'HFC227ea',
-        'HFC-236fa': 'HFC236fa',
-        'HFC-245ca': 'HFC245ca',
-        'HFC-365mfc': 'HFC365mfc',
-        'HFC-245fa': 'HFC245fa',
-        'PFC-143 (CF4)': 'CF4',
-        'PFC-116 (C2F6)': 'C2F6',
-        'PFC-218 (C3F8)': 'C3F8',
-        'PFC-31-10 (C4F10)': 'C4F10',
-        'c-C4F8': 'cC4F8',
-        'PFC-51-144 (C6F14)': 'C6F14',
-    },
-    "unit": "PRIMAP1",
-    "orig_cat_name": {
-        "1A3di Navegación marítima y fluvial internacional": "Navegación marítima y fluvial internacional",
-    }
-}
-
-coords_value_filling = {
-    "category": {
-        "orig_cat_name": {
-            "Total de emisiones y absorciones nacionales": "0",
-            "Navegación marítima y fluvial internacional": "M.BK.M",
-            "Operaciones Multilaterales": "M.MULTIOP",
-            "Emisiones de CO2 provenientes del uso de biomasa como combustible": "M.BIO",
-        },
-    },
-    "orig_cat_name": {
-        "category": {
-            "M.BK.M": "Navegación marítima y fluvial internacional",
-        },
-    },
-}
-
-filter_remove = {
-    "f1": {
-        "orig_cat_name": ["Elementos Recordatorios"],
-    },
-}
-
-filter_keep = {}
-
-meta_data = {
-    "references": "https://unfccc.int/documents/419772",
-    "rights": "XXXX",
-    "contact": "mail@johannes-guetschow.de",
-    "title": "Cuarto Informe Bienal de Actualización de la República Argentina a la Convención Marco delas Naciones Unidas Sobre el Cambio Climático",
-    "comment": "Read fom pdf file by Johannes Gütschow",
-    "institution": "United Nations Framework Convention on Climate Change (UNFCCC)",
-}
-
-compression = dict(zlib=True, complevel=9)
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+data_if = data_pm2.pr.to_interchange_format()
 
 # ###
-# start data reading
+# save data to IF and native format
 # ###
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(output_folder /
+                                   (output_filename + coords_terminologies["category"]
+                                    + "_raw"), data_if)
 
-# change working directory to script directory for proper folder names
-script_path = os.path.abspath(sys.argv[0])
-script_dir_name = os.path.dirname(script_path)
-os.chdir(script_dir_name)
-
-# read data for KyotoGHG, CO2, CH4, N2O
-data_all = None
-for page in pages_to_read:
-    # read current page
-    tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page),
-                              flavor='stream')
-    df_current = tables[0].df
-    rows_to_drop = []
-    for index, data in df_current.iterrows():
-        if data[0] == data_start_keyword:
-            break
-        else:
-            rows_to_drop.append(index)
-
-    end_of_data = False
-    for index, data in df_current.iterrows():
-        if data_end_keyword in list(data):
-            end_of_data = True
-        if end_of_data:
-            rows_to_drop.append(index)
-
-    df_current = df_current.drop(rows_to_drop)
-    idx_header = df_current.index[df_current[0] == index_cols[0]].tolist()
-    df_current = df_current.rename(
-        dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
-    df_current = df_current.drop(idx_header)
-
-    # for sheet "Aggregate GHGs" fill entity cell
-    if page in range(232, 235):
-        df_current.iloc[
-            metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
-    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
-    # as without one of them there is no category information
-    df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
-    # set index. necessary for the stack operation in the conversion to long format
-    # df_current = df_current.set_index(index_cols)
-    # add columns
-    inserted = 0
-    for col in metadata.keys():
-        # print(f"coordinates: {metadata[col][0]}, {metadata[col][1]}")
-        value = df_current.iloc[metadata[col][0], metadata[col][1] + inserted]
-        if col in metadata_mapping.keys():
-            if value in metadata_mapping[col].keys():
-                value = metadata_mapping[col][value]
-        # print(f"Inserting column {col} with value {value}")
-        df_current.insert(2, col, value)
-        inserted += 1
-
-    # drop unit row
-    # for row in rows_to_drop:
-    #    df_current = df_current.drop(df_current.iloc[row].name)
-    df_current = df_current.drop(df_current.index[0])
-
-    # fix number format
-    df_current = df_current.apply(lambda x: x.str.replace('.', '', regex=False), axis=1)
-    df_current = df_current.apply(lambda x: x.str.replace(',', '.', regex=False),
-                                  axis=1)
-
-    df_current.rename(columns=col_rename, inplace=True)
-
-    # reindex
-    df_current.reset_index(inplace=True, drop=True)
-
-    df_current["category"] = df_current["category"].replace(cat_codes_manual)
-    # then the regex replacements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
-    df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
-                                                                regex=True)
-
-    df_current = df_current.reset_index(drop=True)
-
-    # make sure all col headers are str
-    df_current.columns = df_current.columns.map(str)
-
-    # convert to PRIMAP2 interchange format
-    data_if = pm2.pm2io.convert_wide_dataframe_if(
-        df_current,
-        coords_cols=coords_cols,
-        add_coords_cols=add_coords_cols,
-        coords_defaults=coords_defaults,
-        coords_terminologies=coords_terminologies,
-        coords_value_mapping=coords_value_mapping,
-        coords_value_filling=coords_value_filling,
-        filter_remove=filter_remove,
-        filter_keep=filter_keep,
-        meta_data=meta_data
-    )
-
-    # convert to PRIMAP2 native format
-    data_pm2 = pm2.pm2io.from_interchange_format(data_if)
-
-    # aggregate to one df
-    if data_all is None:
-        data_all = data_pm2
-    else:
-        data_all = data_all.pr.merge(data_pm2)
-
-# read fgases
-for page in pages_to_read_fgases:
-    # read current page
-    tables = camelot.read_pdf(str(input_folder / pdf_file), pages=str(page),
-                              flavor='stream')
-    df_current = tables[0].df
-    rows_to_drop = []
-    for index, data in df_current.iterrows():
-        if data[0] == data_start_keyword_fgases:
-            break
-        else:
-            rows_to_drop.append(index)
-
-    end_of_data = False
-    for index, data in df_current.iterrows():
-        if data_end_keyword in list(data):
-            end_of_data = True
-        if end_of_data:
-            rows_to_drop.append(index)
-
-    df_current = df_current.drop(rows_to_drop)
-    idx_header = df_current.index[df_current[0] == index_cols_fgases[0]].tolist()
-    df_current = df_current.rename(
-        dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
-    df_current = df_current.drop(idx_header)
-
-    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
-    # as without one of them there is no category information
-    df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
-    # set index. necessary for the stack operation in the conversion to long format
-    # df_current = df_current.set_index(index_cols)
-    # add columns
-    inserted = 0
-    for col in metadata_fgases.keys():
-        # print(f"coordinates: {metadata[col][0]}, {metadata[col][1]}")
-        if isinstance(metadata_fgases[col], str):
-            value = metadata_fgases[col]
-        else:
-            value = df_current.iloc[
-                metadata_fgases[col][0], metadata_fgases[col][1] + inserted]
-            if col in metadata_mapping.keys():
-                if value in metadata_mapping[col].keys():
-                    value = metadata_mapping[col][value]
-        # print(f"Inserting column {col} with value {value}")
-        df_current.insert(2, col, value)
-        inserted += 1
-
-    # remove unnecessary columns
-    df_current = df_current.drop(columns=cols_to_drop_fgases)
-
-    # drop unit row
-    df_current = df_current.drop(df_current.index[0])
-
-    # fix number format
-    df_current = df_current.apply(lambda x: x.str.replace('.', '', regex=False), axis=1)
-    df_current = df_current.apply(lambda x: x.str.replace(',', '.', regex=False),
-                                  axis=1)
-
-    df_current.rename(columns=col_rename_fgases, inplace=True)
-
-    # reindex
-    df_current.reset_index(inplace=True, drop=True)
-
-    df_current["category"] = df_current["category"].replace(cat_codes_manual)
-    # then the regex repalcements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
-    df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
-                                                                regex=True)
-
-    df_current = df_current.reset_index(drop=True)
-
-    # make sure all col headers are str
-    df_current.columns = df_current.columns.map(str)
-
-    # convert to PRIMAP2 interchange format
-    data_if = pm2.pm2io.convert_wide_dataframe_if(
-        df_current,
-        coords_cols=coords_cols,
-        add_coords_cols=add_coords_cols,
-        coords_defaults=coords_defaults,
-        coords_terminologies=coords_terminologies,
-        coords_value_mapping=coords_value_mapping,
-        coords_value_filling=coords_value_filling,
-        filter_remove=filter_remove,
-        filter_keep=filter_keep,
-        meta_data=meta_data
-    )
-
-    # convert to PRIMAP2 native format
-    data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+data_pm2 = pm2.pm2io.from_interchange_format(data_if)
+encoding = {var: compression for var in data_pm2.data_vars}
+data_pm2.pr.to_netcdf(output_folder /
+                      (output_filename + coords_terminologies["category"]
+                       + "_raw" + ".nc"), encoding=encoding)
 
-    # aggregate to one df
-    data_all = data_all.pr.merge(data_pm2)
+### processing
+data_proc_pm2 = data_pm2
 
-# ###
-# process (aggregate fgases)
-# ###
-data_all = process_data_for_country(
-    data_all,
+# actual processing
+country_processing = {
+    'aggregate_cats': cats_to_agg,
+}
+data_proc_pm2 = process_data_for_country(
+    data_proc_pm2,
     entities_to_ignore=[],
     gas_baskets=gas_baskets,
-    processing_info_country=None,
+    processing_info_country=country_processing,
 )
 
+# adapt source and metadata
+current_source = data_proc_pm2.coords["source"].values[0]
+data_temp = data_proc_pm2.pr.loc[{"source": current_source}]
+data_proc_pm2 = data_proc_pm2.pr.set("source", 'BUR_NIR', data_temp)
 
 # ###
 # save data to IF and native format
 # ###
+data_proc_if = data_proc_pm2.pr.to_interchange_format()
+if not output_folder.exists():
+    output_folder.mkdir()
+pm2.pm2io.write_interchange_format(
+    output_folder / (output_filename + coords_terminologies["category"]), data_proc_if)
 
-encoding = {var: compression for var in data_all.data_vars}
-data_all.pr.to_netcdf(output_folder / (output_filename + coords_terminologies[
-    "category"] + ".nc"), encoding=encoding)
-
-data_if = data_all.pr.to_interchange_format()
-pm2.pm2io.write_interchange_format(output_folder / (output_filename + coords_terminologies["category"]), data_if)
-
-
-
+encoding = {var: compression for var in data_proc_pm2.data_vars}
+data_proc_pm2.pr.to_netcdf(
+    output_folder / (output_filename + coords_terminologies["category"] + ".nc"),
+    encoding=encoding)
 
 

+ 2 - 0
UNFCCC_GHG_data/helper/__init__.py

@@ -5,6 +5,7 @@ from .definitions import downloaded_data_path, downloaded_data_path_UNFCCC
 from .definitions import dataset_path, dataset_path_UNFCCC
 from .definitions import custom_country_mapping, custom_folders
 from .definitions import GWP_factors, gas_baskets
+from .definitions import compression
 from .functions import get_country_code, get_country_name, convert_categories
 from .functions import create_folder_mapping, process_data_for_country, get_code_file
 from .functions import fix_rows
@@ -30,4 +31,5 @@ __all__ = [
     "create_folder_mapping",
     "process_data_for_country",
     "fix_rows",
+    "compression",
 ]

+ 3 - 1
UNFCCC_GHG_data/helper/definitions.py

@@ -168,4 +168,6 @@ gas_baskets = {
                             'PFCS (AR5GWP100)'],
     'KYOTOGHG (AR6GWP100)': ['CO2', 'CH4', 'N2O', 'SF6', 'NF3', 'HFCS (AR6GWP100)',
                             'PFCS (AR6GWP100)'],
-}
+}
+
+compression = dict(zlib=True, complevel=9)

+ 2 - 0
UNFCCC_GHG_data/helper/functions.py

@@ -222,6 +222,7 @@ def process_data_for_country(
 
         # aggregate categories
         if "aggregate_cats" in processing_info_country:
+            data_country = data_country.pr.dequantify()
             if "agg_tolerance" in processing_info_country:
                 agg_tolerance = processing_info_country["agg_tolerance"]
             else:
@@ -270,6 +271,7 @@ def process_data_for_country(
                     )
                 else:
                     print(f"no data to aggregate category {cat_to_agg}")
+            data_country = data_country.pr.quantify()
 
         # copy HFCs and PFCs with default factors
         if "basket_copy" in processing_info_country:

+ 1 - 0
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/pv/9g/MD5E-s1660050--3787beb422ef3af173dfb158da104660.csv/MD5E-s1660050--3787beb422ef3af173dfb158da104660.csv

+ 1 - 0
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/35/qV/MD5E-s788143--edf517ddb0106df8a8edeb255c24550b.nc/MD5E-s788143--edf517ddb0106df8a8edeb255c24550b.nc

+ 23 - 0
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP.yaml

@@ -0,0 +1,23 @@
+attrs:
+  ref: https://unfccc.int/documents/634953
+  ref2: https://ciam.ambiente.gob.ar/repositorio.php?tid=9&stid=36&did=394#
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: ' Processed on 2024-04-05'
+  comment: Read from pcsv file by Johannes Gütschow Processed on 2024-04-05
+  institution: United Nations Framework Convention on Climate Change (UNFCCC)
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - scenario (PRIMAP)
+  - provenance
+  - category (IPCC2006_PRIMAP)
+  - source
+  - area (ISO3)
+  - entity
+  - unit
+data_file: ARG_BUR5_2023_IPCC2006_PRIMAP.csv

+ 1 - 0
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP_raw.csv

@@ -0,0 +1 @@
+../../../.git/annex/objects/wQ/1P/MD5E-s79644--ecec4d6ed3ea5bc5a5ca1c503007db9c.csv/MD5E-s79644--ecec4d6ed3ea5bc5a5ca1c503007db9c.csv

+ 1 - 0
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP_raw.nc

@@ -0,0 +1 @@
+../../../.git/annex/objects/w4/3q/MD5E-s149640--ae27906848d822d8584154d1092c73ae.nc/MD5E-s149640--ae27906848d822d8584154d1092c73ae.nc

+ 23 - 0
extracted_data/UNFCCC/Argentina/ARG_BUR5_2023_IPCC2006_PRIMAP_raw.yaml

@@ -0,0 +1,23 @@
+attrs:
+  ref: https://unfccc.int/documents/634953
+  ref2: https://ciam.ambiente.gob.ar/repositorio.php?tid=9&stid=36&did=394#
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: ''
+  comment: Read from pcsv file by Johannes Gütschow
+  institution: United Nations Framework Convention on Climate Change (UNFCCC)
+  cat: category (IPCC2006_PRIMAP)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - scenario (PRIMAP)
+  - provenance
+  - category (IPCC2006_PRIMAP)
+  - source
+  - area (ISO3)
+  - entity
+  - unit
+data_file: ARG_BUR5_2023_IPCC2006_PRIMAP_raw.csv