Browse Source

fix in CRF reading code

Johannes 1 year ago
parent
commit
cbceabeabd

+ 1 - 1
.gitignore

@@ -4,7 +4,7 @@ geckodriver.log
 __pycache__
 /JG_test_code/
 .doit.db
-log
+log/*:
 UNFCCC_GHG_data/datasets
 UNFCCC_GHG_data/UNFCCC_DI_reader/test_UNFCCC_DI_reader.ipynb
 UNFCCC_GHG_data/UNFCCC_DI_reader/.ipynb_checkpoints/

+ 9 - 4
UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py

@@ -375,9 +375,14 @@ def read_crf_table_from_file(
         cols_to_drop.append(df_raw.columns.values[0])
     # select only the first table by cutting everything after an all-NaN column
     # (unless it's the first column)
+    if debug:
+        print(f'Header before table end detection: {df_raw.columns.values}')
     for colIdx in range(1, len(df_raw.columns.values)):
-        if df_raw.iloc[:, colIdx].isna().all():
-            cols_to_drop = cols_to_drop + list(df_raw.columns.values[colIdx : ])
+        if ((df_raw.iloc[:, colIdx].isna().all()) &
+                (df_raw.columns[colIdx].startswith('Unnamed'))):
+            cols_to_drop = cols_to_drop + list(df_raw.columns.values[colIdx:])
+            if debug:
+                print(f'cols_to_drop: {cols_to_drop}')
             break
 
     if cols_to_drop is not None:
@@ -402,12 +407,12 @@ def read_crf_table_from_file(
     if "header_fill" in table_properties:
         for row in range(0, len(df_header)):
             if table_properties["header_fill"][row]:
-                header.append(list(df_header.iloc[row].fillna(method="ffill")))
+                header.append(list(df_header.iloc[row].ffill()))
             else:
                 header.append(list(df_header.iloc[row]))
     else:
         for row in range(0, len(df_header)):
-            header.append(list(df_header.iloc[row].fillna(method="ffill")))
+            header.append(list(df_header.iloc[row].ffill()))
 
     # combine all non-unit rows into one
     entities = None

+ 2 - 2
extracted_data/UNFCCC/Russian_Federation/RUS_CRF2023_22082023.yaml

@@ -15,11 +15,11 @@ time_format: '%Y'
 dimensions:
   '*':
   - time
-  - class
-  - provenance
   - area (ISO3)
   - scenario (PRIMAP)
+  - provenance
   - category (CRF2013_2023)
+  - class
   - source
   - entity
   - unit

+ 1 - 0
log/test_read_CRF2023/CRF2023_AUS_2023-09-25.csv

@@ -0,0 +1 @@
+/annex/objects/MD5E-s276166--82935dd088f90a0312142c2c635a7cbc.csv

+ 1 - 0
log/test_read_CRF2023/CRF2023_AUS_2023-09-25.nc

@@ -0,0 +1 @@
+/annex/objects/MD5E-s517276--4c23010860827313a7bc9d03bcecff09.nc

+ 26 - 0
log/test_read_CRF2023/CRF2023_AUS_2023-09-25.yaml

@@ -0,0 +1,26 @@
+attrs:
+  references: https://unfccc.int/ghg-inventories-annex-i-parties/2023
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: 'Data submitted in 2023 to the UNFCCC in the common reporting format (CRF)
+    by Australia. Submission date: 13042023'
+  comment: Read fom xlsx file by Johannes Gütschow
+  institution: United Nations Framework Convention on Climate Change (www.unfccc.int)
+  cat: category (CRF2013_2023)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+  sec_cats:
+  - class
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - scenario (PRIMAP)
+  - class
+  - source
+  - category (CRF2013_2023)
+  - provenance
+  - area (ISO3)
+  - entity
+  - unit
+data_file: CRF2023_AUS_2023-09-25.csv

+ 1 - 0
log/test_read_CRF2023/CRF2023_RUS_2023-09-25.csv

@@ -0,0 +1 @@
+/annex/objects/MD5E-s163093--ab5e010c9788e7050c383df2721a4718.csv

+ 1 - 0
log/test_read_CRF2023/CRF2023_RUS_2023-09-25.nc

@@ -0,0 +1 @@
+/annex/objects/MD5E-s474309--0c4eb6604d1efe8016ed72ae3aa0e0ae.nc

+ 26 - 0
log/test_read_CRF2023/CRF2023_RUS_2023-09-25.yaml

@@ -0,0 +1,26 @@
+attrs:
+  references: https://unfccc.int/ghg-inventories-annex-i-parties/2023
+  rights: ''
+  contact: mail@johannes-guetschow.de
+  title: 'Data submitted in 2023 to the UNFCCC in the common reporting format (CRF)
+    by Russian Federation. Submission date: 22082023'
+  comment: Read fom xlsx file by Johannes Gütschow
+  institution: United Nations Framework Convention on Climate Change (www.unfccc.int)
+  cat: category (CRF2013_2023)
+  area: area (ISO3)
+  scen: scenario (PRIMAP)
+  sec_cats:
+  - class
+time_format: '%Y'
+dimensions:
+  '*':
+  - time
+  - area (ISO3)
+  - scenario (PRIMAP)
+  - class
+  - source
+  - category (CRF2013_2023)
+  - provenance
+  - entity
+  - unit
+data_file: CRF2023_RUS_2023-09-25.csv