Bladeren bron

Merge remote-tracking branch 'origin/master'

Johannes Gütschow 1 jaar geleden
bovenliggende
commit
6ddd2be192

+ 1 - 1
.gitignore

@@ -4,7 +4,7 @@ geckodriver.log
 __pycache__
 /JG_test_code/
 .doit.db
-log
+log/*
 UNFCCC_GHG_data/datasets
 UNFCCC_GHG_data/UNFCCC_DI_reader/test_UNFCCC_DI_reader.ipynb
 UNFCCC_GHG_data/UNFCCC_DI_reader/.ipynb_checkpoints/

+ 9 - 4
UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py

@@ -375,9 +375,14 @@ def read_crf_table_from_file(
         cols_to_drop.append(df_raw.columns.values[0])
     # select only first table by cutting everything after an all-nan column (unless
     # it's the first column)
+    if debug:
+        print(f'Header before table end detection: {df_raw.columns.values}')
     for colIdx in range(1, len(df_raw.columns.values)):
-        if df_raw.iloc[:, colIdx].isna().all():
-            cols_to_drop = cols_to_drop + list(df_raw.columns.values[colIdx : ])
+        if ((df_raw.iloc[:, colIdx].isna().all()) &
+                (df_raw.columns[colIdx].startswith('Unnamed'))):
+            cols_to_drop = cols_to_drop + list(df_raw.columns.values[colIdx:])
+            if debug:
+                print(f'cols_to_drop: {cols_to_drop}')
             break
 
     if cols_to_drop is not None:
@@ -402,12 +407,12 @@ def read_crf_table_from_file(
     if "header_fill" in table_properties:
         for row in range(0, len(df_header)):
             if table_properties["header_fill"][row]:
-                header.append(list(df_header.iloc[row].fillna(method="ffill")))
+                header.append(list(df_header.iloc[row].ffill()))
             else:
                 header.append(list(df_header.iloc[row]))
     else:
         for row in range(0, len(df_header)):
-            header.append(list(df_header.iloc[row].fillna(method="ffill")))
+            header.append(list(df_header.iloc[row].ffill()))
 
     # combine all non-unit rows into one
     entities = None

+ 1 - 0
downloaded_data/non-UNFCCC/PSE/2022-Inventory/Emissions_2020_01-01.xlsx

@@ -0,0 +1 @@
+../../../../.git/annex/objects/4v/pw/MD5E-s72003--67147bb156227a35ea07939a59677d08.xlsx/MD5E-s72003--67147bb156227a35ea07939a59677d08.xlsx

+ 2 - 2
extracted_data/UNFCCC/Russian_Federation/RUS_CRF2023_22082023.yaml

@@ -15,11 +15,11 @@ time_format: '%Y'
 dimensions:
   '*':
   - time
-  - class
-  - provenance
   - area (ISO3)
   - scenario (PRIMAP)
+  - provenance
   - category (CRF2013_2023)
+  - class
   - source
   - entity
   - unit