Daniel Busch 4 months ago
parent
commit
a7f6c6be74
2 changed files with 18 additions and 11 deletions
  1. 4 1
      src/faostat_data_primap/helper/definitions.py
  2. 14 10
      src/faostat_data_primap/read.py

+ 4 - 1
src/faostat_data_primap/helper/definitions.py

@@ -1,5 +1,7 @@
 """definitions like folders, mappings etc."""
 
+from typing import Any
+
 domains = {
     "farm_gate_emissions_crops": {
         "url_domain": "https://www.fao.org/faostat/en/#data/GCE",
@@ -78,7 +80,8 @@ areas_to_remove_base = [
     "Europe, Northern America, Australia and New Zealand",
 ]
 
-read_config_all = {
+# TODO: it would be nice to have a more specific type hint here
+read_config_all: Any = {
     "farm_gate_agriculture_energy": {
         "2024-11-14": {
             "filename": "Emissions_Agriculture_Energy_E_All_Data_NOFLAG.csv",

+ 14 - 10
src/faostat_data_primap/read.py

@@ -4,7 +4,7 @@ import os
 import pathlib
 
 import pandas as pd
-import primap2 as pm2
+import primap2 as pm2  # type: ignore
 
 from faostat_data_primap.helper.country_mapping import country_to_iso3_mapping
 from faostat_data_primap.helper.definitions import (
@@ -78,7 +78,8 @@ def read_latest_data(
         domain_path = downloaded_data_path / domain
         files_to_read.append((domain, get_latest_release(domain_path)))
 
-    df_all = None
+    # df_all = None
+    df_list = []
     for domain, release in files_to_read:
         read_config = read_config_all[domain][release]
 
@@ -125,14 +126,17 @@ def read_latest_data(
             axis=1,
         )
 
-        if df_all is None:
-            df_all = df_domain
-        else:
-            df_all = pd.concat(
-                [df_all, df_domain],
-                axis=0,
-                join="outer",
-            ).reset_index(drop=True)
+        df_list.append(df_domain)
+        # if df_all is None:
+        #     df_all = df_domain
+        # else:
+        #     df_all = pd.concat(
+        #         [df_all, df_domain],
+        #         axis=0,
+        #         join="outer",
+        #     ).reset_index(drop=True)
+
+    df_all = pd.concat(df_list, axis=0, join="outer", ignore_index=True)
 
     # sometimes Source is empty
     df_all["Source"] = df_all["Source"].fillna("unknown")