
Fix or ignore ruff errors and improve docstrings

Johannes Gütschow, 11 months ago
parent
commit
d33206ce3e
100 changed files with 2716 additions and 2408 deletions
  1. .github/ISSUE_TEMPLATE/country-data-template-non-annexi.md (+2 -2)
  2. Makefile (+3 -3)
  3. README.md (+0 -63)
  4. datasets/UNFCCC/no_updates_until.txt (+0 -1)
  5. docs/source/api/unfccc_ghg_data.helper.country_info.rst (+0 -1)
  6. docs/source/api/unfccc_ghg_data.helper.folder_mapping.rst (+0 -1)
  7. docs/source/api/unfccc_ghg_data.helper.functions_temp.rst (+12 -0)
  8. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_raw_for_year.rst (+0 -1)
  9. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2021_specification.rst (+0 -1)
  10. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2022_specification.rst (+0 -1)
  11. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2023_aus_specification.rst (+0 -1)
  12. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2023_specification.rst (+0 -1)
  13. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.util.rst (+0 -1)
  14. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.read_new_unfccc_crf_for_year.rst (+0 -1)
  15. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.read_new_unfccc_crf_for_year_datalad.rst (+0 -1)
  16. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.read_unfccc_crf_submission.rst (+0 -1)
  17. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.read_unfccc_crf_submission_datalad.rst (+0 -1)
  18. docs/source/api/unfccc_ghg_data.unfccc_crf_reader.test_read_unfccc_crf_for_year.rst (+0 -1)
  19. docs/source/api/unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country.rst (+0 -1)
  20. docs/source/api/unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_datalad.rst (+0 -1)
  21. docs/source/api/unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_group.rst (+0 -1)
  22. docs/source/api/unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_group_datalad.rst (+0 -1)
  23. docs/source/api/unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country.rst (+0 -1)
  24. docs/source/api/unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_datalad.rst (+0 -1)
  25. docs/source/api/unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_group.rst (+0 -1)
  26. docs/source/api/unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_group_datalad.rst (+0 -1)
  27. docs/source/api/unfccc_ghg_data.unfccc_di_reader.unfccc_di_reader_config.rst (+0 -1)
  28. docs/source/api/unfccc_ghg_data.unfccc_downloader.download_annexI.rst (+0 -1)
  29. docs/source/api/unfccc_ghg_data.unfccc_downloader.download_ndc.rst (+0 -1)
  30. docs/source/api/unfccc_ghg_data.unfccc_downloader.download_nonannexI.rst (+0 -1)
  31. docs/source/api/unfccc_ghg_data.unfccc_downloader.fetch_submissions_annexI.rst (+0 -1)
  32. docs/source/api/unfccc_ghg_data.unfccc_downloader.fetch_submissions_bur.rst (+0 -1)
  33. docs/source/api/unfccc_ghg_data.unfccc_downloader.fetch_submissions_nc.rst (+0 -1)
  34. docs/source/api/unfccc_ghg_data.unfccc_reader.Argentina.read_ARG_BUR4_from_pdf.rst (+0 -1)
  35. docs/source/api/unfccc_ghg_data.unfccc_reader.Chile.config_chl_bur4.rst (+0 -1)
  36. docs/source/api/unfccc_ghg_data.unfccc_reader.Chile.read_CHL_BUR4_from_xlsx.rst (+0 -1)
  37. docs/source/api/unfccc_ghg_data.unfccc_reader.Chile.read_CHL_BUR5_from_xlsx.rst (+0 -1)
  38. docs/source/api/unfccc_ghg_data.unfccc_reader.Colombia.read_COL_BUR3_from_xlsx.rst (+0 -1)
  39. docs/source/api/unfccc_ghg_data.unfccc_reader.Indonesia.read_IDN_BUR3_from_pdf.rst (+0 -1)
  40. docs/source/api/unfccc_ghg_data.unfccc_reader.Israel.read_ISR_BUR2_from_pdf.rst (+0 -1)
  41. docs/source/api/unfccc_ghg_data.unfccc_reader.Malaysia.config_mys_bur3.rst (+0 -1)
  42. docs/source/api/unfccc_ghg_data.unfccc_reader.Malaysia.config_mys_bur4.rst (+0 -1)
  43. docs/source/api/unfccc_ghg_data.unfccc_reader.Malaysia.read_MYS_BUR3_from_pdf.rst (+0 -1)
  44. docs/source/api/unfccc_ghg_data.unfccc_reader.Malaysia.read_MYS_BUR4_from_pdf.rst (+0 -1)
  45. docs/source/api/unfccc_ghg_data.unfccc_reader.Mexico.read_MEX_BUR3_from_pdf.rst (+0 -1)
  46. docs/source/api/unfccc_ghg_data.unfccc_reader.Montenegro.config_mne_bur3.rst (+0 -1)
  47. docs/source/api/unfccc_ghg_data.unfccc_reader.Montenegro.read_MNE_BUR3_from_pdf.rst (+0 -1)
  48. docs/source/api/unfccc_ghg_data.unfccc_reader.Morocco.config_mar_bur3.rst (+0 -1)
  49. docs/source/api/unfccc_ghg_data.unfccc_reader.Morocco.read_MAR_BUR3_from_pdf.rst (+0 -1)
  50. docs/source/api/unfccc_ghg_data.unfccc_reader.Nigeria.config_nga_bur2.rst (+0 -1)
  51. docs/source/api/unfccc_ghg_data.unfccc_reader.Nigeria.read_NGA_BUR2_from_pdf.rst (+0 -1)
  52. docs/source/api/unfccc_ghg_data.unfccc_reader.Peru.config_per_bur3.rst (+0 -1)
  53. docs/source/api/unfccc_ghg_data.unfccc_reader.Peru.read_PER_BUR3_from_pdf.rst (+0 -1)
  54. docs/source/api/unfccc_ghg_data.unfccc_reader.Republic_of_Korea.config_kor_bur4.rst (+0 -1)
  55. docs/source/api/unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_2021_Inventory_from_xlsx.rst (+0 -1)
  56. docs/source/api/unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_2022_Inventory_from_xlsx.rst (+0 -1)
  57. docs/source/api/unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_BUR4_from_xlsx.rst (+0 -1)
  58. docs/source/api/unfccc_ghg_data.unfccc_reader.Singapore.config_sgp_bur5.rst (+0 -1)
  59. docs/source/api/unfccc_ghg_data.unfccc_reader.Singapore.read_SGP_BUR5_from_pdf.rst (+0 -1)
  60. docs/source/api/unfccc_ghg_data.unfccc_reader.Taiwan.read_TWN_2022_Inventory_from_pdf.rst (+0 -1)
  61. docs/source/api/unfccc_ghg_data.unfccc_reader.Thailand.config_tha_bur3.rst (+0 -1)
  62. docs/source/api/unfccc_ghg_data.unfccc_reader.Thailand.config_tha_bur4.rst (+0 -1)
  63. docs/source/api/unfccc_ghg_data.unfccc_reader.Thailand.read_THA_BUR3_from_pdf.rst (+0 -1)
  64. docs/source/api/unfccc_ghg_data.unfccc_reader.Thailand.read_THA_BUR4_from_pdf.rst (+0 -1)
  65. docs/source/api/unfccc_ghg_data.unfccc_reader.read_UNFCCC_submission.rst (+0 -1)
  66. docs/source/conf.py (+2 -0)
  67. docs/source/data_format.md (+68 -0)
  68. docs/source/usage.md (+9 -7)
  69. downloaded_data/UNFCCC/folder_mapping.json (+1 -1)
  70. downloaded_data/non-UNFCCC/folder_mapping.json (+1 -1)
  71. extracted_data/UNFCCC/folder_mapping.json (+1 -1)
  72. extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2023-Inventory_2023_IPCC1996_KOR_INV.yaml (+1 -1)
  73. extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2023-Inventory_2023_IPCC2006_PRIMAP.yaml (+1 -1)
  74. extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.yaml (+1 -1)
  75. extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_PRIMAP.yaml (+1 -1)
  76. extracted_data/non-UNFCCC/folder_mapping.json (+1 -1)
  77. legacy_data/UNFCCC/folder_mapping.json (+1 -1)
  78. poetry.lock (+138 -129)
  79. pyproject.toml (+11 -1)
  80. src/unfccc_ghg_data/__init__.py (+7 -4)
  81. src/unfccc_ghg_data/helper/country_info.py (+8 -5)
  82. src/unfccc_ghg_data/helper/functions.py (+189 -10)
  83. src/unfccc_ghg_data/helper/functions_temp.py (+0 -164)
  84. src/unfccc_ghg_data/unfccc_crf_reader/__init__.py (+6 -6)
  85. src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py (+3 -2)
  86. src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2021_specification.py (+528 -481)
  87. src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2022_specification.py (+528 -481)
  88. src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2023_specification.py (+528 -481)
  89. src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/util.py (+7 -7)
  90. src/unfccc_ghg_data/unfccc_crf_reader/read_new_unfccc_crf_for_year.py (+18 -13)
  91. src/unfccc_ghg_data/unfccc_crf_reader/read_new_unfccc_crf_for_year_datalad.py (+15 -10)
  92. src/unfccc_ghg_data/unfccc_crf_reader/read_unfccc_crf_submission.py (+16 -10)
  93. src/unfccc_ghg_data/unfccc_crf_reader/read_unfccc_crf_submission_datalad.py (+14 -8)
  94. src/unfccc_ghg_data/unfccc_crf_reader/test_read_unfccc_crf_for_year.py (+8 -9)
  95. src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py (+185 -194)
  96. src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_devel.py (+147 -91)
  97. src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_prod.py (+188 -141)
  98. src/unfccc_ghg_data/unfccc_crf_reader/util.py (+53 -10)
  99. src/unfccc_ghg_data/unfccc_di_reader/__init__.py (+4 -2)
  100. src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country.py (+10 -4)

+ 2 - 2
.github/ISSUE_TEMPLATE/country-data-template-non-annexi.md

@@ -33,11 +33,11 @@ Detailed data for 2015, less data for other years but main sectors present.
 The terminology is important as data in IPCC2006 categories has priority as it will currently not be made available through the UNFCCC interface.
 
 ### National communications (NC)
-* 
+*
 
 ### Biannial Update Reports (BUR)
 *
- 
+
 ### Nationally Determined Contributions (NDC)
 *
 

+ 3 - 3
Makefile

@@ -36,9 +36,9 @@ checks:  ## run all the linting checks of the codebase
 ruff-fixes:  ## fix the code using ruff
     # format before and after checking so that the formatted stuff is checked and
     # the fixed stuff is formatted
-	poetry run ruff format src tests scripts docs/source/conf.py docs/source/notebooks/*.py
-	poetry run ruff src tests scripts docs/source/conf.py docs/source/notebooks/*.py --fix
-	poetry run ruff format src tests scripts docs/source/conf.py docs/source/notebooks/*.py
+	poetry run ruff format src tests scripts docs/source/conf.py
+	poetry run ruff src tests scripts docs/source/conf.py  --fix
+	poetry run ruff format src tests scripts docs/source/conf.py
 
 .PHONY: ruff-fixes-current
 ruff-fixes-current:  ## fix the code using ruff

+ 0 - 63
README.md

@@ -86,71 +86,8 @@ The code for downloading submissions is based on [national-inventory-submissions
 **The repository is currently under initial development so a lot of things are still subject to change.**
 
 ## Description
-### Repository structure
-The repository is structured by folders. Here we list the folders in order of processing.
 
-* **downloaded_data** This folder contains data downloaded from the UNFCCC website and other sources. For Biannual Update Reports (BUR), national Communications (NC), and Nationally Determined Contributions (NDC) an automatical dowloaded exists (folder UNFCCC). Within the UNFCCC folder the data is organized in a *\<country\>/\<submission\>* structure. NDC submissions are often revised. To be able to keep track of the targets and emissions inventories we store each NDC revision in a time-stamped folder. The *non-UNFCCC* folder contains official country inventories not (yet) submitted to the UNFCCC. The internal structure is the same as for the UNFCCC folder.
-* **analyzed_submissions** Here we collect all files needed to extract data from submissions. Subfolders are countries (use the same names as in the *downloaded data* folder) and within the country folders each submission / report should have it's own subfolder, e.g. *Argentina/BUR1*. National Inventory Reports (NIR) are submitted together with BURs or NCs and have no individual folder but are used as additional inputs to their BUR or NC. As the repository is in the process of being set up, there currently is no data available.
-* **extracted_data** This folder holds all extracted datasets in PRIMAP2 interchange format. The datasets are organized in country subfolders. The naming convention for the datasets is the following: *\<iso\>\_\<sub\>\_\<year\>_\<term\>* where *\<iso\>* is the countries 3 letter iso code, *\<sub\>* is the submissions, e.g. **BUR1**, **NC5**, or **inventory2020** (for a non-UNFCCC inventory), *\<year\>* is the year of publication, and *\<term\>* is the main sector terminology e.g. IPCC2006 or IPCC1996. As the repository is in the process of being set up, there currently is no data available.
-* **code** Code that is used for several countries / reports, but not (yet) part of the primap2 package. This folder also contains scripts that automate data reading for all analyzed submissions or subsets (e.g. all first BURs) and code to generate composite datasets. Currently the only subfolder is the *UNFCCC_downloader* where code to automatically download BUR and NC submission files from the [UNFCCC website](https://www.unfccc.int) resides.
-* **composite_datasets** This folder contains generated composite datasets in PRIMAP2 interchnage format. Each dataset has it's own subfolder which should contain a dataset name, a version, and publication date (e.g. year). As the repository is in the process of being set up, there currently is no data available.
-* **legacy_data** This folder holds all extracted datasets in PRIMAP2 interchange format. The datasets are organized in country subfolders. The naming convention for the datasets is the following: *\<iso\>\_\<sub\>\_\<year\>\_\<term\>\_\<extra\>* where *\<iso\>* is the countries 3 letter iso code, *\<sub\>* is the submissions, e.g. **BUR1**, **NC5**, or **inventory2020** (for a non-UNFCCC inventory), *\<year\>* is the year of publication, *\<term\>* is the main sector terminology e.g. IPCC2006 or IPCC1996, and *\<extra\>* is a free identifier to distinguish several files for the same submission (in some cases data for e.g. fluorinated gases are in a separate file). This folder also holds data where the code or some input files are not publicly available. Our aim is to reduce data in this folder to zero and to create fully open source processes for all datasets such that they can be included in the main folder.
 
-### Data format description (columns)
-All data in this repository in the comma-separated values (CSV) files is formatted consistently with the PRIMAP2 interchange format.
-
-The data contained in each column is as follows:
-
-#### "source"
-Name of the data source. Four country specific datasets it is `\<ISO3\>-GHG-inventory`, where `\<ISO3\>` is the ISO 3166 three-letter country code. Specifications for composite datasets including several countries will be added when the datasets are available.
-
-#### "scenario (PRIMAP)"
-The scenario specifies the submissions (e.g. BUR1, NC5, or Inventory_2021 for a non-UNFCCC inventory)
-
-#### "provenance"
-Provenance of the data. Here: "derived" as it is a composite source.
-
-#### "country (ISO3)"
-ISO 3166 three-letter country codes.
-
-#### "entity"
-Gas categories using global warming potentials (GWP) from either Second Assessment Report (SAR) or Fourth Assessment Report (AR4).
-
-Code                     Description
-----                     -----------
-CH4                      Methane
-CO2                      Carbon Dioxide
-N2O                      Nitrous Oxide
-HFCS (SARGWP100)         Hydrofluorocarbons (SAR)
-HFCS (AR4GWP100)         Hydrofluorocarbons (AR4)
-PFCS (SARGWP100)         Perfluorocarbons (SAR)
-PFCS (AR4GWP100)         Perfluorocarbons (AR4)
-SF6                      Sulfur Hexafluoride
-NF3                      Nitrogen Trifluoride
-FGASES (SARGWP100)       Fluorinated Gases (SAR): HFCs, PFCs, SF$_6$, NF$_3$
-FGASES (AR4GWP100)       Fluorinated Gases (AR4): HFCs, PFCs, SF$_6$, NF$_3$
-KYOTOGHG (SARGWP100)     Kyoto greenhouse gases (SAR)
-KYOTOGHGAR4 (AR4GWP100)  Kyoto greenhouse gases (AR4)
-
-Table: Gas categories and underlying global warming potentials
-
-
-#### "unit"
-Units are of the form *Gg/Mt/... \<substance\> / yr* where substance is the entity or for CO$_2$ equivalent units *Gg/Mt/... CO2 / yr*. The CO$_2$-equivalent is calculated according to the global warming potential indicated by the entity (see above).
-
-
-#### "category (\<term\>)"
-Categories for emission as defined in terminology \<term\>. Terminology names are those used in the [climate_categories](https://github.com/pik-primap/climate_categories) package. If the terminology name contains *\_PRIMAP* is means that some (sub)categories have been added to the official IPCC category hierarchy. Added categories outside the hierarchy begin with the prefix *M*.
-
-#### "CategoryName"
-Original name of the category as presented in the submission.
-
-#### "CategoryNameTranslation"
-Optional column. In some cases original category names have been translated to english. In this case these translations are stored in this column.
-
-#### Remaining columns
-
-Years (depending on dataset)
 
 
 

+ 0 - 1
datasets/UNFCCC/no_updates_until.txt

@@ -3,4 +3,3 @@
 22/10/13: data is identical to 22/08/22
 22/10/29: data updates for BHS, LKA, MDA (just 0 instead of nan), MYS, PAK (new data), PRY (0 instead of nan), TCD (new data)
 23/01/23: data is identical to 22/10/29
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.helper.country_info.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.helper.country\_info
 .. automodule:: unfccc_ghg_data.helper.country_info
 
 .. currentmodule:: unfccc_ghg_data.helper.country_info
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.helper.folder_mapping.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.helper.folder\_mapping
 .. automodule:: unfccc_ghg_data.helper.folder_mapping
 
 .. currentmodule:: unfccc_ghg_data.helper.folder_mapping
-

+ 12 - 0
docs/source/api/unfccc_ghg_data.helper.functions_temp.rst

@@ -11,3 +11,15 @@ find\_and\_replace\_values
 ==========================
 
 .. autofunction:: find_and_replace_values
+
+
+assert\_values
+==============
+
+.. autofunction:: assert_values
+
+
+assert\_nan\_values
+===================
+
+.. autofunction:: assert_nan_values

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_raw_for_year.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.crf\_raw\_for\_year
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.crf_raw_for_year
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.crf_raw_for_year
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2021_specification.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.crf\_specifications.crf2021\_specification
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2021_specification
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2021_specification
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2022_specification.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.crf\_specifications.crf2022\_specification
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2022_specification
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2022_specification
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2023_aus_specification.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.crf\_specifications.crf2023\_aus\_specific
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2023_aus_specification
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2023_aus_specification
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2023_specification.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.crf\_specifications.crf2023\_specification
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2023_specification
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.crf2023_specification
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.crf_specifications.util.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.crf\_specifications.util
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.util
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.crf_specifications.util
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.read_new_unfccc_crf_for_year.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.read\_new\_unfccc\_crf\_for\_year
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.read_new_unfccc_crf_for_year
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.read_new_unfccc_crf_for_year
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.read_new_unfccc_crf_for_year_datalad.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.read\_new\_unfccc\_crf\_for\_year\_datalad
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.read_new_unfccc_crf_for_year_datalad
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.read_new_unfccc_crf_for_year_datalad
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.read_unfccc_crf_submission.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.read\_unfccc\_crf\_submission
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.read_unfccc_crf_submission
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.read_unfccc_crf_submission
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.read_unfccc_crf_submission_datalad.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.read\_unfccc\_crf\_submission\_datalad
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.read_unfccc_crf_submission_datalad
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.read_unfccc_crf_submission_datalad
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_crf_reader.test_read_unfccc_crf_for_year.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_crf\_reader.test\_read\_unfccc\_crf\_for\_year
 .. automodule:: unfccc_ghg_data.unfccc_crf_reader.test_read_unfccc_crf_for_year
 
 .. currentmodule:: unfccc_ghg_data.unfccc_crf_reader.test_read_unfccc_crf_for_year
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.process\_unfccc\_di\_for\_country
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_datalad.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.process\_unfccc\_di\_for\_country\_datalad
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_datalad
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_datalad
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_group.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.process\_unfccc\_di\_for\_country\_group
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_group
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_group
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_group_datalad.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.process\_unfccc\_di\_for\_country\_group\_d
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_group_datalad
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.process_unfccc_di_for_country_group_datalad
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.read\_unfccc\_di\_for\_country
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_datalad.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.read\_unfccc\_di\_for\_country\_datalad
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_datalad
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_datalad
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_group.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.read\_unfccc\_di\_for\_country\_group
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_group
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_group
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_group_datalad.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.read\_unfccc\_di\_for\_country\_group\_data
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_group_datalad
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.read_unfccc_di_for_country_group_datalad
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_di_reader.unfccc_di_reader_config.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_di\_reader.unfccc\_di\_reader\_config
 .. automodule:: unfccc_ghg_data.unfccc_di_reader.unfccc_di_reader_config
 
 .. currentmodule:: unfccc_ghg_data.unfccc_di_reader.unfccc_di_reader_config
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_downloader.download_annexI.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_downloader.download\_annexI
 .. automodule:: unfccc_ghg_data.unfccc_downloader.download_annexI
 
 .. currentmodule:: unfccc_ghg_data.unfccc_downloader.download_annexI
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_downloader.download_ndc.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_downloader.download\_ndc
 .. automodule:: unfccc_ghg_data.unfccc_downloader.download_ndc
 
 .. currentmodule:: unfccc_ghg_data.unfccc_downloader.download_ndc
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_downloader.download_nonannexI.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_downloader.download\_nonannexI
 .. automodule:: unfccc_ghg_data.unfccc_downloader.download_nonannexI
 
 .. currentmodule:: unfccc_ghg_data.unfccc_downloader.download_nonannexI
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_downloader.fetch_submissions_annexI.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_downloader.fetch\_submissions\_annexI
 .. automodule:: unfccc_ghg_data.unfccc_downloader.fetch_submissions_annexI
 
 .. currentmodule:: unfccc_ghg_data.unfccc_downloader.fetch_submissions_annexI
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_downloader.fetch_submissions_bur.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_downloader.fetch\_submissions\_bur
 .. automodule:: unfccc_ghg_data.unfccc_downloader.fetch_submissions_bur
 
 .. currentmodule:: unfccc_ghg_data.unfccc_downloader.fetch_submissions_bur
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_downloader.fetch_submissions_nc.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_downloader.fetch\_submissions\_nc
 .. automodule:: unfccc_ghg_data.unfccc_downloader.fetch_submissions_nc
 
 .. currentmodule:: unfccc_ghg_data.unfccc_downloader.fetch_submissions_nc
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Argentina.read_ARG_BUR4_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Argentina.read\_ARG\_BUR4\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Argentina.read_ARG_BUR4_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Argentina.read_ARG_BUR4_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Chile.config_chl_bur4.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Chile.config\_chl\_bur4
 .. automodule:: unfccc_ghg_data.unfccc_reader.Chile.config_chl_bur4
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Chile.config_chl_bur4
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Chile.read_CHL_BUR4_from_xlsx.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Chile.read\_CHL\_BUR4\_from\_xlsx
 .. automodule:: unfccc_ghg_data.unfccc_reader.Chile.read_CHL_BUR4_from_xlsx
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Chile.read_CHL_BUR4_from_xlsx
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Chile.read_CHL_BUR5_from_xlsx.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Chile.read\_CHL\_BUR5\_from\_xlsx
 .. automodule:: unfccc_ghg_data.unfccc_reader.Chile.read_CHL_BUR5_from_xlsx
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Chile.read_CHL_BUR5_from_xlsx
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Colombia.read_COL_BUR3_from_xlsx.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Colombia.read\_COL\_BUR3\_from\_xlsx
 .. automodule:: unfccc_ghg_data.unfccc_reader.Colombia.read_COL_BUR3_from_xlsx
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Colombia.read_COL_BUR3_from_xlsx
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Indonesia.read_IDN_BUR3_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Indonesia.read\_IDN\_BUR3\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Indonesia.read_IDN_BUR3_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Indonesia.read_IDN_BUR3_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Israel.read_ISR_BUR2_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Israel.read\_ISR\_BUR2\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Israel.read_ISR_BUR2_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Israel.read_ISR_BUR2_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Malaysia.config_mys_bur3.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Malaysia.config\_mys\_bur3
 .. automodule:: unfccc_ghg_data.unfccc_reader.Malaysia.config_mys_bur3
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Malaysia.config_mys_bur3
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Malaysia.config_mys_bur4.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Malaysia.config\_mys\_bur4
 .. automodule:: unfccc_ghg_data.unfccc_reader.Malaysia.config_mys_bur4
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Malaysia.config_mys_bur4
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Malaysia.read_MYS_BUR3_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Malaysia.read\_MYS\_BUR3\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Malaysia.read_MYS_BUR3_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Malaysia.read_MYS_BUR3_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Malaysia.read_MYS_BUR4_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Malaysia.read\_MYS\_BUR4\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Malaysia.read_MYS_BUR4_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Malaysia.read_MYS_BUR4_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Mexico.read_MEX_BUR3_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Mexico.read\_MEX\_BUR3\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Mexico.read_MEX_BUR3_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Mexico.read_MEX_BUR3_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Montenegro.config_mne_bur3.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Montenegro.config\_mne\_bur3
 .. automodule:: unfccc_ghg_data.unfccc_reader.Montenegro.config_mne_bur3
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Montenegro.config_mne_bur3
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Montenegro.read_MNE_BUR3_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Montenegro.read\_MNE\_BUR3\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Montenegro.read_MNE_BUR3_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Montenegro.read_MNE_BUR3_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Morocco.config_mar_bur3.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Morocco.config\_mar\_bur3
 .. automodule:: unfccc_ghg_data.unfccc_reader.Morocco.config_mar_bur3
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Morocco.config_mar_bur3
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Morocco.read_MAR_BUR3_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Morocco.read\_MAR\_BUR3\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Morocco.read_MAR_BUR3_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Morocco.read_MAR_BUR3_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Nigeria.config_nga_bur2.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Nigeria.config\_nga\_bur2
 .. automodule:: unfccc_ghg_data.unfccc_reader.Nigeria.config_nga_bur2
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Nigeria.config_nga_bur2
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Nigeria.read_NGA_BUR2_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Nigeria.read\_NGA\_BUR2\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Nigeria.read_NGA_BUR2_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Nigeria.read_NGA_BUR2_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Peru.config_per_bur3.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Peru.config\_per\_bur3
 .. automodule:: unfccc_ghg_data.unfccc_reader.Peru.config_per_bur3
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Peru.config_per_bur3
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Peru.read_PER_BUR3_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Peru.read\_PER\_BUR3\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Peru.read_PER_BUR3_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Peru.read_PER_BUR3_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Republic_of_Korea.config_kor_bur4.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Republic\_of\_Korea.config\_kor\_bur4
 .. automodule:: unfccc_ghg_data.unfccc_reader.Republic_of_Korea.config_kor_bur4
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Republic_of_Korea.config_kor_bur4
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_2021_Inventory_from_xlsx.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Republic\_of\_Korea.read\_KOR\_2021\_Inventory\
 .. automodule:: unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_2021_Inventory_from_xlsx
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_2021_Inventory_from_xlsx
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_2022_Inventory_from_xlsx.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Republic\_of\_Korea.read\_KOR\_2022\_Inventory\
 .. automodule:: unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_2022_Inventory_from_xlsx
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_2022_Inventory_from_xlsx
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_BUR4_from_xlsx.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Republic\_of\_Korea.read\_KOR\_BUR4\_from\_xlsx
 .. automodule:: unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_BUR4_from_xlsx
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Republic_of_Korea.read_KOR_BUR4_from_xlsx
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Singapore.config_sgp_bur5.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Singapore.config\_sgp\_bur5
 .. automodule:: unfccc_ghg_data.unfccc_reader.Singapore.config_sgp_bur5
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Singapore.config_sgp_bur5
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Singapore.read_SGP_BUR5_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Singapore.read\_SGP\_BUR5\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Singapore.read_SGP_BUR5_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Singapore.read_SGP_BUR5_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Taiwan.read_TWN_2022_Inventory_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Taiwan.read\_TWN\_2022\_Inventory\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Taiwan.read_TWN_2022_Inventory_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Taiwan.read_TWN_2022_Inventory_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Thailand.config_tha_bur3.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Thailand.config\_tha\_bur3
 .. automodule:: unfccc_ghg_data.unfccc_reader.Thailand.config_tha_bur3
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Thailand.config_tha_bur3
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Thailand.config_tha_bur4.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Thailand.config\_tha\_bur4
 .. automodule:: unfccc_ghg_data.unfccc_reader.Thailand.config_tha_bur4
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Thailand.config_tha_bur4
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Thailand.read_THA_BUR3_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Thailand.read\_THA\_BUR3\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Thailand.read_THA_BUR3_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Thailand.read_THA_BUR3_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.Thailand.read_THA_BUR4_from_pdf.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.Thailand.read\_THA\_BUR4\_from\_pdf
 .. automodule:: unfccc_ghg_data.unfccc_reader.Thailand.read_THA_BUR4_from_pdf
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.Thailand.read_THA_BUR4_from_pdf
-

+ 0 - 1
docs/source/api/unfccc_ghg_data.unfccc_reader.read_UNFCCC_submission.rst

@@ -4,4 +4,3 @@ unfccc\_ghg\_data.unfccc\_reader.read\_UNFCCC\_submission
 .. automodule:: unfccc_ghg_data.unfccc_reader.read_UNFCCC_submission
 
 .. currentmodule:: unfccc_ghg_data.unfccc_reader.read_UNFCCC_submission
-

+ 2 - 0
docs/source/conf.py

@@ -58,6 +58,8 @@ extensions = [
     "sphinx.ext.mathjax",
     # execute code
     "sphinx_exec_code",
+    # tables in markdown documents
+    "sphinx_markdown_tables",
 ]
 
 # general sphinx settings

+ 68 - 0
docs/source/data_format.md

@@ -0,0 +1,68 @@
+# Data format description (columns)
+
+All data in the comma-separated values (CSV) files in this repository is formatted consistently with the PRIMAP2 interchange format. A YAML file with metadata accompanies each CSV file. Datasets are also available in the native primap2 format, which is based on netCDF. For a description, please consult the [primap2 documentation](https://primap2.readthedocs.io/en/main/data_format_examples.html).
+
+For the CSV files, the data contained in each column is as follows:
+
+## "source"
+Name of the data source. For country-specific datasets it is `\<ISO3\>-GHG-inventory`, where `\<ISO3\>` is the ISO 3166 three-letter country code. Processed submission data uses the source `BUR_NUR` so that it can easily be combined into a single source covering several countries. Data from the UNFCCC DI portal and CRF data use the source `UNFCCC`, both for single countries and for datasets covering all countries or country groups.
+
+## "scenario (\<terminology\>)"
+The scenario specifies the submission (e.g. BUR1, NC5, or Inventory_2021 for a non-UNFCCC inventory). The scenario terminology is `PRIMAP`.
+
+Data from the UNFCCC DI interface uses the scenario `DIYYYY-MM-DD`, where `YYYY-MM-DD` is the date on which the DI data was accessed or processed. The terminology for DI data is `Access_Date` for raw data and `Process_Date` for processed data.
+
+## "provenance"
+Provenance of the data. "measured" for submissions and "derived" for composite sources.
+
+## "country (ISO3)"
+ISO 3166 three-letter country codes.
+
+## "entity"
+Gas categories using global warming potentials (GWP) from either the Second (SAR), Fourth (AR4), Fifth (AR5), or Sixth Assessment Report (AR6).
+
+| Code                  | Description                                         |
+| --------------------- | --------------------------------------------------- |
+| CH4                   | Methane                                             |
+| CO2                   | Carbon Dioxide                                      |
+| N2O                   | Nitrous Oxide                                       |
+| HFCS (SARGWP100)      | Hydrofluorocarbons (SAR)                            |
+| HFCS (AR4GWP100)      | Hydrofluorocarbons (AR4)                            |
+| HFCS (AR5GWP100)      | Hydrofluorocarbons (AR5)                            |
+| HFCS (AR6GWP100)      | Hydrofluorocarbons (AR6)                            |
+| PFCS (SARGWP100)      | Perfluorocarbons (SAR)                              |
+| PFCS (AR4GWP100)      | Perfluorocarbons (AR4)                              |
+| PFCS (AR5GWP100)      | Perfluorocarbons (AR5)                              |
+| PFCS (AR6GWP100)      | Perfluorocarbons (AR6)                              |
+| SF6                   | Sulfur Hexafluoride                                 |
+| NF3                   | Nitrogen Trifluoride                                |
+| FGASES (SARGWP100)    | Fluorinated Gases (SAR): HFCs, PFCs, SF$_6$, NF$_3$ |
+| FGASES (AR4GWP100)    | Fluorinated Gases (AR4): HFCs, PFCs, SF$_6$, NF$_3$ |
+| FGASES (AR5GWP100)    | Fluorinated Gases (AR5): HFCs, PFCs, SF$_6$, NF$_3$ |
+| FGASES (AR6GWP100)    | Fluorinated Gases (AR6): HFCs, PFCs, SF$_6$, NF$_3$ |
+| KYOTOGHG (SARGWP100)  | Kyoto greenhouse gases (SAR)                        |
+| KYOTOGHG (AR4GWP100)  | Kyoto greenhouse gases (AR4)                        |
+| KYOTOGHG (AR5GWP100)  | Kyoto greenhouse gases (AR5)                        |
+| KYOTOGHG (AR6GWP100)  | Kyoto greenhouse gases (AR6)                        |
+
+Table: Gas categories and underlying global warming potentials
+
+Some datasets also contain individual fluorinated gases from the HFC and PFC baskets.
+
+
+## "unit"
+Units are of the form *Gg/Mt/... \<substance\> / yr*, where substance is the entity, or *Gg/Mt/... CO2 / yr* for CO$_2$-equivalent units. The CO$_2$ equivalent is calculated according to the global warming potential indicated by the entity (see above).
+
+
+## "category (\<term\>)"
+Categories for emissions as defined in the terminology \<term\>. Terminology names are those used in the [climate_categories](https://github.com/pik-primap/climate_categories) package. If the terminology name contains *\_PRIMAP*, it means that some (sub)categories have been added to the official IPCC category hierarchy. Added categories outside the hierarchy begin with the prefix *M*.
+
+## "CategoryName"
+Optional column. Original name of the category as presented in the submission.
+
+## "CategoryNameTranslation"
+Optional column. In some cases, original category names have been translated to English; these translations are stored in this column.
+
+## Remaining columns
+
+Years (depending on dataset)
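
To make the column layout described in the new data_format.md concrete, here is a minimal sketch (not part of the commit) of working with an interchange-format CSV in pandas. The inline row, column set, and numbers are invented for illustration; the SAR GWP100 factor of 21 for CH4 is the standard value implied by the `(SARGWP100)` entity suffix.

```python
# Minimal sketch with hypothetical data: an interchange-format CSV has
# metadata columns (source, scenario, provenance, country, entity, unit,
# category) followed by one column per year.
import io

import pandas as pd

csv_text = io.StringIO(
    "source,scenario (PRIMAP),provenance,country (ISO3),entity,unit,"
    "category (IPCC2006_PRIMAP),2015,2016\n"
    "MEX-GHG-inventory,BUR3,measured,MEX,CH4,Gg CH4 / yr,M.AG,1000.0,1020.0\n"
)
df = pd.read_csv(csv_text)

# Year columns are the purely numeric column names.
years = [c for c in df.columns if c.isdigit()]

# Convert the CH4 time series to CO2 equivalent using the SAR GWP100 of 21,
# i.e. what a "(SARGWP100)" entity would report, in Gg CO2 / yr.
SAR_GWP100_CH4 = 21
co2eq = df.loc[df["entity"] == "CH4", years] * SAR_GWP100_CH4
print(co2eq)
```

For real files, the primap2 package provides readers for the interchange format, which should be preferred over hand-rolled parsing like the above.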

+ 9 - 7
docs/source/usage.md

@@ -3,6 +3,15 @@
 ## Overview
 Introduce readers and downloaders
 
+```{toctree}
+:caption: Contents
+:maxdepth: 2
+repository_structure
+data_format
+
+```
+
+Old content below, to be reorganized.
 
 ## Datalad etc
 ```{include} ../../README.md
@@ -19,10 +28,3 @@ Introduce readers and downloaders
 ## CRF reader
 
 ## DI reader
-
-```{toctree}
-:caption: Contents
-:maxdepth: 2
-repository_structure
-
-```

+ 1 - 1
downloaded_data/UNFCCC/folder_mapping.json

@@ -207,4 +207,4 @@
     "ZAF": "South_Africa",
     "ZMB": "Zambia",
     "ZWE": "Zimbabwe"
-}
+}

+ 1 - 1
downloaded_data/non-UNFCCC/folder_mapping.json

@@ -3,4 +3,4 @@
     "KOR": "Republic_of_Korea",
     "PSE": "PSE",
     "TWN": "Taiwan"
-}
+}

+ 1 - 1
extracted_data/UNFCCC/folder_mapping.json

@@ -191,4 +191,4 @@
     "ZAF": "South_Africa",
     "ZMB": "Zambia",
     "ZWE": "Zimbabwe"
-}
+}

+ 1 - 1
extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2023-Inventory_2023_IPCC1996_KOR_INV.yaml

@@ -1,5 +1,5 @@
 attrs:
-  references: 
+  references:
     http://www.gir.go.kr/home/board/read.do?pagerOffset=0&maxPageItems=10&maxIndexPages=10&searchKey=&searchValue=&menuId=36&boardId=62&boardMasterId=2&boardCategoryId=
   rights: ''
   contact: mail@johannes-guetschow.de

+ 1 - 1
extracted_data/non-UNFCCC/Republic_of_Korea/KOR_2023-Inventory_2023_IPCC2006_PRIMAP.yaml

@@ -1,5 +1,5 @@
 attrs:
-  references: 
+  references:
     http://www.gir.go.kr/home/board/read.do?pagerOffset=0&maxPageItems=10&maxIndexPages=10&searchKey=&searchValue=&menuId=36&boardId=62&boardMasterId=2&boardCategoryId=
   rights: ''
   contact: mail@johannes-guetschow.de

+ 1 - 1
extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_1996_Taiwan_Inv.yaml

@@ -1,5 +1,5 @@
 attrs:
-  references: 
+  references:
     https://www.cca.gov.tw/information-service/publications/national-ghg-inventory-report/1851.html
   rights: ''
   contact: mail@johannes-guetschow.de

+ 1 - 1
extracted_data/non-UNFCCC/Taiwan/TWN_inventory_2023_IPCC2006_PRIMAP.yaml

@@ -1,5 +1,5 @@
 attrs:
-  references: 
+  references:
     https://www.cca.gov.tw/information-service/publications/national-ghg-inventory-report/1851.html
   rights: ''
   contact: mail@johannes-guetschow.de

+ 1 - 1
extracted_data/non-UNFCCC/folder_mapping.json

@@ -1,4 +1,4 @@
 {
     "KOR": "Republic_of_Korea",
     "TWN": "Taiwan"
-}
+}

+ 1 - 1
legacy_data/UNFCCC/folder_mapping.json

@@ -16,4 +16,4 @@
     "SRB": "Serbia",
     "TGO": "Togo",
     "ZAF": "South_Africa"
-}
+}

+ 138 - 129
poetry.lock

@@ -748,7 +748,6 @@ colorama = {version = "*", markers = "platform_system == \"Windows\""}
 distro = {version = "*", markers = "python_version >= \"3.8\""}
 fasteners = ">=0.14"
 humanize = "*"
-importlib-metadata = {version = ">=3.6", markers = "python_version < \"3.10\""}
 iso8601 = "*"
 keyring = ">=20.0,<23.9.0 || >23.9.0"
 "keyrings.alt" = "*"
@@ -918,9 +917,6 @@ files = [
     {file = "fake_useragent-1.5.1-py3-none-any.whl", hash = "sha256:57415096557c8a4e23b62a375c21c55af5fd4ba30549227f562d2c4f5b60e3b3"},
 ]
 
-[package.dependencies]
-importlib-resources = {version = ">=5.0", markers = "python_version < \"3.10\""}
-
 [[package]]
 name = "fasteners"
 version = "0.19"
@@ -1029,13 +1025,13 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"]
 
 [[package]]
 name = "fsspec"
-version = "2024.3.1"
+version = "2024.5.0"
 description = "File-system specification"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "fsspec-2024.3.1-py3-none-any.whl", hash = "sha256:918d18d41bf73f0e2b261824baeb1b124bcf771767e3a26425cd7dec3332f512"},
-    {file = "fsspec-2024.3.1.tar.gz", hash = "sha256:f39780e282d7d117ffb42bb96992f8a90795e4d0fb0f661a70ca39fe9c43ded9"},
+    {file = "fsspec-2024.5.0-py3-none-any.whl", hash = "sha256:e0fdbc446d67e182f49a70b82cf7889028a63588fde6b222521f10937b2b670c"},
+    {file = "fsspec-2024.5.0.tar.gz", hash = "sha256:1d021b0b0f933e3b3029ed808eb400c08ba101ca2de4b3483fbc9ca23fcee94a"},
 ]
 
 [package.extras]
@@ -1043,7 +1039,7 @@ abfs = ["adlfs"]
 adl = ["adlfs"]
 arrow = ["pyarrow (>=1)"]
 dask = ["dask", "distributed"]
-devel = ["pytest", "pytest-cov"]
+dev = ["pre-commit", "ruff"]
 dropbox = ["dropbox", "dropboxdrivefs", "requests"]
 full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "dask", "distributed", "dropbox", "dropboxdrivefs", "fusepy", "gcsfs", "libarchive-c", "ocifs", "panel", "paramiko", "pyarrow (>=1)", "pygit2", "requests", "s3fs", "smbprotocol", "tqdm"]
 fuse = ["fusepy"]
@@ -1060,6 +1056,9 @@ s3 = ["s3fs"]
 sftp = ["paramiko"]
 smb = ["smbprotocol"]
 ssh = ["paramiko"]
+test = ["aiohttp (!=4.0.0a0,!=4.0.0a1)", "numpy", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "requests"]
+test-downstream = ["aiobotocore (>=2.5.4,<3.0.0)", "dask-expr", "dask[dataframe,test]", "moto[server] (>4,<5)", "pytest-timeout", "xarray"]
+test-full = ["adlfs", "aiohttp (!=4.0.0a0,!=4.0.0a1)", "cloudpickle", "dask", "distributed", "dropbox", "dropboxdrivefs", "fastparquet", "fusepy", "gcsfs", "jinja2", "kerchunk", "libarchive-c", "lz4", "notebook", "numpy", "ocifs", "pandas", "panel", "paramiko", "pyarrow", "pyarrow (>=1)", "pyftpdlib", "pygit2", "pytest", "pytest-asyncio (!=0.22.0)", "pytest-benchmark", "pytest-cov", "pytest-mock", "pytest-recording", "pytest-rerunfailures", "python-snappy", "requests", "smbprotocol", "tqdm", "urllib3", "zarr", "zstandard"]
 tqdm = ["tqdm"]
 
 [[package]]
@@ -1289,24 +1288,6 @@ docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.link
 perf = ["ipython"]
 testing = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-perf (>=0.9.2)", "pytest-ruff (>=0.2.1)"]
 
-[[package]]
-name = "importlib-resources"
-version = "6.4.0"
-description = "Read resources from Python packages"
-optional = false
-python-versions = ">=3.8"
-files = [
-    {file = "importlib_resources-6.4.0-py3-none-any.whl", hash = "sha256:50d10f043df931902d4194ea07ec57960f66a80449ff867bfe782b4c486ba78c"},
-    {file = "importlib_resources-6.4.0.tar.gz", hash = "sha256:cdb2b453b8046ca4e3798eb1d84f3cce1446a0e8e7b5ef4efb600f19fc398145"},
-]
-
-[package.dependencies]
-zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""}
-
-[package.extras]
-docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"]
-testing = ["jaraco.test (>=5.4)", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy", "pytest-ruff (>=0.2.1)", "zipp (>=3.17)"]
-
 [[package]]
 name = "incremental"
 version = "22.10.0"
@@ -1368,13 +1349,13 @@ test = ["flaky", "ipyparallel", "pre-commit", "pytest (>=7.0)", "pytest-asyncio
 
 [[package]]
 name = "ipython"
-version = "8.18.1"
+version = "8.24.0"
 description = "IPython: Productive Interactive Computing"
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.10"
 files = [
-    {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"},
-    {file = "ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27"},
+    {file = "ipython-8.24.0-py3-none-any.whl", hash = "sha256:d7bf2f6c4314984e3e02393213bab8703cf163ede39672ce5918c51fe253a2a3"},
+    {file = "ipython-8.24.0.tar.gz", hash = "sha256:010db3f8a728a578bb641fdd06c063b9fb8e96a9464c63aec6310fbcb5e80501"},
 ]
 
 [package.dependencies]
@@ -1383,25 +1364,26 @@ decorator = "*"
 exceptiongroup = {version = "*", markers = "python_version < \"3.11\""}
 jedi = ">=0.16"
 matplotlib-inline = "*"
-pexpect = {version = ">4.3", markers = "sys_platform != \"win32\""}
+pexpect = {version = ">4.3", markers = "sys_platform != \"win32\" and sys_platform != \"emscripten\""}
 prompt-toolkit = ">=3.0.41,<3.1.0"
 pygments = ">=2.4.0"
 stack-data = "*"
-traitlets = ">=5"
-typing-extensions = {version = "*", markers = "python_version < \"3.10\""}
+traitlets = ">=5.13.0"
+typing-extensions = {version = ">=4.6", markers = "python_version < \"3.12\""}
 
 [package.extras]
-all = ["black", "curio", "docrepr", "exceptiongroup", "ipykernel", "ipyparallel", "ipywidgets", "matplotlib", "matplotlib (!=3.2.0)", "nbconvert", "nbformat", "notebook", "numpy (>=1.22)", "pandas", "pickleshare", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio (<0.22)", "qtconsole", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "trio", "typing-extensions"]
+all = ["ipython[black,doc,kernel,matplotlib,nbconvert,nbformat,notebook,parallel,qtconsole]", "ipython[test,test-extra]"]
 black = ["black"]
-doc = ["docrepr", "exceptiongroup", "ipykernel", "matplotlib", "pickleshare", "pytest (<7)", "pytest (<7.1)", "pytest-asyncio (<0.22)", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "stack-data", "testpath", "typing-extensions"]
+doc = ["docrepr", "exceptiongroup", "ipykernel", "ipython[test]", "matplotlib", "setuptools (>=18.5)", "sphinx (>=1.3)", "sphinx-rtd-theme", "sphinxcontrib-jquery", "stack-data", "typing-extensions"]
 kernel = ["ipykernel"]
+matplotlib = ["matplotlib"]
 nbconvert = ["nbconvert"]
 nbformat = ["nbformat"]
 notebook = ["ipywidgets", "notebook"]
 parallel = ["ipyparallel"]
 qtconsole = ["qtconsole"]
-test = ["pickleshare", "pytest (<7.1)", "pytest-asyncio (<0.22)", "testpath"]
-test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.22)", "pandas", "pickleshare", "pytest (<7.1)", "pytest-asyncio (<0.22)", "testpath", "trio"]
+test = ["pickleshare", "pytest", "pytest-asyncio (<0.22)", "testpath"]
+test-extra = ["curio", "ipython[test]", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.23)", "pandas", "trio"]
 
 [[package]]
 name = "iso8601"
@@ -1593,7 +1575,6 @@ files = [
 ]
 
 [package.dependencies]
-importlib-metadata = {version = ">=4.8.3", markers = "python_version < \"3.10\""}
 jupyter-core = ">=4.12,<5.0.dev0 || >=5.1.dev0"
 python-dateutil = ">=2.8.2"
 pyzmq = ">=23.0"
@@ -1865,6 +1846,21 @@ files = [
     {file = "looseversion-1.3.0.tar.gz", hash = "sha256:ebde65f3f6bb9531a81016c6fef3eb95a61181adc47b7f949e9c0ea47911669e"},
 ]
 
+[[package]]
+name = "markdown"
+version = "3.6"
+description = "Python implementation of John Gruber's Markdown."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "Markdown-3.6-py3-none-any.whl", hash = "sha256:48f276f4d8cfb8ce6527c8f79e2ee29708508bf4d40aa410fbc3b4ee832c850f"},
+    {file = "Markdown-3.6.tar.gz", hash = "sha256:ed4f41f6daecbeeb96e576ce414c41d2d876daa9a16cb35fa8ed8c2ddfad0224"},
+]
+
+[package.extras]
+docs = ["mdx-gh-links (>=0.2)", "mkdocs (>=1.5)", "mkdocs-gen-files", "mkdocs-literate-nav", "mkdocs-nature (>=0.6)", "mkdocs-section-index", "mkdocstrings[python]"]
+testing = ["coverage", "pyyaml"]
+
 [[package]]
 name = "markdown-it-py"
 version = "2.2.0"
@@ -1960,53 +1956,56 @@ files = [
 
 [[package]]
 name = "matplotlib"
-version = "3.8.4"
+version = "3.9.0"
 description = "Python plotting package"
 optional = false
 python-versions = ">=3.9"
 files = [
-    {file = "matplotlib-3.8.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:abc9d838f93583650c35eca41cfcec65b2e7cb50fd486da6f0c49b5e1ed23014"},
-    {file = "matplotlib-3.8.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f65c9f002d281a6e904976007b2d46a1ee2bcea3a68a8c12dda24709ddc9106"},
-    {file = "matplotlib-3.8.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce1edd9f5383b504dbc26eeea404ed0a00656c526638129028b758fd43fc5f10"},
-    {file = "matplotlib-3.8.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ecd79298550cba13a43c340581a3ec9c707bd895a6a061a78fa2524660482fc0"},
-    {file = "matplotlib-3.8.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:90df07db7b599fe7035d2f74ab7e438b656528c68ba6bb59b7dc46af39ee48ef"},
-    {file = "matplotlib-3.8.4-cp310-cp310-win_amd64.whl", hash = "sha256:ac24233e8f2939ac4fd2919eed1e9c0871eac8057666070e94cbf0b33dd9c338"},
-    {file = "matplotlib-3.8.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:72f9322712e4562e792b2961971891b9fbbb0e525011e09ea0d1f416c4645661"},
-    {file = "matplotlib-3.8.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:232ce322bfd020a434caaffbd9a95333f7c2491e59cfc014041d95e38ab90d1c"},
-    {file = "matplotlib-3.8.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6addbd5b488aedb7f9bc19f91cd87ea476206f45d7116fcfe3d31416702a82fa"},
-    {file = "matplotlib-3.8.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cc4ccdc64e3039fc303defd119658148f2349239871db72cd74e2eeaa9b80b71"},
-    {file = "matplotlib-3.8.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b7a2a253d3b36d90c8993b4620183b55665a429da8357a4f621e78cd48b2b30b"},
-    {file = "matplotlib-3.8.4-cp311-cp311-win_amd64.whl", hash = "sha256:8080d5081a86e690d7688ffa542532e87f224c38a6ed71f8fbed34dd1d9fedae"},
-    {file = "matplotlib-3.8.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:6485ac1f2e84676cff22e693eaa4fbed50ef5dc37173ce1f023daef4687df616"},
-    {file = "matplotlib-3.8.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c89ee9314ef48c72fe92ce55c4e95f2f39d70208f9f1d9db4e64079420d8d732"},
-    {file = "matplotlib-3.8.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:50bac6e4d77e4262c4340d7a985c30912054745ec99756ce213bfbc3cb3808eb"},
-    {file = "matplotlib-3.8.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f51c4c869d4b60d769f7b4406eec39596648d9d70246428745a681c327a8ad30"},
-    {file = "matplotlib-3.8.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:b12ba985837e4899b762b81f5b2845bd1a28f4fdd1a126d9ace64e9c4eb2fb25"},
-    {file = "matplotlib-3.8.4-cp312-cp312-win_amd64.whl", hash = "sha256:7a6769f58ce51791b4cb8b4d7642489df347697cd3e23d88266aaaee93b41d9a"},
-    {file = "matplotlib-3.8.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:843cbde2f0946dadd8c5c11c6d91847abd18ec76859dc319362a0964493f0ba6"},
-    {file = "matplotlib-3.8.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:1c13f041a7178f9780fb61cc3a2b10423d5e125480e4be51beaf62b172413b67"},
-    {file = "matplotlib-3.8.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fb44f53af0a62dc80bba4443d9b27f2fde6acfdac281d95bc872dc148a6509cc"},
-    {file = "matplotlib-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:606e3b90897554c989b1e38a258c626d46c873523de432b1462f295db13de6f9"},
-    {file = "matplotlib-3.8.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9bb0189011785ea794ee827b68777db3ca3f93f3e339ea4d920315a0e5a78d54"},
-    {file = "matplotlib-3.8.4-cp39-cp39-win_amd64.whl", hash = "sha256:6209e5c9aaccc056e63b547a8152661324404dd92340a6e479b3a7f24b42a5d0"},
-    {file = "matplotlib-3.8.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c7064120a59ce6f64103c9cefba8ffe6fba87f2c61d67c401186423c9a20fd35"},
-    {file = "matplotlib-3.8.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0e47eda4eb2614300fc7bb4657fced3e83d6334d03da2173b09e447418d499f"},
-    {file = "matplotlib-3.8.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:493e9f6aa5819156b58fce42b296ea31969f2aab71c5b680b4ea7a3cb5c07d94"},
-    {file = "matplotlib-3.8.4.tar.gz", hash = "sha256:8aac397d5e9ec158960e31c381c5ffc52ddd52bd9a47717e2a694038167dffea"},
+    {file = "matplotlib-3.9.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2bcee1dffaf60fe7656183ac2190bd630842ff87b3153afb3e384d966b57fe56"},
+    {file = "matplotlib-3.9.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3f988bafb0fa39d1074ddd5bacd958c853e11def40800c5824556eb630f94d3b"},
+    {file = "matplotlib-3.9.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:fe428e191ea016bb278758c8ee82a8129c51d81d8c4bc0846c09e7e8e9057241"},
+    {file = "matplotlib-3.9.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eaf3978060a106fab40c328778b148f590e27f6fa3cd15a19d6892575bce387d"},
+    {file = "matplotlib-3.9.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:2e7f03e5cbbfacdd48c8ea394d365d91ee8f3cae7e6ec611409927b5ed997ee4"},
+    {file = "matplotlib-3.9.0-cp310-cp310-win_amd64.whl", hash = "sha256:13beb4840317d45ffd4183a778685e215939be7b08616f431c7795276e067463"},
+    {file = "matplotlib-3.9.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:063af8587fceeac13b0936c42a2b6c732c2ab1c98d38abc3337e430e1ff75e38"},
+    {file = "matplotlib-3.9.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9a2fa6d899e17ddca6d6526cf6e7ba677738bf2a6a9590d702c277204a7c6152"},
+    {file = "matplotlib-3.9.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:550cdda3adbd596078cca7d13ed50b77879104e2e46392dcd7c75259d8f00e85"},
+    {file = "matplotlib-3.9.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76cce0f31b351e3551d1f3779420cf8f6ec0d4a8cf9c0237a3b549fd28eb4abb"},
+    {file = "matplotlib-3.9.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c53aeb514ccbbcbab55a27f912d79ea30ab21ee0531ee2c09f13800efb272674"},
+    {file = "matplotlib-3.9.0-cp311-cp311-win_amd64.whl", hash = "sha256:a5be985db2596d761cdf0c2eaf52396f26e6a64ab46bd8cd810c48972349d1be"},
+    {file = "matplotlib-3.9.0-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:c79f3a585f1368da6049318bdf1f85568d8d04b2e89fc24b7e02cc9b62017382"},
+    {file = "matplotlib-3.9.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bdd1ecbe268eb3e7653e04f451635f0fb0f77f07fd070242b44c076c9106da84"},
+    {file = "matplotlib-3.9.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d38e85a1a6d732f645f1403ce5e6727fd9418cd4574521d5803d3d94911038e5"},
+    {file = "matplotlib-3.9.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0a490715b3b9984fa609116481b22178348c1a220a4499cda79132000a79b4db"},
+    {file = "matplotlib-3.9.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8146ce83cbc5dc71c223a74a1996d446cd35cfb6a04b683e1446b7e6c73603b7"},
+    {file = "matplotlib-3.9.0-cp312-cp312-win_amd64.whl", hash = "sha256:d91a4ffc587bacf5c4ce4ecfe4bcd23a4b675e76315f2866e588686cc97fccdf"},
+    {file = "matplotlib-3.9.0-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:616fabf4981a3b3c5a15cd95eba359c8489c4e20e03717aea42866d8d0465956"},
+    {file = "matplotlib-3.9.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cd53c79fd02f1c1808d2cfc87dd3cf4dbc63c5244a58ee7944497107469c8d8a"},
+    {file = "matplotlib-3.9.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:06a478f0d67636554fa78558cfbcd7b9dba85b51f5c3b5a0c9be49010cf5f321"},
+    {file = "matplotlib-3.9.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:81c40af649d19c85f8073e25e5806926986806fa6d54be506fbf02aef47d5a89"},
+    {file = "matplotlib-3.9.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:52146fc3bd7813cc784562cb93a15788be0b2875c4655e2cc6ea646bfa30344b"},
+    {file = "matplotlib-3.9.0-cp39-cp39-win_amd64.whl", hash = "sha256:0fc51eaa5262553868461c083d9adadb11a6017315f3a757fc45ec6ec5f02888"},
+    {file = "matplotlib-3.9.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bd4f2831168afac55b881db82a7730992aa41c4f007f1913465fb182d6fb20c0"},
+    {file = "matplotlib-3.9.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:290d304e59be2b33ef5c2d768d0237f5bd132986bdcc66f80bc9bcc300066a03"},
+    {file = "matplotlib-3.9.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ff2e239c26be4f24bfa45860c20ffccd118d270c5b5d081fa4ea409b5469fcd"},
+    {file = "matplotlib-3.9.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:af4001b7cae70f7eaacfb063db605280058246de590fa7874f00f62259f2df7e"},
+    {file = "matplotlib-3.9.0.tar.gz", hash = "sha256:e6d29ea6c19e34b30fb7d88b7081f869a03014f66fe06d62cc77d5a6ea88ed7a"},
 ]
 
 [package.dependencies]
 contourpy = ">=1.0.1"
 cycler = ">=0.10"
 fonttools = ">=4.22.0"
-importlib-resources = {version = ">=3.2.0", markers = "python_version < \"3.10\""}
 kiwisolver = ">=1.3.1"
-numpy = ">=1.21"
+numpy = ">=1.23"
 packaging = ">=20.0"
 pillow = ">=8"
 pyparsing = ">=2.3.1"
 python-dateutil = ">=2.7"
 
+[package.extras]
+dev = ["meson-python (>=0.13.1)", "numpy (>=1.25)", "pybind11 (>=2.6)", "setuptools (>=64)", "setuptools_scm (>=7)"]
+
 [[package]]
 name = "matplotlib-inline"
 version = "0.1.7"
@@ -2370,10 +2369,8 @@ files = [
 
 [package.dependencies]
 numpy = [
-    {version = ">=1.21.0", markers = "python_version == \"3.9\" and platform_system == \"Darwin\" and platform_machine == \"arm64\""},
     {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""},
     {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""},
-    {version = ">=1.19.3", markers = "platform_system == \"Linux\" and platform_machine == \"aarch64\" and python_version >= \"3.8\" and python_version < \"3.10\" or python_version > \"3.9\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_system != \"Darwin\" and python_version < \"3.10\" or python_version >= \"3.9\" and platform_machine != \"arm64\" and python_version < \"3.10\""},
 ]
 
 [[package]]
@@ -2538,13 +2535,13 @@ complete = ["blosc", "numpy (>=1.20.0)", "pandas (>=1.3)", "pyzmq"]
 
 [[package]]
 name = "patool"
-version = "1.15.0"
+version = "2.2.0"
 description = "portable archive file manager"
 optional = false
-python-versions = ">=3.9"
+python-versions = ">=3.10"
 files = [
-    {file = "patool-1.15.0-py2.py3-none-any.whl", hash = "sha256:b14fd047571f71220de5d220bac9974fa2f423cce3c42cf0a85f54f155c30f09"},
-    {file = "patool-1.15.0.tar.gz", hash = "sha256:d94814a1c92d111089001f6a67b058de29ae7033e172503af1acb452048c8a69"},
+    {file = "patool-2.2.0-py2.py3-none-any.whl", hash = "sha256:21db6cc2fcd77acd37768258d1ad5aa3df0f676331fd80dfb1eb628626bc9155"},
+    {file = "patool-2.2.0.tar.gz", hash = "sha256:7767a747b24fbaa6ecc53579debc18358b6bc792ddb57e0669784c5b29af4a73"},
 ]
 
 [[package]]
@@ -2711,13 +2708,13 @@ xarray = ">=0.16.1"
 
 [[package]]
 name = "platformdirs"
-version = "4.2.1"
+version = "4.2.2"
 description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`."
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "platformdirs-4.2.1-py3-none-any.whl", hash = "sha256:17d5a1161b3fd67b390023cb2d3b026bbd40abde6fdb052dfbd3a29c3ba22ee1"},
-    {file = "platformdirs-4.2.1.tar.gz", hash = "sha256:031cd18d4ec63ec53e82dceaac0417d218a6863f7745dfcc9efe7793b7039bdf"},
+    {file = "platformdirs-4.2.2-py3-none-any.whl", hash = "sha256:2d7a1657e36a80ea911db832a8a6ece5ee53d8de21edd5cc5879af6530b1bfee"},
+    {file = "platformdirs-4.2.2.tar.gz", hash = "sha256:38b7b51f512eed9e84a22788b4bce1de17c0adb134d6becb09836e37d8654cd3"},
 ]
 
 [package.extras]
@@ -2881,47 +2878,47 @@ tests = ["pytest"]
 
 [[package]]
 name = "pyarrow"
-version = "16.0.0"
+version = "16.1.0"
 description = "Python library for Apache Arrow"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "pyarrow-16.0.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:22a1fdb1254e5095d629e29cd1ea98ed04b4bbfd8e42cc670a6b639ccc208b60"},
-    {file = "pyarrow-16.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:574a00260a4ed9d118a14770edbd440b848fcae5a3024128be9d0274dbcaf858"},
-    {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c0815d0ddb733b8c1b53a05827a91f1b8bde6240f3b20bf9ba5d650eb9b89cdf"},
-    {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:df0080339387b5d30de31e0a149c0c11a827a10c82f0c67d9afae3981d1aabb7"},
-    {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:edf38cce0bf0dcf726e074159c60516447e4474904c0033f018c1f33d7dac6c5"},
-    {file = "pyarrow-16.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:91d28f9a40f1264eab2af7905a4d95320ac2f287891e9c8b0035f264fe3c3a4b"},
-    {file = "pyarrow-16.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:99af421ee451a78884d7faea23816c429e263bd3618b22d38e7992c9ce2a7ad9"},
-    {file = "pyarrow-16.0.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d22d0941e6c7bafddf5f4c0662e46f2075850f1c044bf1a03150dd9e189427ce"},
-    {file = "pyarrow-16.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:266ddb7e823f03733c15adc8b5078db2df6980f9aa93d6bb57ece615df4e0ba7"},
-    {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cc23090224b6594f5a92d26ad47465af47c1d9c079dd4a0061ae39551889efe"},
-    {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56850a0afe9ef37249d5387355449c0f94d12ff7994af88f16803a26d38f2016"},
-    {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:705db70d3e2293c2f6f8e84874b5b775f690465798f66e94bb2c07bab0a6bb55"},
-    {file = "pyarrow-16.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:5448564754c154997bc09e95a44b81b9e31ae918a86c0fcb35c4aa4922756f55"},
-    {file = "pyarrow-16.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:729f7b262aa620c9df8b9967db96c1575e4cfc8c25d078a06968e527b8d6ec05"},
-    {file = "pyarrow-16.0.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:fb8065dbc0d051bf2ae2453af0484d99a43135cadabacf0af588a3be81fbbb9b"},
-    {file = "pyarrow-16.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:20ce707d9aa390593ea93218b19d0eadab56390311cb87aad32c9a869b0e958c"},
-    {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5823275c8addbbb50cd4e6a6839952682a33255b447277e37a6f518d6972f4e1"},
-    {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1ab8b9050752b16a8b53fcd9853bf07d8daf19093533e990085168f40c64d978"},
-    {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:42e56557bc7c5c10d3e42c3b32f6cff649a29d637e8f4e8b311d334cc4326730"},
-    {file = "pyarrow-16.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:2a7abdee4a4a7cfa239e2e8d721224c4b34ffe69a0ca7981354fe03c1328789b"},
-    {file = "pyarrow-16.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:ef2f309b68396bcc5a354106741d333494d6a0d3e1951271849787109f0229a6"},
-    {file = "pyarrow-16.0.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:ed66e5217b4526fa3585b5e39b0b82f501b88a10d36bd0d2a4d8aa7b5a48e2df"},
-    {file = "pyarrow-16.0.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:cc8814310486f2a73c661ba8354540f17eef51e1b6dd090b93e3419d3a097b3a"},
-    {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3c2f5e239db7ed43e0ad2baf46a6465f89c824cc703f38ef0fde927d8e0955f7"},
-    {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f293e92d1db251447cb028ae12f7bc47526e4649c3a9924c8376cab4ad6b98bd"},
-    {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:dd9334a07b6dc21afe0857aa31842365a62eca664e415a3f9536e3a8bb832c07"},
-    {file = "pyarrow-16.0.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:d91073d1e2fef2c121154680e2ba7e35ecf8d4969cc0af1fa6f14a8675858159"},
-    {file = "pyarrow-16.0.0-cp38-cp38-win_amd64.whl", hash = "sha256:71d52561cd7aefd22cf52538f262850b0cc9e4ec50af2aaa601da3a16ef48877"},
-    {file = "pyarrow-16.0.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:b93c9a50b965ee0bf4fef65e53b758a7e8dcc0c2d86cebcc037aaaf1b306ecc0"},
-    {file = "pyarrow-16.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:d831690844706e374c455fba2fb8cfcb7b797bfe53ceda4b54334316e1ac4fa4"},
-    {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:35692ce8ad0b8c666aa60f83950957096d92f2a9d8d7deda93fb835e6053307e"},
-    {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9dd3151d098e56f16a8389c1247137f9e4c22720b01c6f3aa6dec29a99b74d80"},
-    {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:bd40467bdb3cbaf2044ed7a6f7f251c8f941c8b31275aaaf88e746c4f3ca4a7a"},
-    {file = "pyarrow-16.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:00a1dcb22ad4ceb8af87f7bd30cc3354788776c417f493089e0a0af981bc8d80"},
-    {file = "pyarrow-16.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:fda9a7cebd1b1d46c97b511f60f73a5b766a6de4c5236f144f41a5d5afec1f35"},
-    {file = "pyarrow-16.0.0.tar.gz", hash = "sha256:59bb1f1edbbf4114c72415f039f1359f1a57d166a331c3229788ccbfbb31689a"},
+    {file = "pyarrow-16.1.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:17e23b9a65a70cc733d8b738baa6ad3722298fa0c81d88f63ff94bf25eaa77b9"},
+    {file = "pyarrow-16.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4740cc41e2ba5d641071d0ab5e9ef9b5e6e8c7611351a5cb7c1d175eaf43674a"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98100e0268d04e0eec47b73f20b39c45b4006f3c4233719c3848aa27a03c1aef"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f68f409e7b283c085f2da014f9ef81e885d90dcd733bd648cfba3ef265961848"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:a8914cd176f448e09746037b0c6b3a9d7688cef451ec5735094055116857580c"},
+    {file = "pyarrow-16.1.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:48be160782c0556156d91adbdd5a4a7e719f8d407cb46ae3bb4eaee09b3111bd"},
+    {file = "pyarrow-16.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:9cf389d444b0f41d9fe1444b70650fea31e9d52cfcb5f818b7888b91b586efff"},
+    {file = "pyarrow-16.1.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:d0ebea336b535b37eee9eee31761813086d33ed06de9ab6fc6aaa0bace7b250c"},
+    {file = "pyarrow-16.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2e73cfc4a99e796727919c5541c65bb88b973377501e39b9842ea71401ca6c1c"},
+    {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bf9251264247ecfe93e5f5a0cd43b8ae834f1e61d1abca22da55b20c788417f6"},
+    {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddf5aace92d520d3d2a20031d8b0ec27b4395cab9f74e07cc95edf42a5cc0147"},
+    {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:25233642583bf658f629eb230b9bb79d9af4d9f9229890b3c878699c82f7d11e"},
+    {file = "pyarrow-16.1.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:a33a64576fddfbec0a44112eaf844c20853647ca833e9a647bfae0582b2ff94b"},
+    {file = "pyarrow-16.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:185d121b50836379fe012753cf15c4ba9638bda9645183ab36246923875f8d1b"},
+    {file = "pyarrow-16.1.0-cp312-cp312-macosx_10_15_x86_64.whl", hash = "sha256:2e51ca1d6ed7f2e9d5c3c83decf27b0d17bb207a7dea986e8dc3e24f80ff7d6f"},
+    {file = "pyarrow-16.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:06ebccb6f8cb7357de85f60d5da50e83507954af617d7b05f48af1621d331c9a"},
+    {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b04707f1979815f5e49824ce52d1dceb46e2f12909a48a6a753fe7cafbc44a0c"},
+    {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0d32000693deff8dc5df444b032b5985a48592c0697cb6e3071a5d59888714e2"},
+    {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:8785bb10d5d6fd5e15d718ee1d1f914fe768bf8b4d1e5e9bf253de8a26cb1628"},
+    {file = "pyarrow-16.1.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:e1369af39587b794873b8a307cc6623a3b1194e69399af0efd05bb202195a5a7"},
+    {file = "pyarrow-16.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:febde33305f1498f6df85e8020bca496d0e9ebf2093bab9e0f65e2b4ae2b3444"},
+    {file = "pyarrow-16.1.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:b5f5705ab977947a43ac83b52ade3b881eb6e95fcc02d76f501d549a210ba77f"},
+    {file = "pyarrow-16.1.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:0d27bf89dfc2576f6206e9cd6cf7a107c9c06dc13d53bbc25b0bd4556f19cf5f"},
+    {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0d07de3ee730647a600037bc1d7b7994067ed64d0eba797ac74b2bc77384f4c2"},
+    {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fbef391b63f708e103df99fbaa3acf9f671d77a183a07546ba2f2c297b361e83"},
+    {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_aarch64.whl", hash = "sha256:19741c4dbbbc986d38856ee7ddfdd6a00fc3b0fc2d928795b95410d38bb97d15"},
+    {file = "pyarrow-16.1.0-cp38-cp38-manylinux_2_28_x86_64.whl", hash = "sha256:f2c5fb249caa17b94e2b9278b36a05ce03d3180e6da0c4c3b3ce5b2788f30eed"},
+    {file = "pyarrow-16.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:e6b6d3cd35fbb93b70ade1336022cc1147b95ec6af7d36906ca7fe432eb09710"},
+    {file = "pyarrow-16.1.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:18da9b76a36a954665ccca8aa6bd9f46c1145f79c0bb8f4f244f5f8e799bca55"},
+    {file = "pyarrow-16.1.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:99f7549779b6e434467d2aa43ab2b7224dd9e41bdde486020bae198978c9e05e"},
+    {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f07fdffe4fd5b15f5ec15c8b64584868d063bc22b86b46c9695624ca3505b7b4"},
+    {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ddfe389a08ea374972bd4065d5f25d14e36b43ebc22fc75f7b951f24378bf0b5"},
+    {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:3b20bd67c94b3a2ea0a749d2a5712fc845a69cb5d52e78e6449bbd295611f3aa"},
+    {file = "pyarrow-16.1.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = "sha256:ba8ac20693c0bb0bf4b238751d4409e62852004a8cf031c73b0e0962b03e45e3"},
+    {file = "pyarrow-16.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:31a1851751433d89a986616015841977e0a188662fcffd1a5677453f1df2de0a"},
+    {file = "pyarrow-16.1.0.tar.gz", hash = "sha256:15fbb22ea96d11f0b5768504a3f961edab25eaf4197c341720c4a387f6c60315"},
 ]
 
 [package.dependencies]
@@ -3636,13 +3633,13 @@ jeepney = ">=0.6"
 
 [[package]]
 name = "selenium"
-version = "4.20.0"
+version = "4.21.0"
 description = ""
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "selenium-4.20.0-py3-none-any.whl", hash = "sha256:b1d0c33b38ca27d0499183e48e1dd09ff26973481f5d3ef2983073813ae6588d"},
-    {file = "selenium-4.20.0.tar.gz", hash = "sha256:0bd564ee166980d419a8aaf4ac00289bc152afcf2eadca5efe8c8e36711853fd"},
+    {file = "selenium-4.21.0-py3-none-any.whl", hash = "sha256:4770ffe5a5264e609de7dc914be6b89987512040d5a8efb2abb181330d097993"},
+    {file = "selenium-4.21.0.tar.gz", hash = "sha256:650dbfa5159895ff00ad16e5ddb6ceecb86b90c7ed2012b3f041f64e6e4904fe"},
 ]
 
 [package.dependencies]
@@ -3755,7 +3752,6 @@ babel = ">=2.9"
 colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
 docutils = ">=0.14,<0.20"
 imagesize = ">=1.3"
-importlib-metadata = {version = ">=4.8", markers = "python_version < \"3.10\""}
 Jinja2 = ">=3.0"
 packaging = ">=21.0"
 Pygments = ">=2.12"
@@ -3854,6 +3850,20 @@ files = [
     {file = "sphinx_exec_code-0.10-py3-none-any.whl", hash = "sha256:2597460a7062bfd8ef1b108a8cec1dc10250d56a19034830b038ac653dee1902"},
 ]
 
+[[package]]
+name = "sphinx-markdown-tables"
+version = "0.0.17"
+description = "A Sphinx extension for rendering tables written in markdown"
+optional = false
+python-versions = "*"
+files = [
+    {file = "sphinx-markdown-tables-0.0.17.tar.gz", hash = "sha256:6bc6d3d400eaccfeebd288446bc08dd83083367c58b85d40fe6c12d77ef592f1"},
+    {file = "sphinx_markdown_tables-0.0.17-py3-none-any.whl", hash = "sha256:2bd0c30779653e4dd120300cbd9ca412c480738cc2241f6dea477a883f299e04"},
+]
+
+[package.dependencies]
+markdown = ">=3.4"
+
 [[package]]
 name = "sphinxcontrib-applehelp"
 version = "1.0.8"
@@ -4148,7 +4158,6 @@ files = [
 
 [package.dependencies]
 click = "*"
-importlib-resources = {version = ">=5", markers = "python_version < \"3.10\""}
 incremental = "*"
 jinja2 = "*"
 tomli = {version = "*", markers = "python_version < \"3.11\""}
@@ -4207,13 +4216,13 @@ six = "*"
 
 [[package]]
 name = "trio"
-version = "0.25.0"
+version = "0.25.1"
 description = "A friendly Python library for async concurrency and I/O"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "trio-0.25.0-py3-none-any.whl", hash = "sha256:e6458efe29cc543e557a91e614e2b51710eba2961669329ce9c862d50c6e8e81"},
-    {file = "trio-0.25.0.tar.gz", hash = "sha256:9b41f5993ad2c0e5f62d0acca320ec657fdb6b2a2c22b8c7aed6caf154475c4e"},
+    {file = "trio-0.25.1-py3-none-any.whl", hash = "sha256:e42617ba091e7b2e50c899052e83a3c403101841de925187f61e7b7eaebdf3fb"},
+    {file = "trio-0.25.1.tar.gz", hash = "sha256:9f5314f014ea3af489e77b001861c535005c3858d38ec46b6b071ebfa339d7fb"},
 ]
 
 [package.dependencies]
@@ -4391,23 +4400,23 @@ viz = ["matplotlib", "nc-time-axis", "seaborn"]
 
 [[package]]
 name = "zipp"
-version = "3.18.1"
+version = "3.18.2"
 description = "Backport of pathlib-compatible object wrapper for zip files"
 optional = false
 python-versions = ">=3.8"
 files = [
-    {file = "zipp-3.18.1-py3-none-any.whl", hash = "sha256:206f5a15f2af3dbaee80769fb7dc6f249695e940acca08dfb2a4769fe61e538b"},
-    {file = "zipp-3.18.1.tar.gz", hash = "sha256:2884ed22e7d8961de1c9a05142eb69a247f120291bc0206a00a7642f09b5b715"},
+    {file = "zipp-3.18.2-py3-none-any.whl", hash = "sha256:dce197b859eb796242b0622af1b8beb0a722d52aa2f57133ead08edd5bf5374e"},
+    {file = "zipp-3.18.2.tar.gz", hash = "sha256:6278d9ddbcfb1f1089a88fde84481528b07b0e10474e09dcfe53dad4069fa059"},
 ]
 
 [package.extras]
 docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
-testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
+testing = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy", "pytest-ruff (>=0.2.1)"]
 
 [extras]
 plots = ["matplotlib"]
 
 [metadata]
 lock-version = "2.0"
-python-versions = ">=3.9, <3.11"
-content-hash = "5e7c15209b92f58e81b3190530856f6e4cf1b4b427c05d333b636ca51965439c"
+python-versions = ">=3.10, <3.11"
+content-hash = "ce0e86a46d676f0d40d64ed44a5fa95bbe577bcc7b892447770effc1110dc6ef"

+ 11 - 1
pyproject.toml

@@ -9,7 +9,7 @@ license = "TBD"
 include = ["LICENCE"]  # poetry uses US English so assumes it will be spelt LICENSE
 
 [tool.poetry.dependencies]
-python = ">=3.9, <3.11"
+python = ">=3.10, <3.11"
 matplotlib = { version = "^3.7.1", optional = true }
 doit = "^0.36.0"
 primap2 = ">=0.9.8"
@@ -25,6 +25,7 @@ unfccc-di-api = "^4.0.0"
 dask = "^2023.12.0"
 sphinx-exec-code = "^0.10"
 ghostscript = "^0.7"
+sphinx-markdown-tables = "^0.0.17"
 
 [tool.poetry.extras]
 plots = ["matplotlib"]
@@ -124,6 +125,15 @@ docstring-code-format = true
     "S101",  # S101 Use of `assert` detected
     "PLR2004" # Magic value used in comparison
 ]
+"src/unfccc_ghg_data/unfccc_reader/*/config_*.py" = [
+    "E501",  # don't enforce line length
+]
+"src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/*_specification.py" = [
+    "E501",  # don't enforce line length
+]
+"src/unfccc_ghg_data/unfccc_di_reader/unfccc_di_reader_config.py" = [
+    "E501",  # don't enforce line length
+]
 "docs/source/notebooks/*" = [
     "D100",  # Missing docstring at the top of file
     "E402",  # Module level import not at top of file

+ 7 - 4
src/unfccc_ghg_data/__init__.py

@@ -1,8 +1,11 @@
 """
-Reading country greenhouse gas data submitted to the United Nations Framework
-Convention on Climate Change (UNFCCC)in different submissions and formats and providing
-it in a standadized nc and csv format compatible with primap2. Data are read using
-different methods from APIs, xlsx and csv files as well as pdf files.
+Downloading and reading GHG data from the UNFCCC
+
+Downloading and reading country greenhouse gas data submitted to the United Nations
+Framework Convention on Climate Change (UNFCCC) in different submissions and
+formats and providing it in a standardized nc and csv format compatible with primap2.
+Data are read using different methods from APIs, xlsx and csv files as well as pdf
+files.
 """
 import importlib.metadata
 

+ 8 - 5
src/unfccc_ghg_data/helper/country_info.py

@@ -7,21 +7,24 @@ submissions and datasets both read and not read
 
 import argparse
 
-from unfccc_ghg_data.helper.functions import get_country_datasets, get_country_submissions
+from unfccc_ghg_data.helper.functions import (
+    get_country_datasets,
+    get_country_submissions,
+)
 
 if __name__ == "__main__":
     # Find the right function and possible input and output files and
     # read the data using datalad run.
     parser = argparse.ArgumentParser()
-    parser.add_argument('--country', help='Country name or code')
+    parser.add_argument("--country", help="Country name or code")
     args = parser.parse_args()
     country = args.country
 
     # print available submissions
-    print("="*15 + " Available submissions " + "="*15)
+    print("=" * 15 + " Available submissions " + "=" * 15)
     get_country_submissions(country, True)
     print("")
 
-    #print available datasets
-    print("="*15 + " Available datasets " + "="*15)
+    # print available datasets
+    print("=" * 15 + " Available datasets " + "=" * 15)
     get_country_datasets(country, True)
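
The script is a thin argparse wrapper around the two imported helpers; the same listing can presumably be produced programmatically along these lines (the country code is illustrative):

    from unfccc_ghg_data.helper.functions import (
        get_country_datasets,
        get_country_submissions,
    )

    country = "AUS"  # illustrative; the CLI accepts names or codes

    print("=" * 15 + " Available submissions " + "=" * 15)
    get_country_submissions(country, True)

    print("=" * 15 + " Available datasets " + "=" * 15)
    get_country_datasets(country, True)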

+ 189 - 10
src/unfccc_ghg_data/helper/functions.py

@@ -2,14 +2,15 @@
 
 Functions used by the different readers and downloaders in the unfccc_ghg_data package
 """
+from __future__ import annotations
 
 import copy
 import json
 import re
+import warnings
 from copy import deepcopy
 from datetime import date
 from pathlib import Path
-from typing import Optional, Union
 
 import numpy as np
 import pandas as pd
@@ -32,11 +33,11 @@ def process_data_for_country(  # noqa PLR0913, PLR0912, PLR0915
     data_country: xr.Dataset,
     entities_to_ignore: list[str],
     gas_baskets: dict[str, list[str]],
-    filter_dims: Optional[dict[str, list[str]]] = None,
-    cat_terminology_out: Optional[str] = None,
-    category_conversion: dict[str, dict] = None,
-    sectors_out: list[str] = None,
-    processing_info_country: dict = None,
+    filter_dims: dict[str, list[str]] | None = None,
+    cat_terminology_out: str | None = None,
+    category_conversion: dict[str, dict] | None = None,
+    sectors_out: list[str] | None = None,
+    processing_info_country: dict | None = None,
 ) -> xr.Dataset:
     """
     Process data from DI interface (where necessary).
@@ -818,7 +819,8 @@ def get_country_datasets(  # noqa: PLR0915, PLR0912
                     if cleaned_datasets_current_folder:
                         for country_ds in cleaned_datasets_current_folder:
                             print(
-                                f"{country_ds}: {cleaned_datasets_current_folder[country_ds]}"
+                                f"{country_ds}: "
+                                f"{cleaned_datasets_current_folder[country_ds]}"
                             )
                     else:
                         print("No data available")
@@ -891,7 +893,8 @@ def get_country_datasets(  # noqa: PLR0915, PLR0912
                     if cleaned_datasets_current_folder:
                         for country_ds in cleaned_datasets_current_folder:
                             print(
-                                f"{country_ds}: {cleaned_datasets_current_folder[country_ds]}"
+                                f"{country_ds}: "
+                                f"{cleaned_datasets_current_folder[country_ds]}"
                             )
                     else:
                         print("No data available")
@@ -1042,8 +1045,8 @@ def fix_rows(
 def make_wide_table(
     data: pd.DataFrame,
     keyword: str,
-    col: Union[int, str],
-    index_cols: list[Union[int, str]],
+    col: int | str,
+    index_cols: list[int | str],
 ) -> pd.DataFrame:
     """
     Make a wide table from a table which is a stack of tables for different time periods
@@ -1094,3 +1097,179 @@ def make_wide_table(
             else:
                 df_all = pd.concat([df_all, df_to_add], axis=1, join="outer")
         return df_all
+
+
+def find_and_replace_values(
+    df: pd.DataFrame,
+    replace_info: list[tuple[str | float]],
+    category_column: str,
+    entity_column: str = "entity",
+) -> pd.DataFrame:
+    """
+    Find and replace single values in a dataframe.
+
+    Parameters
+    ----------
+    df
+        Input data frame
+    replace_info
+        Category, entity, year, and new value. Omit the new value to
+        replace the existing one with NaN.
+        For example [("3.C", "CO", "2019", 3.423)] or [("3.C", "CO", "2019")]
+    category_column
+        The name of the column that contains the categories.
+    entity_column
+        The name of the column that contains the entities.
+
+    Returns
+    -------
+        Data frame with updated values.
+
+    """
+    for replace_info_value in replace_info:
+        category = replace_info_value[0]
+        entity = replace_info_value[1]
+        year = replace_info_value[2]
+
+        if len(replace_info_value) == 4:  # noqa: PLR2004
+            new_value = replace_info_value[3]
+        elif len(replace_info_value) == 3:  # noqa: PLR2004
+            new_value = np.nan
+        else:
+            raise AssertionError(  # noqa: TRY003
+                f"Expected tuple of length 3 or 4. Got {replace_info_value}"
+            )
+
+        index = df.loc[
+            (df[category_column] == category) & (df[entity_column] == entity),
+        ].index[0]
+
+        # set a single value (pandas recommends .at[] for this; .loc[] works too)
+        df.loc[index, year] = new_value
+        print(f"Set value for {category}, {entity}, {year} to {new_value}.")
+
+    return df
+
+
+def assert_values(
+    df: pd.DataFrame,
+    test_case: tuple[str | float | int],
+    category_column: str = "category (IPCC1996_2006_GIN_Inv)",
+    entity_column: str = "entity",
+) -> None:
+    """
+    Check if a value in a dataframe matches the expected value.
+
+    Parameters
+    ----------
+    df
+        The data frame to check.
+    test_case
+        The combination of parameters and the expected value.
+        Use the format (<category>, <entity>, <year>, <expected_value>).
+    category_column
+        The column where to look for the category.
+    entity_column
+        The column where to look for the entity.
+    """
+    category = test_case[0]
+    entity = test_case[1]
+    year = test_case[2]
+    expected_value = test_case[3]
+
+    assert isinstance(  # noqa: S101
+        expected_value, (float, int)
+    ), (
+        "This function only works for numbers. "
+        "Use assert_nan_values to check for NaNs "
+        "and empty values."
+    )
+
+    arr = df.loc[
+        (df[category_column] == category) & (df[entity_column] == entity), year
+    ].to_numpy()
+
+    # Assert the category exists in the data frame
+    assert (  # noqa: S101
+        category in df[category_column].unique()
+    ), f"{category} is not a valid category. Choose from {df[category_column].unique()}"
+
+    # Assert the entity exists in the data frame
+    assert (  # noqa: S101
+        entity in df[entity_column].unique()
+    ), f"{entity} is not a valid entity. Choose from {df[entity_column].unique()}"
+
+    assert (  # noqa: S101
+        arr.size > 0
+    ), f"No value found for category {category}, entity {entity}, year {year}!"
+
+    assert (  # noqa: S101
+        arr.size <= 1
+    ), (
+        f"More than one value found for category {category}, entity {entity}, "
+        f"year {year}!"
+    )
+
+    assert (  # noqa: S101
+        arr[0] == test_case[3]
+    ), f"Expected value {expected_value}, actual value is {arr[0]}"
+
+    print(
+        f"Value for category {category}, entity {entity}, year {year} is as expected."
+    )
+
+
+def assert_nan_values(
+    df: pd.DataFrame,
+    test_case: tuple[str, ...],
+    category_column: str = "category (IPCC1996_2006_GIN_Inv)",
+    entity_column: str = "entity",
+) -> None:
+    """
+    Check for empty, NE, NE1 values
+
+    Check if values that are empty or NE or NE1 in the PDF tables
+    are not present in the dataset.
+
+    Parameters
+    ----------
+    df
+        The data frame to check.
+    test_case
+        The combination of input parameters.
+        Use the format (<category>, <entity>, <year>).
+    category_column
+        The column where to look for the category.
+    entity_column
+        The column where to look for the entity.
+
+    """
+    category = test_case[0]
+    entity = test_case[1]
+    year = test_case[2]
+
+    if category not in df[category_column].unique():
+        warning_string = (
+            f"{category} is not in the data set. Either all values "
+            f"for this category are NaN or the category never "
+            f"existed in the data set."
+        )
+        warnings.warn(warning_string)
+        return
+
+    if entity not in df[entity_column].unique():
+        warning_string = (
+            f"{entity} is not in the data set. Either all values "
+            f"for this entity are NaN or the category never "
+            f"existed in the data set."
+        )
+        warnings.warn(warning_string)
+        return
+
+    arr = df.loc[
+        (df[category_column] == category) & (df[entity_column] == entity), year
+    ].to_numpy()
+
+    assert np.isnan(arr[0]), f"Value is {arr[0]} and not NaN."  # noqa: S101
+
+    print(f"Value for category {category}, entity {entity}, year {year} is NaN.")

+ 0 - 164
src/unfccc_ghg_data/helper/functions_temp.py

@@ -1,164 +0,0 @@
-"""
-Temporary file for new functions to avoid merging issues.
-
-due to different automatic formatting. Delete after merge.
-"""
-
-import pandas as pd
-import warnings
-import numpy as np
-
-
-
-def find_and_replace_values(
-    df: pd.DataFrame,
-    replace_info: list[tuple[str | float]],
-    category_column: str,
-    entity_column: str = "entity",
-) -> pd.DataFrame:
-    """
-    Find values and replace single values in a dataframe.
-
-    Input
-    -----
-    df
-        Input data frame
-    replace_info
-        Category, entity, year, and new value. Don't put a new value if you would like to replace with nan.
-        For example [("3.C", "CO", "2019", 3.423)] or [("3.C", "CO", "2019")]
-    category_column
-        The name of the column that contains the categories.
-    entity_column
-        The name of the column that contains the categories.
-
-    Output
-    ------
-        Data frame with updated values.
-
-    """
-    for replace_info_value in replace_info:
-        category = replace_info_value[0]
-        entity = replace_info_value[1]
-        year = replace_info_value[2]
-
-        if len(replace_info_value) == 4:  # noqa: PLR2004
-            new_value = replace_info_value[3]
-        elif len(replace_info_value) == 3:  # noqa: PLR2004
-            new_value = np.nan
-        else:
-            raise AssertionError(  # noqa: TRY003
-                f"Expected tuple of length 3 or 4. Got {replace_info_value}"
-            )
-
-        index = df.loc[
-            (df[category_column] == category) & (df[entity_column] == entity),
-        ].index[0]
-
-        # pandas recommends using .at[] for changing single values
-        df.loc[index, year] = new_value
-        print(f"Set value for {category}, {entity}, {year} to {new_value}.")
-
-    return df
-
-
-def assert_values(
-        df: pd.DataFrame,
-        test_case: tuple[str | float | int],
-        category_column: str = "category (IPCC1996_2006_GIN_Inv)",
-        entity_column: str = "entity",
-) -> None:
-    """
-    Check if a value in a dataframe matches the expected value.
-    Input
-    -----
-    df
-        The data frame to check.
-    test_case
-        The combination of parameters and the expected value.
-        Use the format (<category>, <entity>, <year>, <expected_value>).
-    category_column
-        The columns where to look for the category.
-    entity_column
-        The column where to look for the entity.
-    """
-    category = test_case[0]
-    entity = test_case[1]
-    year = test_case[2]
-    expected_value = test_case[3]
-
-    assert isinstance(expected_value, (float, int)), "This function only works for numbers. Use assert_nan_values to check for NaNs and empty values."
-
-    arr = df.loc[
-        (df[category_column] == category) & (df[entity_column] == entity), year
-    ].values
-
-    # Assert the category exists in the data frame
-    assert (
-            category in df[category_column].unique()
-    ), f"{category} is not a valid category. Choose from {df[category_column].unique()}"
-
-    # Assert the entity exists in the data frame
-    assert (
-            entity in df[entity_column].unique()
-    ), f"{entity} is not a valid entity. Choose from {df[entity_column].unique()}"
-
-    assert (
-            arr.size > 0
-    ), f"No value found for category {category}, entity {entity}, year {year}!"
-
-    assert (
-            arr.size <= 1
-    ), f"More than one value found for category {category}, entity {entity}, year {year}!"
-
-    assert (
-            arr[0] == test_case[3]
-    ), f"Expected value {expected_value}, actual value is {arr[0]}"
-
-    print(
-        f"Value for category {category}, entity {entity}, year {year} is as expected."
-    )
-
-def assert_nan_values(
-        df: pd.DataFrame,
-        test_case: tuple[str, ...],
-        category_column: str = "category (IPCC1996_2006_GIN_Inv)",
-        entity_column: str = "entity",
-) -> None:
-    """
-    Check if values that are empty or NE or NE1 in the PDF tables
-    are not present in the dataset.
-
-    Input
-    -----
-    df
-        The data frame to check.
-    test_case
-        The combination of input parameters.
-        Use the format (<category>, <entity>, <year>).
-    category_column
-        The columns where to look for the category.
-    entity_column
-        The column where to look for the entity.
-
-    """
-    category = test_case[0]
-    entity = test_case[1]
-    year = test_case[2]
-
-    if category not in df[category_column].unique():
-        warning_string = f"{category} is not in the data set. Either all values for this category are NaN or the category never existed in the data set."
-        warnings.warn(warning_string)
-        return
-
-    if entity not in df[entity_column].unique():
-        warning_string = f"{entity} is not in the data set. Either all values for this entity are NaN or the category never existed in the data set."
-        warnings.warn(warning_string)
-        return
-
-    arr = df.loc[
-        (df[category_column] == category) & (df[entity_column] == entity), year
-    ].values
-
-    assert np.isnan(arr[0]), f"Value is {arr[0]} and not NaN."
-
-    print(f"Value for category {category}, entity {entity}, year {year} is NaN.")

+ 6 - 6
src/unfccc_ghg_data/unfccc_crf_reader/__init__.py

@@ -2,12 +2,12 @@
 CRF reader module
 """
 
-#from pathlib import Path
+# from pathlib import Path
 from . import crf_specifications
 from .unfccc_crf_reader_prod import read_crf_for_country, read_crf_for_country_datalad
 
-__all__ = ["crf_specifications",
-           "read_crf_for_country",
-           "read_crf_for_country_datalad",
-           ]
-
+__all__ = [
+    "crf_specifications",
+    "read_crf_for_country",
+    "read_crf_for_country_datalad",
+]

+ 3 - 2
src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py

@@ -1,7 +1,7 @@
 """
 A script to collect all latest CRF submissions for a given year
 
-Reads the latest data fromt he extracted data folder for each country.
+Reads the latest data from the extracted data folder for each country.
 Notifies the user if new data are available in the downloaded_data folder
 which have not yet been read.
 
@@ -51,7 +51,8 @@ if __name__ == "__main__":
             )
             if not data_read:
                 print(f"Latest submission for {country} has not been read yet.")
-                # TODO: make sure an older submission is read if present. currently none is included at all
+                # TODO: make sure an older submission is read if present.
+                #  currently none is included at all
                 outdated_countries.append(country)
 
             # read the native format file

File diff content too large to display
+ 528 - 481
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2021_specification.py


File diff content too large to display
+ 528 - 481
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2022_specification.py


File diff content too large to display
+ 528 - 481
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/crf2023_specification.py


+ 7 - 7
src/unfccc_ghg_data/unfccc_crf_reader/crf_specifications/util.py

@@ -16,7 +16,7 @@ unit_info = {
         "regexp_unit": r"\((.*)\)",
         "manual_repl_unit": {
             "CO2 equivalent (kt)": "kt CO2eq",
-            "CO2 equivalents (kt) (2)": "kt CO2eq", # for AUS Table2(II)
+            "CO2 equivalents (kt) (2)": "kt CO2eq",  # for AUS Table2(II)
         },
         "default_unit": "t",
     },
@@ -27,11 +27,11 @@ unit_info = {
         "regexp_unit": r"\((.*)\)",
         "manual_repl_unit": {
             "CO2 equivalent (kt)": "kt CO2eq",
-            "CO2 equivalents (kt) (2) ": "kt CO2eq", # for AUS Table1
-            "CO2 equivalent (kt) (3)": "kt CO2eq", # for AUS, Table2(I)
-            "CO2 equivalents (kt) (3)": "kt CO2eq", # for AUS, Table2(I)
-            "CO2 equivalents (kt) (2)": "kt CO2eq", # for AUS Table3
-            "CO2 equivalents (kt) (4)": "kt CO2eq", # for AUS Table4
+            "CO2 equivalents (kt) (2) ": "kt CO2eq",  # for AUS Table1
+            "CO2 equivalent (kt) (3)": "kt CO2eq",  # for AUS, Table2(I)
+            "CO2 equivalents (kt) (3)": "kt CO2eq",  # for AUS, Table2(I)
+            "CO2 equivalents (kt) (2)": "kt CO2eq",  # for AUS Table3
+            "CO2 equivalents (kt) (4)": "kt CO2eq",  # for AUS Table4
         },
         "default_unit": "kt",
     },
@@ -42,7 +42,7 @@ unit_info = {
         "regexp_unit": r"\((.*)\)",
         "manual_repl_unit": {
             "(kt CO2 equivalent)": "kt CO2eq",
-            "CO2 equivalents (kt) (3)": "kt CO2eq", # for AUS
+            "CO2 equivalents (kt) (3)": "kt CO2eq",  # for AUS
         },
         "default_unit": "kt",
     },
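
The AUS-specific overrides above exist because the greedy \((.*)\) pattern captures across both parenthesised groups in such headers. A small sketch of the lookup order this config implies (hypothetical flow, not the reader's literal code path):

    import re

    header = "CO2 equivalents (kt) (2)"  # AUS-style header with a footnote marker
    manual_repl_unit = {"CO2 equivalents (kt) (2)": "kt CO2eq"}

    if header in manual_repl_unit:
        unit = manual_repl_unit[header]  # explicit override wins
    else:
        match = re.search(r"\((.*)\)", header)
        # the greedy match would yield "kt) (2" here, hence the override above
        unit = match.group(1) if match else "t"  # fall back to default_unit

    print(unit)  # kt CO2eq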

+ 18 - 13
src/unfccc_ghg_data/unfccc_crf_reader/read_new_unfccc_crf_for_year.py

@@ -1,32 +1,37 @@
 """
-This script is a wrapper around the read_crf_for_country
+Wrapper for the read_new_crf_for_year
+
+Wrapper around the read_new_crf_for_year
 function such that it can be called from datalad
 """
 
 import argparse
 
-from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import read_new_crf_for_year
+from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import (
+    read_new_crf_for_year,
+)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    #parser.add_argument('--countries', help='List of country codes', default=None)
-    parser.add_argument('--submission_year', help='Submission round to read', type=int)
-    parser.add_argument('--submission_date', help='Date of submission to read', default=None)
-    parser.add_argument('--re_read', help='Read data also if already read before',
-                        action='store_true')
+    # parser.add_argument('--countries', help='List of country codes', default=None)
+    parser.add_argument("--submission_year", help="Submission round to read", type=int)
+    parser.add_argument(
+        "--submission_date", help="Date of submission to read", default=None
+    )
+    parser.add_argument(
+        "--re_read", help="Read data also if already read before", action="store_true"
+    )
 
     args = parser.parse_args()
 
-    #countries = args.countries
-    #if countries == "None":
+    # countries = args.countries
+    # if countries == "None":
     #    countries = None
     submission_year = args.submission_year
     re_read = args.re_read
     print(f"!!!!!!!!!!!!!!!!!!!!script: re_read={re_read}")
     read_new_crf_for_year(
         submission_year=int(submission_year),
-    #    countries=countries,
-        re_read=re_read
+        #    countries=countries,
+        re_read=re_read,
     )
-
-

+ 15 - 10
src/unfccc_ghg_data/unfccc_crf_reader/read_new_unfccc_crf_for_year_datalad.py

@@ -1,25 +1,30 @@
 """
-wrapper around read_crf_for_country_datalad such that it can be called
+Wrapper around read_new_crf_for_year_datalad
+
+Wrapper around read_new_crf_for_year_datalad such that it can be called
 from doit in the current setup where doit runs on system python and
 not in the venv.
 """
 
 import argparse
 
-from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import read_new_crf_for_year_datalad
+from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import (
+    read_new_crf_for_year_datalad,
+)
 from unfccc_ghg_data.unfccc_crf_reader.util import NoCRFFilesError
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    #parser.add_argument('--countries', help='List of country codes', default=None)
-    parser.add_argument('--submission_year', help='Submission round to read')
-    parser.add_argument('--re_read', help='Read data also if already read before',
-                        action='store_true')
+    # parser.add_argument('--countries', help='List of country codes', default=None)
+    parser.add_argument("--submission_year", help="Submission round to read")
+    parser.add_argument(
+        "--re_read", help="Read data also if already read before", action="store_true"
+    )
 
     args = parser.parse_args()
 
-    #countries = args.countries
-    #if countries == "None":
+    # countries = args.countries
+    # if countries == "None":
     #    countries = None
     submission_year = args.submission_year
     re_read = args.re_read
@@ -27,8 +32,8 @@ if __name__ == "__main__":
     try:
         read_new_crf_for_year_datalad(
             submission_year=int(submission_year),
-    #        countries=countries,
-            re_read=re_read
+            #        countries=countries,
+            re_read=re_read,
         )
     except NoCRFFilesError as err:
         print(f"NoCRFFilesError: {err}")

+ 16 - 10
src/unfccc_ghg_data/unfccc_crf_reader/read_unfccc_crf_submission.py

@@ -1,19 +1,26 @@
 """
-This script is a wrapper around the read_crf_for_country
+Wrapper around read_crf_for_country
+
+Wrapper around the read_crf_for_country
 function such that it can be called from datalad
 """
 
 import argparse
 
-from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import read_crf_for_country
+from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import (
+    read_crf_for_country,
+)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--country', help='Country name or code')
-    parser.add_argument('--submission_year', help='Submission round to read', type=int)
-    parser.add_argument('--submission_date', help='Date of submission to read', default=None)
-    parser.add_argument('--re_read', help='Read data also if already read before',
-                        action='store_true')
+    parser.add_argument("--country", help="Country name or code")
+    parser.add_argument("--submission_year", help="Submission round to read", type=int)
+    parser.add_argument(
+        "--submission_date", help="Date of submission to read", default=None
+    )
+    parser.add_argument(
+        "--re_read", help="Read data also if already read before", action="store_true"
+    )
 
     args = parser.parse_args()
 
@@ -21,13 +28,12 @@ if __name__ == "__main__":
     submission_year = args.submission_year
     submission_date = args.submission_date
     re_read = args.re_read
-    if submission_date == 'None':
+    if submission_date == "None":
         submission_date = None
 
     read_crf_for_country(
         country,
         submission_year=submission_year,
         submission_date=submission_date,
-        re_read=re_read
+        re_read=re_read,
     )
-
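
The wrapper only forwards its parsed arguments, so the underlying call is equivalent to something like the following (country code and year are illustrative):

    from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import (
        read_crf_for_country,
    )

    read_crf_for_country(
        "AUS",  # illustrative; a country name also works
        submission_year=2023,
        submission_date=None,  # None presumably selects the latest submission
        re_read=False,
    )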

+ 14 - 8
src/unfccc_ghg_data/unfccc_crf_reader/read_unfccc_crf_submission_datalad.py

@@ -1,4 +1,6 @@
 """
+Wrapper around read_crf_for_country_datalad
+
 Wrapper around read_crf_for_country_datalad such that it can be called
 from doit in the current setup where doit runs on system python and
 not in the venv.
@@ -6,15 +8,20 @@ not in the venv.
 
 import argparse
 
-from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import read_crf_for_country_datalad
+from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import (
+    read_crf_for_country_datalad,
+)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--country', help='Country name or code')
-    parser.add_argument('--submission_year', help='Submission round to read')
-    parser.add_argument('--submission_date', help='Date of submission to read', default=None)
-    parser.add_argument('--re_read', help='Read data also if already read before',
-                        action='store_true')
+    parser.add_argument("--country", help="Country name or code")
+    parser.add_argument("--submission_year", help="Submission round to read")
+    parser.add_argument(
+        "--submission_date", help="Date of submission to read", default=None
+    )
+    parser.add_argument(
+        "--re_read", help="Read data also if already read before", action="store_true"
+    )
 
     args = parser.parse_args()
 
@@ -23,7 +30,6 @@ if __name__ == "__main__":
     submission_date = args.submission_date
     re_read = args.re_read
 
-
     if submission_date == "None":
         submission_date = None
 
@@ -31,5 +37,5 @@ if __name__ == "__main__":
         country,
         submission_year=int(submission_year),
         submission_date=submission_date,
-        re_read=re_read
+        re_read=re_read,
     )

+ 8 - 9
src/unfccc_ghg_data/unfccc_crf_reader/test_read_unfccc_crf_for_year.py

@@ -5,21 +5,22 @@ function such that it can be called from datalad
 
 import argparse
 
-from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_devel import read_year_to_test_specs
+from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_devel import (
+    read_year_to_test_specs,
+)
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--submission_year', help='Submission round to read', type=int)
-    parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
-    parser.add_argument('--country', help='Country to read', type=str, default=None)
-    parser.add_argument('--totest', help='read tables to test', action='store_true')
+    parser.add_argument("--submission_year", help="Submission round to read", type=int)
+    parser.add_argument("--data_year", help="Data year to read", type=int, default=2010)
+    parser.add_argument("--country", help="Country to read", type=str, default=None)
+    parser.add_argument("--totest", help="read tables to test", action="store_true")
     args = parser.parse_args()
 
-
     submission_year = args.submission_year
     data_year = args.data_year
     country = args.country
-    #print(f"totest: {args.totest}")
+    # print(f"totest: {args.totest}")
     if args.totest:
         totest = True
     else:
@@ -31,5 +32,3 @@ if __name__ == "__main__":
         totest=totest,
         country_code=country,
     )
-
-

+ 185 - 194
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_core.py

@@ -1,4 +1,6 @@
 """
+Core functions for the CRF / CRT reader
+
 This file holds the core functions of the CRF reader.
 Core function are used both for reading for final datasets as
 well as for test-reading to check for new categories etc.
@@ -8,6 +10,7 @@ import json
 import os
 import re
 from collections import Counter
+from collections.abc import Generator
 from datetime import datetime, timedelta
 from operator import itemgetter
 from pathlib import Path
@@ -26,51 +29,43 @@ from .util import NoCRFFilesError
 
 
 ### reading functions
-def convert_crf_table_to_pm2if(
+def convert_crf_table_to_pm2if(  # noqa: PLR0913
     df_table: pd.DataFrame,
     submission_year: int,
-    entity_mapping: Optional[dict[str, str]] = None,
-    coords_defaults_input: Optional[dict[str, str]] = None,
-    filter_remove_input: Optional[dict[str, dict[str, Union[str, list]]]] = None,
-    filter_keep_input: Optional[dict[str, dict[str, Union[str, list]]]] = None,
-    meta_data_input: Optional[dict[str, str]] = None,
+    entity_mapping: dict[str, str] | None = None,
+    coords_defaults_input: dict[str, str] | None = None,
+    filter_remove_input: dict[str, dict[str, str | list]] | None = None,
+    filter_keep_input: dict[str, dict[str, str | list]] | None = None,
+    meta_data_input: dict[str, str] | None = None,
 ) -> pd.DataFrame:
     """
-    Converts a given pandas long format crf table to PRIMAP2 interchange format
+    Convert a given pandas long format CRF table to PRIMAP2 interchange format
 
     Parameters
     ----------
-    __________
-        df_table: pd.DataFrame
-            Data to convert
-
-        submission_year: int
-            Year of submission
-
-        entity_mapping: Optional[Dict[str,str]]
-            Mapping of entities to PRIMAP2 format. Not necessary for all tables
-
-        coords_defaults_input: Optional[Dict[str,str]],
-            Additional default values for coordinates. (e.g. "Total" for `type`)
-
-        filter_remove_input: Optional[Dict[str,Dict[str,Union[str,List]]]]
-            Filter to remove data during conversion. The format is as in
-            PRIMAP2
-
-        filter_keep_input: Optional[Dict[str,Dict[str,Union[str,List]]]]
-            Filter to keep only specified data during conversion.
-            The format is as in PRIMAP2
-
-        meta_data_input: Optional[Dict[str,str]]
-            Meta data information. If values filled by this function automatically
-            are given as input the automatic values are overwritten.
+    df_table: pd.DataFrame
+        Data to convert
+    submission_year: int
+        Year of submission
+    entity_mapping: Optional[Dict[str,str]]
+        Mapping of entities to PRIMAP2 format. Not necessary for all tables
+    coords_defaults_input: Optional[Dict[str,str]]
+        Additional default values for coordinates. (e.g. "Total" for `type`)
+    filter_remove_input: Optional[Dict[str,Dict[str,Union[str,List]]]]
+        Filter to remove data during conversion. The format is as in
+        PRIMAP2
+    filter_keep_input: Optional[Dict[str,Dict[str,Union[str,List]]]]
+        Filter to keep only specified data during conversion.
+        The format is as in PRIMAP2
+    meta_data_input: Optional[Dict[str,str]]
+        Meta data information. If values filled by this function automatically
+        are given as input the automatic values are overwritten.
 
     Returns
     -------
-    _______
-        pd.DataFrame:
-            Pandas DataFrame containing the data in PRIMAP2 interchange format
-            Metadata is stored as attrs in the DataFrame
+    pd.DataFrame:
+        Pandas DataFrame containing the data in PRIMAP2 interchange format.
+        Metadata is stored as attrs of the DataFrame.
     """
     coords_cols = {
         "category": "category",
@@ -126,12 +121,15 @@ def convert_crf_table_to_pm2if(
             filter_keep[key] = filter_keep_input[key]
 
     meta_data = {
-        "references": f"https://unfccc.int/ghg-inventories-annex-i-parties/{submission_year}",
+        "references": f"https://unfccc.int/ghg-inventories-annex-i-parties/"
+        f"{submission_year}",
         "rights": "",
         "contact": "mail@johannes-guetschow.de",
-        "title": f"Data submitted in {submission_year} to the UNFCCC in the common reporting format (CRF)",
+        "title": f"Data submitted in {submission_year} to the UNFCCC in the common "
+        f"reporting format (CRF)",
         "comment": "Read from xlsx file by Johannes Gütschow",
-        "institution": "United Nations Framework Convention on Climate Change (www.unfccc.int)",
+        "institution": "United Nations Framework Convention on Climate Change "
+        "(www.unfccc.int)",
     }
     if meta_data_input is not None:
         for key in meta_data_input.keys():
@@ -153,16 +151,18 @@ def convert_crf_table_to_pm2if(
     return df_table_if
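
A minimal usage sketch of the converted function above (illustrative values; the entity mapping is hypothetical and `df_long` stands for a long-format table as returned by `read_crf_table`):

    df_if = convert_crf_table_to_pm2if(
        df_table=df_long,
        submission_year=2023,
        entity_mapping={"CH4 emissions": "CH4"},  # hypothetical mapping
        meta_data_input={"title": "Custom title overriding the default"},
    )
    # df_if holds the data in PRIMAP2 interchange format; metadata such as
    # "references" and "title" ends up in df_if.attrs
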
 
 
-def read_crf_table(
-    country_codes: Union[str, list[str]],
+def read_crf_table(  # noqa: PLR0913, PLR0912, PLR0915
+    country_codes: str | list[str],
     table: str,
     submission_year: int,
-    data_year: Optional[Union[int, list[int]]] = None,
-    date: Optional[str] = None,
-    folder: Optional[str] = None,
-    debug: Optional[bool] = False,
+    data_year: int | list[int] | None = None,
+    date: str | None = None,
+    folder: str | None = None,
+    debug: bool = False,
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
     """
+    Read CRF table for given year and country/countries
+
     Read CRF table for given submission year and country or countries.
     This function can read for multiple years and countries but only a single
     table. The reason is that combining data from different tables needs
@@ -173,41 +173,32 @@ def read_crf_table(
 
     Parameters
     ----------
-    __________
-
     country_codes: str or list[str]
         ISO 3-letter country code or list of country codes
-
     table: str
         name of the table sheet in the CRF xlsx file
-
     submission_year: int
         Year of the submission of the data
-
     data_year: int or List of int (optional)
         If an int is given a single data year will be read; if a list of ints is
         given these years will be read. If nothing is given all data years will be read.
-
     date: str (optional, default is "latest")
         read only the submission from the given date
-
     folder: str (optional)
         Folder that contains the xlsx files. If not given, folders are determined
         by the submission_year and country_code variables
-
     debug: bool (optional)
         If True, print some debug information like column headers
 
     Returns
     -------
-    _______
-        Tuple[pd.DataFrame, List[List], List[List]]:
+    Tuple[pd.DataFrame, List[List], List[List]]:
 
-        * First return parameter is the data as a pandas DataFrame in long format
-        * Second return parameter is a list of unknown categories / row headers
-        * Third return parameter holds information on data found in the last read row.
-          This is used as a hint to check if table specifications might have to be adapted
-          as country submitted tables are longer than expected.
+    * First return parameter is the data as a pandas DataFrame in long format
+    * Second return parameter is a list of unknown categories / row headers
+    * Third return parameter holds information on data found in the last read row.
+      This is used as a hint to check if table specifications might have to be adapted
+      as country-submitted tables are longer than expected.
 
     """
     if isinstance(country_codes, str):
@@ -221,7 +212,8 @@ def read_crf_table(
         date=date,
         folder=folder,
     )
-    # nasty fix for cases where exporting ran overnight and not all files have the same date
+    # nasty fix for cases where exporting ran overnight and not all files have
+    # the same date
     if (date is not None) and (len(country_codes) == 1):
         if isinstance(data_year, list):
             expected_files = len(data_year)
@@ -235,7 +227,8 @@ def read_crf_table(
                 f"Expected {expected_files}."
             )
             print(
-                "Possibly exporting run overnight and some files have the previous day as date."
+                "Possibly exporting run overnight and some files have the previous "
+                "day as date."
             )
             date_datetime = datetime.strptime(date, "%d%m%Y")
             date_datetime = date_datetime - timedelta(days=1)
@@ -254,7 +247,7 @@ def read_crf_table(
                 print("Found no additional input files")
 
     if input_files == []:
-        raise NoCRFFilesError(
+        raise NoCRFFilesError(  # noqa: TRY003
             f"No files found for {country_codes}, "
             f"submission_year={submission_year}, "
             f"data_year={data_year}, "
@@ -272,19 +265,19 @@ def read_crf_table(
                 f"Using country specific specification: "
                 f"CRF{submission_year}_{country_codes[0]}"
             )
-        except:
+        except:  # noqa: E722
             # no country specific specification, check for general specification
             try:
                 crf_spec = getattr(crf, f"CRF{submission_year}")
-            except:
-                raise ValueError(
+            except:  # noqa: E722
+                raise ValueError(  # noqa: TRY003, TRY200
                     f"No terminology exists for submission year " f"{submission_year}"
                 )
     else:
         try:
             crf_spec = getattr(crf, f"CRF{submission_year}")
-        except:
-            raise ValueError(
+        except:  # noqa: E722
+            raise ValueError(  # noqa: TRY003, TRY200
                 f"No terminology exists for submission year " f"{submission_year}"
             )
 
@@ -315,41 +308,38 @@ def read_crf_table(
     return df_all, unknown_rows, last_row_info
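
A call sketch for read_crf_table (illustrative values; "Table4" is the sheet name used as an example elsewhere in this file):

    df_all, unknown_rows, last_row_info = read_crf_table(
        country_codes="DEU",
        table="Table4",
        submission_year=2023,
        data_year=[2020, 2021],  # read two data years
        date="latest",
        debug=True,
    )
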
 
 
-def read_crf_table_from_file(
+def read_crf_table_from_file(  # noqa: PLR0912, PLR0915
     file: Path,
     table: str,
     table_spec: dict[str, dict],
-    debug: Optional[bool] = False,
+    debug: bool = False,
 ) -> tuple[pd.DataFrame, list[list], list[list]]:
     """
+    Read a single CRF table from a file
+
     Read a single CRF table from a given file. This is the core function of the CRF
     reading process as it reads the data from xlsx and performs the category mapping.
 
     Parameters
     ----------
-    __________
     file: Path
         file to read from
-
     table: str
         table to read (name of the sheet in the xlsx file)
-
     table_spec: Dict[str, Dict]
         Specification for the given table, e.g. CRF2021["Table4"]
-
     debug: bool (optional)
         if true print some debug information like column headers
 
     Returns
     -------
-    _______
-        Tuple[pd.DataFrame, List[List], List[List]]:
+    Tuple[pd.DataFrame, List[List], List[List]]:
 
-        * First return parameter is the data as a pandas DataFrame in long format
-        * Second return parameter is a list of unknown categories / row headers
-        * Third return parameter holds information on data found in the last read row.
-          This is used as a hint to check if table specifications might have to be adapted
-          as country submitted tables are longer than expected.
+    * First return parameter is the data as a pandas DataFrame in long format
+    * Second return parameter is a list of unknown categories / row headers
+    * Third return parameter holds information on data found in the last read row.
+      This is used as a hint to check if table specifications might have to be adapted
+      as country submitted tables are longer than expected.
 
     TODO: add verbosity option for debugging?
     """
@@ -396,8 +386,8 @@ def read_crf_table_from_file(
     nrows = (
         table_properties["lastrow"] - skiprows + 1
     )  # read one row more to check if we reached the end
-    # we read with user specific NaN treatment as the NaN treatment is part of the conversion to
-    # PRIMAP2 format.
+    # we read with user-specific NaN treatment as the NaN treatment is part of
+    # the conversion to PRIMAP2 format.
     df_raw = pd.read_excel(
         file,
         sheet_name=table,
@@ -428,16 +418,16 @@ def read_crf_table_from_file(
     # remove empty first column (for Australia tables start with an empty column)
     # df_raw = df_raw.dropna(how="all", axis=1)
     if df_raw.iloc[:, 0].isna().all():
-        cols_to_drop.append(df_raw.columns.values[0])
+        cols_to_drop.append(df_raw.columns.to_numpy()[0])
     # select only the first table by cutting everything after an all-nan column (unless
     # it's the first column)
     if debug:
-        print(f"Header before table end detection: {df_raw.columns.values}")
+        print(f"Header before table end detection: {df_raw.columns.to_numpy()}")
     for colIdx in range(1, len(df_raw.columns.values)):
         if (df_raw.iloc[:, colIdx].isna().all()) & (
             df_raw.columns[colIdx].startswith("Unnamed")
         ):
-            cols_to_drop = cols_to_drop + list(df_raw.columns.values[colIdx:])
+            cols_to_drop = cols_to_drop + list(df_raw.columns.to_numpy()[colIdx:])
             if debug:
                 print(f"cols_to_drop: {cols_to_drop}")
             break
@@ -454,7 +444,7 @@ def read_crf_table_from_file(
     # as pandas can not fill values of merged cells in all individual cells
     # we have to use some filling algorithm.
     df_header = df_raw.iloc[0 : len(table_properties["header"]) - 1].copy(deep=True)
-    df_header.loc[-1] = df_header.columns.values
+    df_header.loc[-1] = df_header.columns.to_numpy()
     df_header.index = df_header.index + 1
     # replace "Unnamed: X" column names by nan to fill from left in the next step
     df_header = df_header.sort_index()
@@ -477,16 +467,15 @@ def read_crf_table_from_file(
     for idx, row in enumerate(header):
         if table_properties["header"][idx] == "unit":
             units = row
+        elif entities is None:
+            entities = row
         else:
-            if entities is None:
-                entities = row
-            else:
-                for col, value in enumerate(row):
-                    if str(value) != "nan":
-                        entities[col] = f"{entities[col]} {value}"
+            for col, value in enumerate(row):
+                if str(value) != "nan":
+                    entities[col] = f"{entities[col]} {value}"
 
     if units is None:
-        raise ValueError(
+        raise ValueError(  # noqa: TRY003
             f"Specification for table {table} does not contain unit information."
         )
 
@@ -495,7 +484,7 @@ def read_crf_table_from_file(
     entities = [re.sub("\\s+", " ", entity) for entity in entities]
 
     # replace the old header
-    if len(header) > 2:
+    if len(header) > 2:  # noqa: PLR2004
         df_current = df_raw.drop(index=df_raw.iloc[0 : len(header) - 2].index)
     else:
         df_current = df_raw
@@ -555,65 +544,65 @@ def read_crf_table_from_file(
                 new_children = category_tree.children(node.identifier)
                 if new_children:
                     last_parent = node
-            else:
-                # two possibilities
-                # 1. The category is at a higher point in the hierarchy
-                # 2. It's missing in the hierarchy
-                # we have to first move up the hierarchy
-                # first check if category is present at all
-                if current_cat in all_nodes:
-                    old_parent = last_parent
-
-                    while (current_cat not in children.keys()) and (
-                        last_parent.identifier != "root"
-                    ):
-                        last_parent = category_tree.get_node(
-                            last_parent.predecessor(category_tree.identifier)
-                        )
-                        children = dict(
-                            [
-                                [child.tag, child.identifier]
-                                for child in category_tree.children(
-                                    last_parent.identifier
-                                )
-                            ]
-                        )
-
-                    if (last_parent.identifier == "root") and (
-                        current_cat not in children.keys()
-                    ):
-                        # we have not found the category as direct child of any of the
-                        # predecessors. Thus it is missing in the specification in
-                        # that place
-                        print(
-                            f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, "
-                            f"{file_info['data_year']} (last parent: {old_parent.tag})."
-                        )
-                        unknown_categories.append(
-                            [
-                                table,
-                                file_info["party"],
-                                current_cat,
-                                file_info["data_year"],
-                            ]
-                        )
-                        # copy back the parent info to continue with next category
-                        last_parent = old_parent
-                    else:
-                        # do the mapping
-                        node = category_tree.get_node(children[current_cat])
-                        new_cats[idx] = node.data[1]
-                        # check if the node has children
-                        new_children = category_tree.children(node.identifier)
-                        if new_children:
-                            last_parent = node
-                else:
+
+            # two other possibilities
+            # 1. The category is at a higher point in the hierarchy
+            # 2. It's missing in the hierarchy
+            # we have to first move up the hierarchy
+            # first check if category is present at all
+            elif current_cat in all_nodes:
+                old_parent = last_parent
+
+                while (current_cat not in children.keys()) and (
+                    last_parent.identifier != "root"
+                ):
+                    last_parent = category_tree.get_node(
+                        last_parent.predecessor(category_tree.identifier)
+                    )
+                    children = dict(
+                        [
+                            [child.tag, child.identifier]
+                            for child in category_tree.children(last_parent.identifier)
+                        ]
+                    )
+
+                if (last_parent.identifier == "root") and (
+                    current_cat not in children.keys()
+                ):
+                    # we have not found the category as a direct child of any of the
+                    # predecessors. Thus it is missing in the specification in
+                    # that place
                     print(
-                        f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}."
+                        f"Unknown category '{current_cat}' found in {table} for "
+                        f"{file_info['party']}, {file_info['data_year']} "
+                        f"(last parent: {old_parent.tag})."
                     )
                     unknown_categories.append(
-                        [table, file_info["party"], current_cat, file_info["data_year"]]
+                        [
+                            table,
+                            file_info["party"],
+                            current_cat,
+                            file_info["data_year"],
+                        ]
                     )
+                    # copy back the parent info to continue with next category
+                    last_parent = old_parent
+                else:
+                    # do the mapping
+                    node = category_tree.get_node(children[current_cat])
+                    new_cats[idx] = node.data[1]
+                    # check if the node has children
+                    new_children = category_tree.children(node.identifier)
+                    if new_children:
+                        last_parent = node
+            else:
+                print(
+                    f"Unknown category '{current_cat}' found in {table} for "
+                    f"{file_info['party']}, {file_info['data_year']}."
+                )
+                unknown_categories.append(
+                    [table, file_info["party"], current_cat, file_info["data_year"]]
+                )
     else:
         for idx in range(1, len(df_current)):
             current_cat = df_current.iloc[idx][cat_col]
@@ -627,14 +616,16 @@ def read_crf_table_from_file(
                 new_cats[idx] = unique_mapping[current_cat]
                 if (idx == len(df_current) - 1) and not last_row_nan:
                     print(
-                        f"found information in last row: category {current_cat}, row {idx}"
+                        f"found information in last row: category {current_cat}, "
+                        f"row {idx}"
                     )
                     info_last_row.append(
                         [table, file_info["party"], current_cat, file_info["data_year"]]
                     )
             else:
                 print(
-                    f"Unknown category '{current_cat}' found in {table} for {file_info['party']}, {file_info['data_year']}."
+                    f"Unknown category '{current_cat}' found in {table} for "
+                    f"{file_info['party']}, {file_info['data_year']}."
                 )
                 unknown_categories.append(
                     [table, file_info["party"], current_cat, file_info["data_year"]]
@@ -672,7 +663,7 @@ def read_crf_table_from_file(
     return df_long, unknown_categories, info_last_row
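
A hedged usage sketch (the import paths assume the package layout shown in this diff; the file name is hypothetical):

    from pathlib import Path

    from unfccc_ghg_data.unfccc_crf_reader import crf_specifications as crf
    from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_core import (
        read_crf_table_from_file,
    )

    df_long, unknown_cats, last_row = read_crf_table_from_file(
        file=Path("DEU_2021_2019_12042021_120000.xlsx"),  # hypothetical name
        table="Table4",
        table_spec=crf.CRF2021["Table4"],
    )
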
 
 
-def get_crf_files(
+def get_crf_files(  # noqa: PLR0912
     country_codes: Union[str, list[str]],
     submission_year: int,
     data_year: Optional[Union[int, list[int]]] = None,
@@ -680,7 +671,7 @@ def get_crf_files(
     folder: Optional[str] = None,
 ) -> list[Path]:
     """
-    Finds all files according to given parameters
+    Find all files matching the given parameters
 
     Parameters
     ----------
@@ -736,7 +727,7 @@ def get_crf_files(
                         for folder in new_country_folders
                     ]
             else:
-                raise ValueError(
+                raise ValueError(  # noqa: TRY003
                     f"No data folder found for country {country_code}. "
                     f"Check if folder mapping is up to date."
                 )
@@ -750,8 +741,8 @@ def get_crf_files(
         file_filter_template["data_year"] = data_year
 
     for input_folder in country_folders:
-        input_folder = Path(input_folder)
-        if input_folder.exists():
+        input_folder_path = Path(input_folder)
+        if input_folder_path.exists():
             # if desired find the latest date and only read that
             # has to be done per country
             if date == "latest":
@@ -761,19 +752,19 @@ def get_crf_files(
                     dates = get_submission_dates(folder, file_filter)
                     file_filter["date"] = find_latest_date(dates)
                     input_files = input_files + filter_filenames(
-                        input_folder.glob("*.xlsx"), **file_filter
+                        input_folder_path.glob("*.xlsx"), **file_filter
                     )
             else:
                 file_filter = file_filter_template.copy()
                 if date is not None:
                     file_filter["date"] = date
                 input_files = input_files + filter_filenames(
-                    input_folder.glob("*.xlsx"), **file_filter
+                    input_folder_path.glob("*.xlsx"), **file_filter
                 )
         # else:
         #    raise ValueError(f"Folder {input_folder} does not exist")
     if len(input_files) == 0:
-        raise ValueError(f"No input files found in {country_folders}")
+        raise ValueError(f"No input files found in {country_folders}")  # noqa: TRY003
 
     # make sure no file is in the list twice (happens when multiple input folders
     # contain the same submission, which is possible when the country name changed)
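
Sketch of a typical get_crf_files call (illustrative values; with date="latest" the newest submission per country is used, as in the loop above):

    files = get_crf_files(
        country_codes=["DEU", "FRA"],
        submission_year=2023,
        data_year=2021,
        date="latest",
    )
    # -> de-duplicated list of Path objects for the matching xlsx files
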
@@ -791,8 +782,7 @@ def get_info_from_crf_filename(
     filename: str,
 ) -> dict[str, Union[int, str]]:
     """
-    Parse given file name and return a dict with information
-    on the contained data.
+    Parse a given file name and return a dict with information on the contained data.
 
     Parameters
     ----------
@@ -816,12 +806,12 @@ def get_info_from_crf_filename(
     file_info["submission_year"] = int(name_parts[1])
     try:
         file_info["data_year"] = int(name_parts[2])
-    except:
+    except:  # noqa: E722
         print(f"Data year string {name_parts[2]} " "could not be converted to int.")
         file_info["data_year"] = name_parts[2]
     file_info["date"] = name_parts[3]
     # the last part (time code) is missing for Australia since 2023
-    if len(name_parts) > 4:
+    if len(name_parts) > 4:  # noqa: PLR2004
         file_info["extra"] = name_parts[4]
     else:
         file_info["extra"] = ""
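
Illustrative example of the parsing above (hypothetical file name following the party_submissionyear_datayear_date_timecode pattern implied by the code):

    info = get_info_from_crf_filename("DEU_2023_2021_10042023_123456.xlsx")
    # roughly: {"party": "DEU", "submission_year": 2023, "data_year": 2021,
    #           "date": "10042023", "extra": "123456"}
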
@@ -829,7 +819,7 @@ def get_info_from_crf_filename(
 
 
 def filter_filenames(
-    files_to_filter: list[Path],
+    files_to_filter: list[Path] | Generator[Path, None, None],
     party: Optional[Union[str, list[str]]] = None,
     data_year: Optional[Union[int, list[int]]] = None,
     submission_year: Optional[str] = None,
@@ -878,7 +868,7 @@ def filter_filenames(
     return filtered_files
 
 
-def check_crf_file_info(
+def check_crf_file_info(  # noqa: PLR0911
     file_info: dict,
     file_filter: dict,
 ) -> bool:
@@ -910,16 +900,14 @@ def check_crf_file_info(
         if isinstance(file_filter["data_year"], int):
             if file_info["data_year"] != file_filter["data_year"]:
                 return False
-        else:
-            if file_info["data_year"] not in file_filter["data_year"]:
-                return False
+        elif file_info["data_year"] not in file_filter["data_year"]:
+            return False
     if "party" in file_filter.keys():
         if isinstance(file_filter["party"], str):
             if file_info["party"] != file_filter["party"]:
                 return False
-        else:
-            if file_info["party"] not in file_filter["party"]:
-                return False
+        elif file_info["party"] not in file_filter["party"]:
+            return False
     return True
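
Sketch of the filter semantics of the two branches shown above (illustrative values):

    file_info = {"party": "DEU", "data_year": 2021}
    check_crf_file_info(file_info, {"party": ["DEU", "FRA"], "data_year": 2021})
    # -> True (party is in the list and the data year matches)
    check_crf_file_info(file_info, {"party": "AUS"})
    # -> False (single-string party filter does not match)
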
 
 
@@ -929,6 +917,8 @@ def create_category_tree(
     country: Optional[str] = None,
 ) -> Tree:
     """
+    Create a category hierarchy tree from a CRF table specification
+
     Create a treelib Tree for the categorical hierarchy from a CRF
     table specification.
 
@@ -946,13 +936,13 @@ def create_category_tree(
 
     country: str (optional)
     Country name to build the table for. Some categories are country-dependent.
-        To include them in the tree the country name has to be specified. If no country name
-        is given the generic tree will be built.
+        To include them in the tree, the country name has to be specified. If no
+        country name is given, the generic tree will be built.
 
     """
     # small sanity check on the specification
-    if len(specification[0]) < 3:
-        raise ValueError(
+    if len(specification[0]) < 3:  # noqa: PLR2004
+        raise ValueError(  # noqa: TRY003
             f"Error: Specification for Table {table} has non-unique "
             "categories and needs level specifications"
         )
@@ -1021,7 +1011,7 @@ def create_category_tree(
             )
         else:
             # increase in levels of more than one is not allowed
-            raise ValueError(
+            raise ValueError(  # noqa: TRY003
                 f"Error in sector hierarchy for table {table}, category {current_cat}: "
                 f"Category level is {current_cat_level} and parent level is "
                 f"{parent_info[-1]['level']}"
@@ -1039,7 +1029,9 @@ def filter_category(
     mapping: list,
     country: str,
 ) -> list[str]:
-    """
+    r"""
+    Check if a category mapping is suitable for the given country
+
     This function checks if a category mapping is suitable for the given country.
     If it is, the country information will be removed and the new mapping returned.
     If it is not suitable, it will be returned with category name "\\REMOVE" such that
@@ -1047,14 +1039,14 @@ def filter_category(
 
     Parameters
     ----------
-        mapping: List
-            mapping for a single category
-        country: str
-            iso 3-letter code of the country
+    mapping: List
+        mapping for a single category
+    country: str
+        iso 3-letter code of the country
 
     Returns
     -------
-        List: updated mapping
+    List: updated mapping
 
     """
     string_exclude = "\\C!-"
@@ -1127,7 +1119,7 @@ def get_latest_date_for_country(
                     dates = dates + get_submission_dates(folder_submission, file_filter)
             submission_date = find_latest_date(dates)
     else:
-        raise ValueError(
+        raise ValueError(  # noqa: TRY003
             f"No data folder found for country {country_code}. "
             f"Check if folder mapping is up to date."
         )
@@ -1140,7 +1132,7 @@ def get_submission_dates(
     file_filter: dict[str, Union[str, int, list]],
 ) -> list[str]:
     """
-    Returns all submission dates available in a folder
+    Return all submission dates available in a folder
 
     Parameters
     ----------
@@ -1156,7 +1148,7 @@ def get_submission_dates(
             List of dates as str
     """
     if "date" in file_filter:
-        raise ValueError(
+        raise ValueError(  # noqa: TRY003
             "'date' present in 'file_filter'. This makes no sense as "
             "the function's purpose is to return available dates."
         )
@@ -1164,7 +1156,7 @@ def get_submission_dates(
     if folder.exists():
         files = filter_filenames(folder.glob("*.xlsx"), **file_filter)
     else:
-        raise ValueError(f"Folder {folder} does not exist")
+        raise ValueError(f"Folder {folder} does not exist")  # noqa: TRY003
 
     dates = [get_info_from_crf_filename(file.name)["date"] for file in files]
     dates = list(set(dates))
@@ -1177,7 +1169,7 @@ def get_submission_parties(
     file_filter: dict[str, Union[str, int, list]],
 ) -> list[str]:
     """
-    Returns all submission dates available in a folder
+    Return all submission parties available in a folder
 
     Parameters
     ----------
@@ -1189,11 +1181,11 @@ def get_submission_parties(
 
     Returns
     -------
-        List[str]:
-            List of parties as str
+    List[str]:
+        List of parties as str
     """
     if "party" in file_filter:
-        raise ValueError(
+        raise ValueError(  # noqa: TRY003
             "'party' present in 'file_filter'. This makes no sense as "
             "the function's purpose is to return available parties."
         )
@@ -1201,7 +1193,7 @@ def get_submission_parties(
     if folder.exists():
         files = filter_filenames(list(folder.glob("*.xlsx")), **file_filter)
     else:
-        raise ValueError(f"Folder {folder} does not exist")
+        raise ValueError(f"Folder {folder} does not exist")  # noqa: TRY003
 
     parties = [get_info_from_crf_filename(file.name)["party"] for file in files]
     parties = list(set(parties))
@@ -1214,8 +1206,7 @@ def find_latest_date(
     date_format: str = "%d%m%Y",
 ) -> str:
     """
-    Returns the latest date in a list of dates as str in the format
-    ddmmyyyy
+    Return the latest date in a list of dates as str in the format ddmmyyyy
 
     Parameters
     ----------
@@ -1232,6 +1223,6 @@ def find_latest_date(
         ]
         dates_datetime = sorted(dates_datetime, key=itemgetter(1))
     else:
-        raise ValueError("Passed list of dates is empty")
+        raise ValueError("Passed list of dates is empty")  # noqa: TRY003
 
     return dates_datetime[-1][0]
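
Worked example for find_latest_date (dates are parsed with the default "%d%m%Y" format):

    find_latest_date(["30112022", "12012023", "01062022"])
    # -> "12012023" (12 Jan 2023 is the latest of the three)
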

+ 147 - 91
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_devel.py

@@ -1,14 +1,15 @@
 """
+Functions for CRF reading development
+
 This file holds functions that are used in CRF reading development like
 adding new tables or new submission years (and according country specific
-categories). Thue functions are tailored towards debug output and reading
+categories). The functions are tailored towards debug output and reading
 of single years in contrast to the production functions which are tailored
 towards the creation of full datasets including storage in the
 """
 
 from datetime import date
 from pathlib import Path
-from typing import Optional
 
 import pandas as pd
 import primap2 as pm2
@@ -17,28 +18,46 @@ import xarray as xr
 from unfccc_ghg_data.helper import get_country_name, log_path
 
 from . import crf_specifications as crf
-from .unfccc_crf_reader_core import convert_crf_table_to_pm2if, get_latest_date_for_country, read_crf_table
+from .unfccc_crf_reader_core import (
+    convert_crf_table_to_pm2if,
+    get_latest_date_for_country,
+    read_crf_table,
+)
 from .util import all_crf_countries
 
 
-def read_year_to_test_specs(
-        submission_year: int,
-        data_year: Optional[int]=None,
-        totest: Optional[bool]=False,
-        country_code: Optional=None,
+def read_year_to_test_specs(  # noqa: PLR0912, PLR0915
+    submission_year: int,
+    data_year: int | None = None,
+    totest: bool | None = False,
+    country_code: str | None = None,
 ) -> xr.Dataset:
     """
+    Read one file per country
+
     Read one xlsx file (so one data year) for each country for a submission year to
     create log files and extend the specifications
 
-    totest: if true only read tables with "totest" status
-
+    Parameters
+    ----------
+    submission_year
+        submission year to read
+    data_year
+        year to read
+    totest
+        If True, only read tables with "totest" status
+    country_code
+        Country to read. If not given, all countries will be read
+
+    Returns
+    -------
+    xr.Dataset with data for given parameters
     """
     if data_year is None:
-        data_year=2000
+        data_year = 2000
 
-    if country_code=="None":
-        country_code=None
+    if country_code == "None":
+        country_code = None
 
     unknown_categories = []
     last_row_info = []
@@ -46,7 +65,7 @@ def read_year_to_test_specs(
     print(f"CRF test reading for CRF{submission_year}. Using data year {data_year}")
     if totest:
         print("Reading only tables to test.")
-    print("#"*80)
+    print("#" * 80)
 
     if country_code is not None:
         countries_to_read = [country_code]
@@ -57,7 +76,7 @@ def read_year_to_test_specs(
         country_name = get_country_name(country_code)
         print(f"reading for country: {country_code}")
         # get specification and available tables
-           # if we only have a single country check if we might have a country specific
+        # if we only have a single country check if we might have a country specific
         # specification (currently only Australia, 2023)
         if country_code is not None:
             try:
@@ -66,35 +85,44 @@ def read_year_to_test_specs(
                     f"Using country specific specification: "
                     f"CRF{submission_year}_{country_code}"
                 )
-            except:
+            except Exception:
                 # no country specific specification, check for general specification
                 try:
                     crf_spec = getattr(crf, f"CRF{submission_year}")
-                except:
-                    raise ValueError(
-                        f"No terminology exists for submission year " f"{submission_year}"
-                    )
+                except Exception as ex:
+                    raise ValueError(  # noqa: TRY003
+                        f"No terminology exists for submission year "
+                        f"{submission_year}"
+                    ) from ex
         else:
             try:
                 crf_spec = getattr(crf, f"CRF{submission_year}")
-            except:
-                raise ValueError(
+            except Exception as ex:
+                raise ValueError(  # noqa: TRY003
                     f"No terminology exists for submission year " f"{submission_year}"
-                )
+                ) from ex
 
         if totest:
-            tables = [table for table in crf_spec.keys()
-                      if crf_spec[table]["status"] == "totest"]
+            tables = [
+                table
+                for table in crf_spec.keys()
+                if crf_spec[table]["status"] == "totest"
+            ]
         else:
-            tables = [table for table in crf_spec.keys()
-                      if crf_spec[table]["status"] == "tested"]
-        print(f"The following tables are available in the " \
-              f"CRF{submission_year} specification: {tables}")
+            tables = [
+                table
+                for table in crf_spec.keys()
+                if crf_spec[table]["status"] == "tested"
+            ]
+        print(
+            f"The following tables are available in the "
+            f"CRF{submission_year} specification: {tables}"
+        )
         print("#" * 80)
 
         try:
             submission_date = get_latest_date_for_country(country_code, submission_year)
-        except:
+        except Exception:
             print(f"No submissions for country {country_name}, CRF{submission_year}")
             submission_date = None
 
@@ -102,8 +130,13 @@ def read_year_to_test_specs(
             for table in tables:
                 # read table for given years
                 ds_table, new_unknown_categories, new_last_row_info = read_crf_table(
-                    country_code, table, submission_year, date=submission_date,
-                    data_year=[data_year], debug=True)
+                    country_code,
+                    table,
+                    submission_year,
+                    date=submission_date,
+                    data_year=[data_year],
+                    debug=True,
+                )
 
                 # collect messages on unknown rows etc
                 unknown_categories = unknown_categories + new_unknown_categories
@@ -124,37 +157,44 @@ def read_year_to_test_specs(
                     ds_table_if = convert_crf_table_to_pm2if(
                         ds_table,
                         submission_year,
-                        meta_data_input={"title": f"Data submitted in {submission_year} to the UNFCCC "
-                                                  f"in the common reporting format (CRF) by {country_name}. "
-                                                  f"Submission date: {submission_date}"},
+                        meta_data_input={
+                            "title": f"Data submitted in {submission_year} to the "
+                            f"UNFCCC in the common reporting format (CRF) "
+                            f"by {country_name}. "
+                            f"Submission date: {submission_date}"
+                        },
                         entity_mapping=entity_mapping,
                     )
 
                     # now convert to native PRIMAP2 format
                     ds_table_pm2 = pm2.pm2io.from_interchange_format(ds_table_if)
 
-                    # if individual data for emissions and removals / recovery exist combine
-                    # them
-                    if (('CO2 removals' in ds_table_pm2.data_vars) and
-                            ('CO2 emissions' in ds_table_pm2.data_vars) and "CO2" not in ds_table_pm2.data_vars):
+                    # if individual data for emissions and removals / recovery exist
+                    # combine them
+                    if (
+                        ("CO2 removals" in ds_table_pm2.data_vars)
+                        and ("CO2 emissions" in ds_table_pm2.data_vars)
+                        and "CO2" not in ds_table_pm2.data_vars
+                    ):
                         # we can just sum to CO2 as we made sure that it doesn't exist.
                         # If we have CO2 and removals but not emissions, CO2 already has
                         # removals subtracted and we do nothing here
-                        ds_table_pm2["CO2"] = ds_table_pm2[["CO2 emissions",
-                                                            "CO2 removals"]].pr.sum(
-                            dim="entity", skipna=True, min_count=1
-                        )
+                        ds_table_pm2["CO2"] = ds_table_pm2[
+                            ["CO2 emissions", "CO2 removals"]
+                        ].pr.sum(dim="entity", skipna=True, min_count=1)
                         ds_table_pm2["CO2"].attrs["entity"] = "CO2"
 
-                    if (('CH4 removals' in ds_table_pm2.data_vars) and
-                            ('CH4 emissions' in ds_table_pm2.data_vars) and "CH4" not in ds_table_pm2.data_vars):
+                    if (
+                        ("CH4 removals" in ds_table_pm2.data_vars)
+                        and ("CH4 emissions" in ds_table_pm2.data_vars)
+                        and "CH4" not in ds_table_pm2.data_vars
+                    ):
                         # we can just sum to CH4 as we made sure that it doesn't exist.
                         # If we have CH4 and removals but not emissions, CH4 already has
                         # removals subtracted and we do nothing here
-                        ds_table_pm2["CH4"] = ds_table_pm2[["CH4 emissions",
-                                                            "CH4 removals"]].pr.sum(
-                            dim="entity", skipna=True, min_count=1
-                        )
+                        ds_table_pm2["CH4"] = ds_table_pm2[
+                            ["CH4 emissions", "CH4 removals"]
+                        ].pr.sum(dim="entity", skipna=True, min_count=1)
                         ds_table_pm2["CH4"].attrs["entity"] = "CH4"
 
                     # combine per table DS
@@ -163,8 +203,10 @@ def read_year_to_test_specs(
                     else:
                         ds_all = ds_all.combine_first(ds_table_pm2)
                 except Exception as e:
-                    print(f"Error occured when converting table {table} for"
-                          f" {country_name} to PRIMAP2 IF. Exception: {e}")
+                    print(
+                        f"Error occurred when converting table {table} for"
+                        f" {country_name} to PRIMAP2 IF. Exception: {e}"
+                    )
                     # TODO: error handling and logging
 
     # process log messages.
@@ -175,37 +217,39 @@ def read_year_to_test_specs(
                 log_path
                 / f"CRF{submission_year}"
                 / f"{data_year}_unknown_categories_{country_code}"
-                  f"_{today.strftime('%Y-%m-%d')}.csv"
+                f"_{today.strftime('%Y-%m-%d')}.csv"
             )
         else:
-            log_location = (log_path / f"CRF{submission_year}"
-                            / f"{data_year}_unknown_categories_"
-                              f"{today.strftime('%Y-%m-%d')}.csv")
+            log_location = (
+                log_path / f"CRF{submission_year}" / f"{data_year}_unknown_categories_"
+                f"{today.strftime('%Y-%m-%d')}.csv"
+            )
         print(f"Unknown rows found. Saving log to {log_location}")
         save_unknown_categories_info(unknown_categories, log_location)
 
     if len(last_row_info) > 0:
         if country_code is not None:
             log_location = (
-               log_path
-               / f"CRF{submission_year}"
-               / f"{data_year}_last_row_info_{country_code}_"
-                 f"{today.strftime('%Y-%m-%d')}.csv"
-           )
+                log_path
+                / f"CRF{submission_year}"
+                / f"{data_year}_last_row_info_{country_code}_"
+                f"{today.strftime('%Y-%m-%d')}.csv"
+            )
         else:
-            log_location = (log_path / f"CRF{submission_year}"
-                            / f"{data_year}_last_row_info_"
-                              f"{today.strftime('%Y-%m-%d')}.csv")
-        print(f"Data found in the last row. Saving log to "
-              f"{log_location}")
+            log_location = (
+                log_path / f"CRF{submission_year}" / f"{data_year}_last_row_info_"
+                f"{today.strftime('%Y-%m-%d')}.csv"
+            )
+        print(f"Data found in the last row. Saving log to {log_location}")
         save_last_row_info(last_row_info, log_location)
 
     # save the data:
     compression = dict(zlib=True, complevel=9)
     output_folder = log_path / f"test_read_CRF{submission_year}"
     if country_code is not None:
-        output_filename = (f"CRF{submission_year}_{country_code}_"
-                           f"{today.strftime('%Y-%m-%d')}")
+        output_filename = (
+            f"CRF{submission_year}_{country_code}_" f"{today.strftime('%Y-%m-%d')}"
+        )
     else:
         output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
     if totest:
@@ -215,28 +259,26 @@ def read_year_to_test_specs(
         output_folder.mkdir()
 
     # write data in interchange format
-    pm2.pm2io.write_interchange_format(output_folder / output_filename,
-                                       ds_all.pr.to_interchange_format())
+    pm2.pm2io.write_interchange_format(
+        output_folder / output_filename, ds_all.pr.to_interchange_format()
+    )
 
     # write data in native PRIMAP2 format
     encoding = {var: compression for var in ds_all.data_vars}
-    ds_all.pr.to_netcdf(output_folder / (output_filename + ".nc"),
-                        encoding=encoding)
+    ds_all.pr.to_netcdf(output_folder / (output_filename + ".nc"), encoding=encoding)
 
     return ds_all
 
 
 def save_unknown_categories_info(
-        unknown_categories: list[list],
-        file: Path,
+    unknown_categories: list[list],
+    file: Path,
 ) -> None:
     """
     Save information on unknown categories to a csv file.
 
     Parameters
     ----------
-    __________
-
     unknown_categories: List[List]
         List of lists with information on the unknown categories.
         (which table, country and year, and which categories)
@@ -246,51 +288,57 @@ def save_unknown_categories_info(
 
     """
     # process unknown categories
-    df_unknown_cats = pd.DataFrame(unknown_categories, columns=["Table", "Country", "Category", "Year"])
+    df_unknown_cats = pd.DataFrame(
+        unknown_categories, columns=["Table", "Country", "Category", "Year"]
+    )
 
     processed_cats = []
     all_tables = df_unknown_cats["Table"].unique()
     all_years = set(df_unknown_cats["Year"].unique())
     all_years = set([year for year in all_years if isinstance(year, int)])
-    all_years = set([year for year in all_years if int(year) > 1989])
+    all_years = set([year for year in all_years if int(year) > 1989])  # noqa: PLR2004
     for table in all_tables:
         df_cats_current_table = df_unknown_cats[df_unknown_cats["Table"] == table]
         cats_current_table = list(df_cats_current_table["Category"].unique())
         for cat in cats_current_table:
-            df_current_cat_table = df_cats_current_table[df_cats_current_table["Category"] == cat]
+            df_current_cat_table = df_cats_current_table[
+                df_cats_current_table["Category"] == cat
+            ]
             all_countries = df_current_cat_table["Country"].unique()
             countries_cat = ""
             for country in all_countries:
-                years_country = df_current_cat_table[df_current_cat_table["Country"] == country]["Year"].unique()
+                years_country = df_current_cat_table[
+                    df_current_cat_table["Country"] == country
+                ]["Year"].unique()
                 if set(years_country) == all_years:
                     countries_cat = f"{countries_cat}; {country}"
                 else:
                     countries_cat = f"{countries_cat}; {country} ({years_country})"
             processed_cats.append([table, cat, countries_cat])
 
-
     if not file.parents[1].exists():
         file.parents[1].mkdir()
     if not file.parents[0].exists():
         file.parents[0].mkdir()
-    df_processed_cats = pd.DataFrame(processed_cats, columns=["Table", "Category", "Countries"])
+    df_processed_cats = pd.DataFrame(
+        processed_cats, columns=["Table", "Category", "Countries"]
+    )
     df_processed_cats.to_csv(file, index=False)
 
 
 def save_last_row_info(
-        last_row_info: list[list],
-        file: Path,
-    ) -> None:
+    last_row_info: list[list],
+    file: Path,
+) -> None:
     """
     Save information on data found in the last row read for a table.
+
     The last row read should not contain data. If it does contain data
     it is a hint that the table size is larger for some countries than
     given in the specification and thus we might not read the full table.
 
     Parameters
     ----------
-    __________
-
     last_row_info: List[List]
         List of lists with information on the data found in the last row.
         (which table, country and year, and which categories)
@@ -300,22 +348,28 @@ def save_last_row_info(
 
     """
     # process last row with information messages
-    df_last_row_info = pd.DataFrame(last_row_info, columns=["Table", "Country", "Category", "Year"])
+    df_last_row_info = pd.DataFrame(
+        last_row_info, columns=["Table", "Country", "Category", "Year"]
+    )
 
     processed_last_row_info = []
     all_tables = df_last_row_info["Table"].unique()
     all_years = set(df_last_row_info["Year"].unique())
     all_years = set([year for year in all_years if isinstance(year, int)])
-    all_years = set([year for year in all_years if year > 1989])
+    all_years = set([year for year in all_years if year > 1989])  # noqa: PLR2004
     for table in all_tables:
         df_last_row_current_table = df_last_row_info[df_last_row_info["Table"] == table]
         all_countries = df_last_row_current_table["Country"].unique()
         for country in all_countries:
-            df_current_country_table = df_last_row_current_table[df_last_row_current_table["Country"] == country]
+            df_current_country_table = df_last_row_current_table[
+                df_last_row_current_table["Country"] == country
+            ]
             all_categories = df_current_country_table["Category"].unique()
             cats_country = ""
             for cat in all_categories:
-                years_category = df_current_country_table[df_current_country_table["Category"] == cat]["Year"].unique()
+                years_category = df_current_country_table[
+                    df_current_country_table["Category"] == cat
+                ]["Year"].unique()
                 if set(years_category) == all_years:
                     cats_country = f"{cats_country}; {cat}"
                 else:
@@ -326,5 +380,7 @@ def save_last_row_info(
         file.parents[1].mkdir()
     if not file.parents[0].exists():
         file.parents[0].mkdir()
-    df_processed_lost_row_info = pd.DataFrame(processed_last_row_info, columns=["Table", "Country", "Categories"])
+    df_processed_last_row_info = pd.DataFrame(
+        processed_last_row_info, columns=["Table", "Country", "Categories"]
+    )
-    df_processed_lost_row_info.to_csv("test_last_row_info.csv", index=False)
+    df_processed_last_row_info.to_csv(file, index=False)
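
A hedged usage sketch for the development reader above (illustrative values; logs are written below log_path and the combined dataset is returned):

    ds = read_year_to_test_specs(
        submission_year=2023,
        data_year=2000,   # also the fallback when no data year is given
        totest=True,      # only read tables with "totest" status
        country_code="AUS",
    )
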

+ 188 - 141
src/unfccc_ghg_data/unfccc_crf_reader/unfccc_crf_reader_prod.py

@@ -1,3 +1,6 @@
+"""
+Functions for CRF/CRT reading - production functions for full reading
+"""
 
 from datetime import date
 from typing import Optional, Union
@@ -26,9 +29,6 @@ from .unfccc_crf_reader_core import (
 from .unfccc_crf_reader_devel import save_last_row_info, save_unknown_categories_info
 from .util import NoCRFFilesError, all_crf_countries
 
-#import sys
-#sys.path.append(code_path.name)
-
 # functions:
 # * testing functions
 # ** read one or more table(s) for all countries
@@ -41,18 +41,20 @@ from .util import NoCRFFilesError, all_crf_countries
 # general approach:
 # main code in a function that reads one table from one file.
 # return raw pandas DF for use in different functions
-# wrappers around this function to read for a whole country or for test reading where we also
-# write files with missing sectors etc.
+# wrappers around this function to read for a whole country or for test reading
+# where we also write files with missing sectors etc.
 # merging functions use native pm2 format
 
 
-def read_crf_for_country(
-        country_code: str,
-        submission_year: int,
-        submission_date: Optional[str]=None,
-        re_read: Optional[bool]=True,
+def read_crf_for_country(  # noqa: PLR0912, PLR0915
+    country_code: str,
+    submission_year: int,
+    submission_date: Optional[str] = None,
+    re_read: Optional[bool] = True,
 ) -> xr.Dataset:
     """
+    Read for given submission year and country.
+
     Read CRF data for given submission year and country. All tables
     available in the specification will be read for all years. Result
     will be written to the appropriate country folder.
@@ -73,62 +75,61 @@ def read_crf_for_country(
     they are saved in the folder 'log' under the file name
     'country_reading_<country_code>_<date>_X.csv'.
 
-
     Parameters
     ----------
-    __________
-
     country_code: str
         ISO 3-letter country code
-
     submission_year: int
         Year of the submission of the data
-
     submission_date: Optional[str]
         Read for a specific submission date (given as string as in the file names)
         If not specified latest data will be read
-
     re_read: Optional[bool], default True
         Read the data also if it's already present
 
     Returns
     -------
-    _______
         xr.Dataset with the read data in PRIMAP2 format
     """
     # get country name
     country_name = get_country_name(country_code)
 
-
     # get specification
     # if we only have a single country check if we might have a country specific
     # specification (currently only Australia, 2023)
     try:
         crf_spec = getattr(crf, f"CRF{submission_year}_{country_code}")
-        print(f"Using country specific specification: "
-              f"CRF{submission_year}_{country_code}")
-    except:
+        print(
+            f"Using country specific specification: "
+            f"CRF{submission_year}_{country_code}"
+        )
+    except Exception:
         # no country specific specification, check for general specification
         try:
             crf_spec = getattr(crf, f"CRF{submission_year}")
-        except:
-            raise ValueError(
+        except Exception as ex:
+            raise ValueError(  # noqa: TRY003
                 f"No terminology exists for submission year " f"{submission_year}"
-            )
-
-
-    tables = [table for table in crf_spec.keys()
-              if crf_spec[table]["status"] == "tested"]
-    print(f"The following tables are available in the " \
-          f"CRF{submission_year} specification: {tables}")
+            ) from ex
+
+    tables = [
+        table for table in crf_spec.keys() if crf_spec[table]["status"] == "tested"
+    ]
+    print(
+        f"The following tables are available in the "
+        f"CRF{submission_year} specification: {tables}"
+    )
 
     if submission_date is None:
         submission_date = get_latest_date_for_country(country_code, submission_year)
 
     # check if data has been read already
     read_data = not submission_has_been_read(
-        country_code, country_name, submission_year=submission_year,
-        submission_date=submission_date, verbose=True,
+        country_code,
+        country_name,
+        submission_year=submission_year,
+        submission_date=submission_date,
+        verbose=True,
     )
 
     ds_all = None
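
Sketch of a production read for one country (illustrative values; with submission_date=None the latest submission is used, and re_read=False skips submissions that were already read):

    ds = read_crf_for_country(
        country_code="DEU",
        submission_year=2023,
        submission_date=None,
        re_read=False,
    )
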
@@ -138,7 +139,8 @@ def read_crf_for_country(
         for table in tables:
             # read table for all years
             ds_table, new_unknown_categories, new_last_row_info = read_crf_table(
-                country_code, table, submission_year, date=submission_date)#, data_year=[1990])
+                country_code, table, submission_year, date=submission_date
+            )  # , data_year=[1990])
 
             # collect messages on unknown rows etc
             unknown_categories = unknown_categories + new_unknown_categories
@@ -157,9 +159,11 @@ def read_crf_for_country(
             ds_table_if = convert_crf_table_to_pm2if(
                 ds_table,
                 submission_year,
-                meta_data_input={"title": f"Data submitted in {submission_year} to the UNFCCC "
-                                          f"in the common reporting format (CRF) by {country_name}. "
-                                          f"Submission date: {submission_date}"},
+                meta_data_input={
+                    "title": f"Data submitted in {submission_year} to the UNFCCC "
+                    f"in the common reporting format (CRF) by {country_name}. "
+                    f"Submission date: {submission_date}"
+                },
                 entity_mapping=entity_mapping,
             )
 
@@ -168,26 +172,30 @@ def read_crf_for_country(
 
             # if individual data for emissions and removals / recovery exist combine
             # them
-            if (('CO2 removals' in ds_table_pm2.data_vars) and
-                    ('CO2 emissions' in ds_table_pm2.data_vars) and "CO2" not in ds_table_pm2.data_vars):
+            if (
+                ("CO2 removals" in ds_table_pm2.data_vars)
+                and ("CO2 emissions" in ds_table_pm2.data_vars)
+                and "CO2" not in ds_table_pm2.data_vars
+            ):
                 # we can just sum to CO2 as we made sure that it doesn't exist.
                 # If we have CO2 and removals but not emissions, CO2 already has
                 # removals subtracted and we do nothing here
-                ds_table_pm2["CO2"] = ds_table_pm2[["CO2 emissions",
-                                                "CO2 removals"]].pr.sum(
-                    dim="entity", skipna=True, min_count=1
-                )
+                ds_table_pm2["CO2"] = ds_table_pm2[
+                    ["CO2 emissions", "CO2 removals"]
+                ].pr.sum(dim="entity", skipna=True, min_count=1)
                 ds_table_pm2["CO2"].attrs["entity"] = "CO2"
 
-            if (('CH4 removals' in ds_table_pm2.data_vars) and
-                    ('CH4 emissions' in ds_table_pm2.data_vars) and "CH4" not in ds_table_pm2.data_vars):
+            if (
+                ("CH4 removals" in ds_table_pm2.data_vars)
+                and ("CH4 emissions" in ds_table_pm2.data_vars)
+                and "CH4" not in ds_table_pm2.data_vars
+            ):
                 # we can just sum to CH4 as we made sure that it doesn't exist.
                 # If we have CH4 and removals but not emissions, CH4 already has
                 # removals subtracted and we do nothing here
-                ds_table_pm2["CH4"] = ds_table_pm2[["CH4 emissions",
-                                                "CH4 removals"]].pr.sum(
-                    dim="entity", skipna=True, min_count=1
-                )
+                ds_table_pm2["CH4"] = ds_table_pm2[
+                    ["CH4 emissions", "CH4 removals"]
+                ].pr.sum(dim="entity", skipna=True, min_count=1)
                 ds_table_pm2["CH4"].attrs["entity"] = "CH4"
 
             # combine per table DS
@@ -201,19 +209,29 @@ def read_crf_for_country(
         if len(unknown_categories) > 0:
             save_data = False
             today = date.today()
-            log_location = log_path / f"CRF{submission_year}" \
-                           / f"{country_code}_unknown_categories_{today.strftime('%Y-%m-%d')}.csv"
-            print(f"Unknown rows found for {country_code}. Not saving data. Savin log to "
-                  f"{log_location}" )
+            log_location = (
+                log_path
+                / f"CRF{submission_year}"
+                / f"{country_code}_unknown_categories_{today.strftime('%Y-%m-%d')}.csv"
+            )
+            print(
+                f"Unknown rows found for {country_code}. Not saving data. Savin log to "
+                f"{log_location}"
+            )
             save_unknown_categories_info(unknown_categories, log_location)
 
         if len(last_row_info) > 0:
             save_data = False
             today = date.today()
-            log_location = log_path / f"CRF{submission_year}" \
-                           / f"{country_code}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
-            print(f"Data found in the last row found for {country_code}. Not saving data. Savin log to "
-                  f"{log_location}")
+            log_location = (
+                log_path
+                / f"CRF{submission_year}"
+                / f"{country_code}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
+            )
+            print(
+                f"Data found in the last row found for {country_code}. "
+                f"Not saving data. Saving log to {log_location}"
+            )
             save_last_row_info(last_row_info, log_location)
 
         if save_data:
@@ -225,37 +243,37 @@ def read_crf_for_country(
                 output_folder.mkdir()
 
             # write data in interchange format
-            pm2.pm2io.write_interchange_format(output_folder / output_filename,
-                                               ds_all.pr.to_interchange_format())
+            pm2.pm2io.write_interchange_format(
+                output_folder / output_filename, ds_all.pr.to_interchange_format()
+            )
 
             # write data in native PRIMAP2 format
             encoding = {var: compression for var in ds_all.data_vars}
-            ds_all.pr.to_netcdf(output_folder / (output_filename + ".nc"),
-                                  encoding=encoding)
+            ds_all.pr.to_netcdf(
+                output_folder / (output_filename + ".nc"), encoding=encoding
+            )
 
     return ds_all
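
The emissions/removals combination above relies on the skipna/min_count
semantics of primap2's pr.sum: skipna=True sums whatever data is present,
while min_count=1 keeps all-NaN time steps as NaN instead of turning them
into zeros. A minimal pure-xarray sketch of those semantics (toy numbers,
not from any submission; pr.sum wraps the same xarray reduction):

```python
# Toy illustration of the skipna/min_count behaviour used by pr.sum above;
# the dataset and the values are invented.
import numpy as np
import xarray as xr

ds = xr.Dataset(
    {
        "CO2 emissions": ("time", [10.0, 12.0, np.nan]),
        "CO2 removals": ("time", [-3.0, np.nan, np.nan]),
    },
    coords={"time": [2019, 2020, 2021]},
)

co2 = ds.to_array("entity").sum("entity", skipna=True, min_count=1)
print(co2.values)  # [ 7. 12. nan] -- the all-NaN slot stays NaN
```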
 
 
 def read_crf_for_country_datalad(
-        country: str,
-        submission_year: int,
-        submission_date: Optional[str]=None,
-        re_read: Optional[bool]=True
+    country: str,
+    submission_year: int,
+    submission_date: Optional[str] = None,
+    re_read: Optional[bool] = True,
 ) -> None:
     """
+    Prepare input for read_crf_for_country
+
     Wrapper around read_crf_for_country which takes care of selecting input
     and output files and using datalad run to trigger the data reading
 
     Parameters
     ----------
-    __________
-
     country: str
         ISO 3-letter country code
-
     submission_year: int
         Year of the submission of the data
-
     submission_date: Optional(str)
         Read for a specific submission date (given as string as in the file names)
         If not specified latest data will be read
@@ -263,18 +281,21 @@ def read_crf_for_country_datalad(
     """
     # get all the info for the country
     country_info = get_input_and_output_files_for_country(
-        country, submission_year=submission_year, verbose=True)
+        country, submission_year=submission_year, verbose=True
+    )
 
     print(f"Attempting to read data for CRF{submission_year} from {country}.")
-    print("#"*80)
+    print("#" * 80)
     print("")
     print("Using the unfccc_crf_reader")
     print("")
     print("Run the script using datalad run via the python api")
     script = code_path / "unfccc_crf_reader" / "read_unfccc_crf_submission.py"
 
-    cmd = f"./venv/bin/python3 {script.as_posix()} --country={country} "\
-          f"--submission_year={submission_year} --submission_date={submission_date}"
+    cmd = (
+        f"./venv/bin/python3 {script.as_posix()} --country={country} "
+        f"--submission_year={submission_year} --submission_date={submission_date}"
+    )
     if re_read:
         cmd = cmd + " --re_read"
     datalad.api.run(
@@ -289,11 +310,13 @@ def read_crf_for_country_datalad(
 
 
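For orientation, the datalad run invocation in read_crf_for_country_datalad
above boils down to the following stand-alone pattern: datalad executes the
command inside the dataset and records the declared inputs and outputs in its
history. A hedged sketch; the dataset, input and output paths are
illustrative, not taken from the repository configuration:

```python
# Hedged stand-alone sketch of the datalad-run pattern used above;
# dataset, input and output paths are made up for illustration.
import datalad.api

script = "src/unfccc_ghg_data/unfccc_crf_reader/read_unfccc_crf_submission.py"
datalad.api.run(
    cmd=f"./venv/bin/python3 {script} --country=DEU --submission_year=2023",
    dataset=".",  # root of the datalad dataset
    message="Read data for DEU, CRF2023.",
    inputs=["downloaded_data/UNFCCC/Germany"],  # illustrative
    outputs=["extracted_data/UNFCCC/Germany"],  # illustrative
    dry_run=None,
)
```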
 def read_new_crf_for_year(
-        submission_year: int,
-        countries: Optional[list[str]]=None,
-        re_read: Optional[bool]=False,
+    submission_year: int,
+    countries: list[str] | None = None,
+    re_read: bool | None = False,
 ) -> dict:
     """
+    Read CRF for given countries
+
     Read CRF data for given submission year for all countries in
     `countries` that have submitted data. If no `countries` list is
     given, all countries are used.
@@ -310,15 +333,11 @@ def read_new_crf_for_year(
 
     Parameters
     ----------
-    __________
-
     submission_year: int
         Year of the submission of the data
-
     countries: list[str] (optional)
         List of country codes to read. If not given, reading is tried for all
         CRF countries.
-
     re_read: bool (optional, default=False)
         If true data will be read even if already read before.
 
@@ -326,7 +345,6 @@ def read_new_crf_for_year(
 
     Returns
     -------
-    _______
         dict: dict with country codes as keys and the reading status ("read",
         "skipped", "no data" or "failed") as values
 
     """
@@ -347,49 +365,61 @@ def read_new_crf_for_year(
         except Exception as ex:
             print(f"Data for country {country}, {submission_year} could not be read")
             print(f"The following error occurred: {ex}")
-            read_countries[country]= "failed"
+            read_countries[country] = "failed"
 
     # print overview
-    successful_countries = [country for country in read_countries if read_countries[country] == "read"]
-    skipped_countries = [country for country in read_countries if read_countries[country] == "skipped"]
-    failed_countries = [country for country in read_countries if read_countries[country] == "failed"]
-    no_data_countries = [country for country in read_countries if read_countries[country] == "no data"]
+    successful_countries = [
+        country for country in read_countries if read_countries[country] == "read"
+    ]
+    skipped_countries = [
+        country for country in read_countries if read_countries[country] == "skipped"
+    ]
+    failed_countries = [
+        country for country in read_countries if read_countries[country] == "failed"
+    ]
+    no_data_countries = [
+        country for country in read_countries if read_countries[country] == "no data"
+    ]
 
     print(f"Read data for countries {successful_countries}")
     print(f"Skipped countries {skipped_countries}")
     print(f"No data for countries {no_data_countries}")
     print(f"!!!!! Reading failed for {failed_countries}. Check why")
-    return(read_countries)
+    return read_countries
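
The returned dict maps each country code to one of four statuses; the
overview printed above groups it with list comprehensions. A toy
illustration (codes and statuses invented):

```python
# Toy illustration of the status dict returned by read_new_crf_for_year;
# country codes and statuses are invented.
read_countries = {"DEU": "read", "FRA": "skipped", "AUT": "failed", "MLT": "no data"}

by_status = {
    status: [c for c, s in read_countries.items() if s == status]
    for status in ("read", "skipped", "failed", "no data")
}
print(by_status["failed"])  # ['AUT']
```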
 
 
 def read_new_crf_for_year_datalad(
-        submission_year: int,
-        countries: Optional[list[str]] = None,
-        re_read: Optional[bool] = False,
+    submission_year: int,
+    countries: Optional[list[str]] = None,
+    re_read: Optional[bool] = False,
 ) -> None:
     """
+    Prepare input for read_new_crf_for_year
+
     Wrapper around read_new_crf_for_year which takes care of selecting input
     and output files and using datalad run to trigger the data reading
 
     Parameters
     ----------
-    __________
-
     submission_year: int
         Year of the submission of the data
-
     countries: list[str] (optional)
         List of country codes to read. If not given, reading is tried for all
         CRF countries.
-
     re_read: bool (optional, default=False)
         If true data will be read even if already read before.
 
     """
     if countries is not None:
-        print(f"Reading CRF{submission_year} for countries {countries} using unfccc_crf_reader.")
+        print(
+            f"Reading CRF{submission_year} for countries {countries} "
+            f"using unfccc_crf_reader."
+        )
     else:
-        print(f"Reading CRF{submission_year} for all countries using unfccc_crf_reader.")
+        print(
+            f"Reading CRF{submission_year} for all countries "
+            f"using unfccc_crf_reader."
+        )
         countries = all_crf_countries
     print("#" * 80)
     print("")
@@ -398,7 +428,6 @@ def read_new_crf_for_year_datalad(
     else:
         print("Only reading new submissions not read yet.")
 
-
     input_files = []
     output_files = []
     # loop over countries to collect input and output files
@@ -406,14 +435,16 @@ def read_new_crf_for_year_datalad(
     for country in countries:
         try:
             country_info = get_input_and_output_files_for_country(
-                country, submission_year=submission_year, verbose=False)
+                country, submission_year=submission_year, verbose=False
+            )
             # check if the submission has been read already
             if re_read:
                 input_files = input_files + country_info["input"]
                 output_files = output_files + country_info["output"]
             else:
                 data_read = submission_has_been_read(
-                    country_info["code"], country_info["name"],
+                    country_info["code"],
+                    country_info["name"],
                     submission_year=submission_year,
                     submission_date=country_info["date"],
                     verbose=False,
@@ -421,36 +452,41 @@ def read_new_crf_for_year_datalad(
                 if not data_read:
                     input_files = input_files + country_info["input"]
                     output_files = output_files + country_info["output"]
-        except:
-            # no error handling here as that is done in the function that does the actual reading
+        except Exception:  # noqa: S110
+            # no error handling here as that is done in the function that does
+            # the actual reading
             pass
 
     print("Run the script using datalad run via the python api")
     script = code_path / "unfccc_crf_reader" / "read_new_unfccc_crf_for_year.py"
 
-    #cmd = f"./venv/bin/python3 {script.as_posix()} --countries={countries} "\
+    # cmd = f"./venv/bin/python3 {script.as_posix()} --countries={countries} "\
     #      f"--submission_year={submission_year}"
-    cmd = f"./venv/bin/python3 {script.as_posix()} " \
-          f"--submission_year={submission_year}"
+    # TODO this doesn't work with poetry
+    cmd = (
+        f"./venv/bin/python3 {script.as_posix()} "
+        f"--submission_year={submission_year}"
+    )
 
     if re_read:
         cmd = cmd + " --re_read"
     datalad.api.run(
         cmd=cmd,
         dataset=root_path,
-        message=f"Read data for {countries}, CRF{submission_year}. Re-reading: {re_read}",
+        message=f"Read data for {countries}, CRF{submission_year}. "
+        f"Re-reading: {re_read}",
         inputs=input_files,
         outputs=output_files,
         dry_run=None,
-        #explicit=True,
+        # explicit=True,
     )
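
On the TODO above: the hard-coded ./venv/bin/python3 fails in
poetry-managed environments because the virtual environment lives
elsewhere. One possible, untested workaround is to build the command from
the interpreter that is currently running:

```python
# Sketch of a possible fix for the poetry TODO above (not the repository's
# chosen solution): sys.executable points at the active interpreter, which
# also resolves correctly inside poetry-managed virtual environments.
import sys

script = "src/unfccc_ghg_data/unfccc_crf_reader/read_new_unfccc_crf_for_year.py"
cmd = f"{sys.executable} {script} --submission_year=2023"
print(cmd)
```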
 
 
 def get_input_and_output_files_for_country(
-        country: str,
-        submission_year: int,
-        submission_date: Optional[str]=None,
-        verbose: Optional[bool]=True,
+    country: str,
+    submission_year: int,
+    submission_date: Optional[str] = None,
+    verbose: Optional[bool] = True,
 ) -> dict[str, Union[list, str]]:
     """
     Get input and output files for a given country
@@ -472,44 +508,49 @@ def get_input_and_output_files_for_country(
         if verbose:
             print("No submission date given, find latest date.")
         submission_date = get_latest_date_for_country(country_code, submission_year)
-    else:
-        if verbose:
-            print(f"Using given submissions date {submission_date}")
+    elif verbose:
+        print(f"Using given submissions date {submission_date}")
 
     if submission_date is None:
         # there is no data. Raise an exception
-        raise NoCRFFilesError(f"No submissions found for {country_code}, "
-                              f"submission_year={submission_year}, "
-                              f"date={date}")
-    else:
-        if verbose:
-            print(f"Latest submission date for CRF{submission_year} is {submission_date}")
+        raise NoCRFFilesError(  # noqa: TRY003
+            f"No submissions found for {country_code}, "
+            f"submission_year={submission_year}, "
+            f"date={date}"
+        )
+    elif verbose:
+        print(f"Latest submission date for CRF{submission_year} is {submission_date}")
     country_info["date"] = submission_date
 
     # get possible input files
-    input_files = get_crf_files(country_codes=country_code,
-                                submission_year=submission_year,
-                                date=submission_date)
+    input_files = get_crf_files(
+        country_codes=country_code,
+        submission_year=submission_year,
+        date=submission_date,
+    )
     if not input_files:
-        raise NoCRFFilesError(f"No possible input files found for {country}, CRF{submission_year}, "
-                              f"v{submission_date}. Are they already submitted and included in the "
-                              f"repository?")
+        raise NoCRFFilesError(  # noqa: TRY003
+            f"No possible input files found for {country}, CRF{submission_year}, "
+            f"v{submission_date}. Are they already submitted and included in the "
+            f"repository?"
+        )
     elif verbose:
         print("Found the following input_files:")
         for file in input_files:
             print(file.name)
         print("")
 
-
     # convert file's path to str
     input_files = [file.as_posix() for file in input_files]
     country_info["input"] = input_files
 
     # get output file
     output_folder = extracted_data_path_UNFCCC / country_name.replace(" ", "_")
-    output_files = [output_folder / f"{country_code}_CRF{submission_year}"
-                                    f"_{submission_date}.{suffix}" for suffix
-                    in ['yaml', 'csv', 'nc']]
+    output_files = [
+        output_folder / f"{country_code}_CRF{submission_year}"
+        f"_{submission_date}.{suffix}"
+        for suffix in ["yaml", "csv", "nc"]
+    ]
     if verbose:
         print("The following files are considered as output_files:")
         for file in output_files:
@@ -526,11 +567,11 @@ def get_input_and_output_files_for_country(
 
 
 def submission_has_been_read(
-        country_code: str,
-        country_name: str,
-        submission_year: int,
-        submission_date: str,
-        verbose: Optional[bool]=True,
+    country_code: str,
+    country_name: str,
+    submission_year: int,
+    submission_date: str,
+    verbose: Optional[bool] = True,
 ) -> bool:
     """
     Check if a CRF submission has already been read
@@ -543,19 +584,25 @@ def submission_has_been_read(
         if all(suffix in existing_suffixes for suffix in [".nc", ".yaml", ".csv"]):
             has_been_read = True
             if verbose:
-                print(f"Data already available for {country_code}, "
-                      f"CRF{submission_year}, version {submission_date}.")
+                print(
+                    f"Data already available for {country_code}, "
+                    f"CRF{submission_year}, version {submission_date}."
+                )
         else:
             has_been_read = False
             if verbose:
-                print(f"Partial data available for {country_code}, "
-                      f"CRF{submission_year}, version {submission_date}. "
-                      "Please check if all files have been written after "
-                      "reading.")
+                print(
+                    f"Partial data available for {country_code}, "
+                    f"CRF{submission_year}, version {submission_date}. "
+                    "Please check if all files have been written after "
+                    "reading."
+                )
     else:
         has_been_read = False
         if verbose:
-            print(f"No read data available for {country_code}, "
-                  f"CRF{submission_year}, version {submission_date}. ")
+            print(
+                f"No read data available for {country_code}, "
+                f"CRF{submission_year}, version {submission_date}. "
+            )
 
     return has_been_read
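
submission_has_been_read counts a submission as read only if all three
output files (.nc, .yaml and .csv) exist for the given date. A small sketch
of that check; the folder, country code and date in the glob pattern are
assumptions modelled on the output file names constructed above:

```python
# Sketch of the suffix-completeness check; folder, country code and date
# are invented for illustration.
from pathlib import Path

output_folder = Path("extracted_data/UNFCCC/Germany")
existing_suffixes = {
    f.suffix for f in output_folder.glob("DEU_CRF2023_2023-04-12.*")
}
has_been_read = all(s in existing_suffixes for s in (".nc", ".yaml", ".csv"))
print(has_been_read)
```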

+ 53 - 10
src/unfccc_ghg_data/unfccc_crf_reader/util.py

@@ -1,16 +1,59 @@
+"""Definitions and errors for the CRF reader"""
 
 all_crf_countries = [
-    'AUS', 'AUT', 'BEL', 'BGR', 'BLR',
-    'CAN', 'CHE', 'CYP', 'CZE', 'DEU', # 10
-    'DKE', 'DNK', 'DNM', 'ESP', 'EST',
-    'EUA', 'EUC', 'FIN', 'FRA', 'FRK', # 20
-    'GBK', 'GBR', 'GRC', 'HRV', 'HUN',
-    'IRL', 'ISL', 'ITA', 'JPN', 'KAZ', # 30
-    'LIE', 'LTU', 'LUX', 'LVA', 'MCO',
-    'MLT', 'NLD', 'NOR', 'NZL', 'POL', # 40
-    'PRT', 'ROU', 'RUS', 'SVK', 'SVN',
-    'SWE', 'TUR', 'UKR', 'USA', # 49
+    "AUS",
+    "AUT",
+    "BEL",
+    "BGR",
+    "BLR",
+    "CAN",
+    "CHE",
+    "CYP",
+    "CZE",
+    "DEU",  # 10
+    "DKE",
+    "DNK",
+    "DNM",
+    "ESP",
+    "EST",
+    "EUA",
+    "EUC",
+    "FIN",
+    "FRA",
+    "FRK",  # 20
+    "GBK",
+    "GBR",
+    "GRC",
+    "HRV",
+    "HUN",
+    "IRL",
+    "ISL",
+    "ITA",
+    "JPN",
+    "KAZ",  # 30
+    "LIE",
+    "LTU",
+    "LUX",
+    "LVA",
+    "MCO",
+    "MLT",
+    "NLD",
+    "NOR",
+    "NZL",
+    "POL",  # 40
+    "PRT",
+    "ROU",
+    "RUS",
+    "SVK",
+    "SVN",
+    "SWE",
+    "TUR",
+    "UKR",
+    "USA",  # 49
 ]
 
+
 class NoCRFFilesError(Exception):
+    """Error raised when no CRF files are found"""
+
     pass
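
The # noqa: TRY003 markers in the reader suppress ruff's preference for
exception classes that carry their own message; NoCRFFilesError stays
deliberately lightweight and receives a descriptive message at each raise
site. A hedged usage sketch (message text illustrative, import path assumed
from the src layout):

```python
# Hedged usage sketch: NoCRFFilesError lets callers separate "no CRF files
# for this country/year" from genuine reading failures.
from unfccc_ghg_data.unfccc_crf_reader.util import NoCRFFilesError

try:
    raise NoCRFFilesError("No submissions found for DEU, submission_year=2023")
except NoCRFFilesError as err:
    print(f"Skipping: {err}")
```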

+ 4 - 2
src/unfccc_ghg_data/unfccc_di_reader/__init__.py

@@ -1,6 +1,8 @@
-# submodule to read data from UNFCCC DI API using the unfccc_di_api package
+"""
+Submodule to read data from the UNFCCC DI API using the unfccc_di_api package
+"""
 
-#import unfccc_di_api
+# import unfccc_di_api
 from .unfccc_di_reader_core import (
     convert_DI_data_to_pm2_if,
     convert_DI_IF_data_to_pm2,

+ 10 - 4
src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country.py

@@ -1,5 +1,7 @@
 """
-This script is a wrapper around the read__for_country
+Call process_and_save_UNFCCC_DI_for_country
+
+This script is a wrapper around the process_and_save_UNFCCC_DI_for_country
 function such that it can be called from datalad
 """
 
@@ -9,9 +11,13 @@ from unfccc_ghg_data.unfccc_di_reader import process_and_save_UNFCCC_DI_for_coun
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--country', help='Country code')
-    parser.add_argument('--date', help='String with date to read and process. If not '
-                                       'given latest data will be used', default=None)
+    parser.add_argument("--country", help="Country code")
+    parser.add_argument(
+        "--date",
+        help="String with date to read and process. If not "
+        "given latest data will be used",
+        default=None,
+    )
     args = parser.parse_args()
 
     country_code = args.country
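
To see what the wrapper receives, the parser defined above can be exercised
directly; the country code and date below are invented:

```python
# Toy round-trip through the argument parser defined above; values invented.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--country", help="Country code")
parser.add_argument(
    "--date",
    help="String with date to read and process. If not given, "
    "the latest data will be used",
    default=None,
)
args = parser.parse_args(["--country=ARG", "--date=2023-05-24"])
print(args.country, args.date)  # ARG 2023-05-24
```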

Some files were not shown because too many files changed in this diff