
restructure repo and make it a package. Also work on CRF2023 specs

Johannes Gütschow, 1 year ago
parent
commit
361eaac03f
58 changed files with 635 additions and 192 deletions
  1. .gitignore (+2 -2)
  2. LICENSE (+202 -0)
  3. Makefile (+1 -1)
  4. UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py (+5 -5)
  5. UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py (+24 -17)
  6. UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py (+24 -13)
  7. UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py (+20 -20)
  8. UNFCCC_GHG_data/UNFCCC_CRF_reader/__init__.py (+3 -1)
  9. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py (+0 -0)
  10. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py (+0 -0)
  11. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py (+135 -23)
  12. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py (+2 -0)
  13. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py (+10 -0)
  14. UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py (+2 -2)
  15. UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py (+2 -2)
  16. UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py (+1 -1)
  17. UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py (+1 -1)
  18. UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py (+7 -1)
  19. UNFCCC_GHG_data/UNFCCC_CRF_reader/util.py (+1 -1)
  20. UNFCCC_GHG_data/UNFCCC_downloader/__init__.py (+0 -0)
  21. UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py (+0 -0)
  22. UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py (+0 -0)
  23. UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py (+0 -0)
  24. UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py (+0 -0)
  25. UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py (+0 -0)
  26. UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py (+0 -0)
  27. UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py (+0 -0)
  28. UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py (+5 -5)
  29. UNFCCC_GHG_data/UNFCCC_reader/Chile/config_CHL_BUR4.py (+1 -1)
  30. UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py (+2 -2)
  31. UNFCCC_GHG_data/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py (+0 -0)
  32. UNFCCC_GHG_data/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py (+3 -3)
  33. UNFCCC_GHG_data/UNFCCC_reader/Mexico/config_MEX_BUR3.py (+0 -0)
  34. UNFCCC_GHG_data/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py (+3 -3)
  35. UNFCCC_GHG_data/UNFCCC_reader/Montenegro/config_MNE_BUR3.py (+0 -0)
  36. UNFCCC_GHG_data/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py (+1 -1)
  37. UNFCCC_GHG_data/UNFCCC_reader/Morocco/config_MAR_BUR3.py (+0 -0)
  38. UNFCCC_GHG_data/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py (+3 -3)
  39. UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py (+0 -0)
  40. UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py (+2 -2)
  41. UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py (+2 -2)
  42. UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py (+0 -0)
  43. UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py (+2 -2)
  44. UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py (+3 -3)
  45. UNFCCC_GHG_data/UNFCCC_reader/__init__.py (+6 -0)
  46. UNFCCC_GHG_data/UNFCCC_reader/country_info.py (+1 -1)
  47. UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json (+0 -0)
  48. UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py (+0 -0)
  49. UNFCCC_GHG_data/UNFCCC_reader/get_submissions_info.py (+35 -35)
  50. UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py (+4 -4)
  51. UNFCCC_GHG_data/__init__.py (+8 -0)
  52. code/requirements.txt (+0 -12)
  53. dodo.py (+25 -23)
  54. pyproject.toml (+8 -0)
  55. requirements.txt (+1 -0)
  56. requirements_dev.txt (+1 -0)
  57. setup.cfg (+72 -0)
  58. setup.py (+5 -0)

+ 2 - 2
.gitignore

@@ -5,7 +5,7 @@ __pycache__
 /JG_test_code/
 .doit.db
 log
-code/datasets
-code/UNFCCC_DI_reader
+UNFCCC_GHG_data/datasets
+UNFCCC_GHG_data/UNFCCC_DI_reader
 datasets/UNFCCC/DI_NAI
 

+ 202 - 0
LICENSE

@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 1 - 1
Makefile

@@ -3,7 +3,7 @@ help:
 	echo Options:
 	echo make venv: create virtual environment
 
-venv: code/requirements.txt
+venv: UNFCCC_GHG_data
 	[ -d ./venv ] || python3 -m venv venv
 	./venv/bin/pip install --upgrade pip
 	./venv/bin/pip install -Ur code/requirements.txt

+ 5 - 5
code/UNFCCC_CRF_reader/CRF_raw_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py

@@ -17,16 +17,16 @@ from datetime import date
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 #log_path = root_path / "log"
-code_path = root_path / "code"
+code_path = root_path / "UNFCCC_GHG_data"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 dataset_path = root_path / "datasets" / "UNFCCC"
 
 #sys.path.append(code_path.name)
 
-from util import all_crf_countries
-from UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
-from UNFCCC_CRF_reader_prod import submission_has_been_read
+from .util import all_crf_countries
+from .UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
+from .UNFCCC_CRF_reader_prod import submission_has_been_read
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
@@ -46,7 +46,7 @@ for country in all_crf_countries:
         # check if the latest submission has been read already
 
         data_read = submission_has_been_read(
-            country_info["code"], country_info["name"],
+            country_info["UNFCCC_GHG_data"], country_info["name"],
             submission_year=submission_year,
             submission_date=country_info["date"],
             verbose=False,

+ 24 - 17
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py

@@ -17,8 +17,8 @@ from operator import itemgetter
 from collections import Counter
 from typing import Dict, List, Optional, Tuple, Union
 from datetime import datetime, timedelta
-import crf_specifications as crf
-from util import downloaded_data_path, NoCRFFilesError, custom_country_mapping
+from . import crf_specifications as crf
+from .util import downloaded_data_path, NoCRFFilesError, custom_country_mapping
 
 
 ### reading functions
@@ -144,7 +144,8 @@ def convert_crf_table_to_pm2if(
         #coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
-        meta_data=meta_data
+        meta_data=meta_data,
+        time_format="%Y",
     )
     return df_table_if
 
@@ -170,7 +171,7 @@ def read_crf_table(
     __________
 
     country_codes: str or list[str]
-        ISO 3-letter country code or list of country codes
+        ISO 3-letter country UNFCCC_GHG_data or list of country codes
 
     table: str
         name of the table sheet in the CRF xlsx file
@@ -265,8 +266,8 @@ def read_crf_table(
                 df_all = pd.concat([df_this_file, df_all])
                 unknown_rows = unknown_rows + unknown_rows_this_file
                 last_row_info = last_row_info + last_row_info_this_file
-        except:
-            print(f"Year could not be converted to int for file {file}. Skipping file.")
+        except Exception as e:
+            print(f"Error when reading file {file}. Skipping file. Exception: {e}")
 
     return df_all, unknown_rows, last_row_info
 
@@ -359,8 +360,15 @@ def read_crf_table_from_file(
     df_header = df_header.replace(r"Unnamed: [0-9]{1,2}", np.nan, regex=True)
     header = []
     # fill nans with the last value from the left
-    for row in range(0, len(df_header)):
-        header.append(list(df_header.iloc[row].fillna(method="ffill")))
+    if "header_fill" in table_properties:
+        for row in range(0, len(df_header)):
+            if table_properties["header_fill"][row]:
+                header.append(list(df_header.iloc[row].fillna(method="ffill")))
+            else:
+                header.append(list(df_header.iloc[row]))
+    else:
+        for row in range(0, len(df_header)):
+            header.append(list(df_header.iloc[row].fillna(method="ffill")))
 
     # combine all non-unit rows into one
     entities = None
@@ -391,7 +399,6 @@ def read_crf_table_from_file(
 
     df_current.iloc[0] = units
     df_current.columns = entities
-    #### standardized header is finalized
 
     # remove all columns to ignore
     df_current = df_current.drop(columns=table_properties["cols_to_ignore"])
@@ -533,7 +540,7 @@ def get_crf_files(
     __________
 
     country_codes: str or list[str]
-        ISO 3-letter country code or list of country codes
+        ISO 3-letter country UNFCCC_GHG_data or list of country codes
 
     submission_year: int
         Year of the submission of the data
@@ -644,7 +651,7 @@ def get_info_from_crf_filename(
     Returns
     _______
     dict with fields:
-        party: the party that submitted the data (3 letter code)
+        party: the party that submitted the data (3 letter UNFCCC_GHG_data)
         submission_year: year of submission
         data_year: year in which the meissions took place
         date: date of the submission
@@ -680,8 +687,8 @@ def filter_filenames(
         List with pathlib.Path objects for the filenames to filter
 
     party: Optional[Union[str, List[str]]] (default: None)
-        List of country codes or single country code. If given only files
-        for this(these) country-code(s) will be returned.
+        List of country codes or single country UNFCCC_GHG_data. If given only files
+        for this(these) country-UNFCCC_GHG_data(s) will be returned.
 
     data_year: Optional[Union[int, List[int]]] (default: None)
         List of data years or single year. If given only files for this
@@ -878,7 +885,7 @@ def filter_category(
         mapping: List
             mapping for a single category
         country: str
-            iso 3-letter code of the country
+            iso 3-letter UNFCCC_GHG_data of the country
 
     Returns
     _______
@@ -918,7 +925,7 @@ def get_latest_date_for_country(
     Parameters
     __________
     country: str
-        3-letter country code
+        3-letter country UNFCCC_GHG_data
 
     submission_year: int
         Year of the submission to find the l;atest date for
@@ -1054,7 +1061,7 @@ def find_latest_date(
 def get_country_name(
         country_code: str,
 ) -> str:
-    """get country name from code """
+    """get country name from UNFCCC_GHG_data """
     if country_code in custom_country_mapping:
         country_name = custom_country_mapping[country_code]
     else:
@@ -1062,7 +1069,7 @@ def get_country_name(
             country = pycountry.countries.get(alpha_3=country_code)
             country_name = country.name
         except:
-            raise ValueError(f"Country code {country_code} can not be mapped to "
+            raise ValueError(f"Country UNFCCC_GHG_data {country_code} can not be mapped to "
                              f"any country")
 
     return country_name
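The header_fill option added above lets a table specification state, per header row, whether empty header cells should be forward-filled from the left (useful when one entity spans several columns but another header row must stay sparse). A minimal, self-contained sketch of that per-row logic on hypothetical header data, using pandas' ffill():

    import numpy as np
    import pandas as pd

    # hypothetical three-row header block: entity, entity, unit
    df_header = pd.DataFrame([
        ["CO2", np.nan, "CH4", np.nan],                   # entity row: forward-fill
        [np.nan, "HFCs(1)", np.nan, np.nan],              # entity row: keep gaps
        ["(kt)", "(kt CO2 equivalent)", "(kt)", np.nan],  # unit row: forward-fill
    ])
    header_fill = [True, False, True]  # one flag per header row, as in the CRF2023 summary specs

    header = []
    for row in range(len(df_header)):
        if header_fill[row]:
            # fill each empty cell with the last value to its left
            header.append(list(df_header.iloc[row].ffill()))
        else:
            header.append(list(df_header.iloc[row]))
    print(header)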

+ 24 - 13
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py

@@ -14,21 +14,24 @@ from pathlib import Path
 from datetime import date
 
 
-from util import all_crf_countries
-from util import log_path
-import crf_specifications as crf
-from UNFCCC_CRF_reader_core import get_country_name
-from UNFCCC_CRF_reader_core import get_latest_date_for_country, read_crf_table
-from UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
+from .util import all_crf_countries
+from .util import log_path
+from . import crf_specifications as crf
+from .UNFCCC_CRF_reader_core import get_country_name
+from .UNFCCC_CRF_reader_core import get_latest_date_for_country, read_crf_table
+from .UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
 
 def read_year_to_test_specs(
         submission_year: int,
         data_year: Optional[int]=None,
+        totest: Optional[bool]=False,
 ) -> xr.Dataset:
     """
     Read one xlsx file (so one data year) for each country for a submission year to
     create log files and extend the specifications
 
+    totest: if true only read tables with "totest" status
+
     """
     if data_year is None:
         data_year=2000
@@ -37,6 +40,8 @@ def read_year_to_test_specs(
     last_row_info = []
     ds_all = None
     print(f"CRF test reading for CRF{submission_year}. Using data year {data_year}")
+    if totest:
+        print("Reading only tables to test.")
     print("#"*80)
     try:
         crf_spec = getattr(crf, f"CRF{submission_year}")
@@ -44,8 +49,12 @@ def read_year_to_test_specs(
         raise ValueError(f"No terminology exists for submission years {submission_year}, "
                          f"{submission_year - 1}")
 
-    tables = [table for table in crf_spec.keys()
-              if crf_spec[table]["status"] == "tested"]
+    if totest:
+        tables = [table for table in crf_spec.keys()
+                  if crf_spec[table]["status"] == "totest"]
+    else:
+        tables = [table for table in crf_spec.keys()
+                  if crf_spec[table]["status"] == "tested"]
     print(f"The following tables are available in the " \
           f"CRF{submission_year} specification: {tables}")
     print("#" * 80)
@@ -101,9 +110,9 @@ def read_year_to_test_specs(
                         ds_all = ds_table_pm2
                     else:
                         ds_all = ds_all.combine_first(ds_table_pm2)
-                except:
-                    print(f"Error occured when converting table {table} for {country_name} to"
-                          f" PRIMAP2 IF.")
+                except Exception as e:
+                    print(f"Error occured when converting table {table} for"
+                          f" {country_name} to PRIMAP2 IF. Exception: {e}")
                     # TODO: error handling and logging
 
     # process log messages.
@@ -116,8 +125,8 @@ def read_year_to_test_specs(
 
     if len(last_row_info) > 0:
         log_location = log_path / f"CRF{submission_year}" \
-                       / f"{data_yar}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
-        print(f"Data found in the last row. Savin log to "
+                       / f"{data_year}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
+        print(f"Data found in the last row. Saving log to "
               f"{log_location}")
         save_last_row_info(last_row_info, log_location)
 
@@ -125,6 +134,8 @@ def read_year_to_test_specs(
     compression = dict(zlib=True, complevel=9)
     output_folder = log_path / f"test_read_CRF{submission_year}"
     output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
+    if totest:
+        output_filename = output_filename + "_totest"
 
     if not output_folder.exists():
         output_folder.mkdir()
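For completeness, a hedged example of driving the devel reader with the new flag from Python; the module path follows the package layout introduced in this commit, and the keyword arguments are the ones visible in the signature above (everything else about the call, such as the country list it iterates over, is unchanged):

    from UNFCCC_GHG_data.UNFCCC_CRF_reader.UNFCCC_CRF_reader_devel import (
        read_year_to_test_specs,
    )

    # read only the tables whose spec status is "totest" for the CRF2023 round,
    # using data year 2000 (also the fallback when data_year is None)
    ds = read_year_to_test_specs(submission_year=2023, data_year=2000, totest=True)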

+ 20 - 20
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py

@@ -13,24 +13,24 @@ from datetime import date
 #from pathlib import Path
 from typing import Optional, List, Dict, Union
 
-#from . import crf_specifications as crf
-import crf_specifications as crf
-
-from UNFCCC_CRF_reader_core import read_crf_table
-from UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
-from UNFCCC_CRF_reader_core import get_latest_date_for_country
-from UNFCCC_CRF_reader_core import get_crf_files
-from UNFCCC_CRF_reader_core import get_country_name
-from UNFCCC_CRF_reader_devel import save_unknown_categories_info
-from UNFCCC_CRF_reader_devel import save_last_row_info
-
-from util import code_path, log_path, \
+from . import crf_specifications as crf
+#import crf_specifications as crf
+
+from .UNFCCC_CRF_reader_core import read_crf_table
+from .UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
+from .UNFCCC_CRF_reader_core import get_latest_date_for_country
+from .UNFCCC_CRF_reader_core import get_crf_files
+from .UNFCCC_CRF_reader_core import get_country_name
+from .UNFCCC_CRF_reader_devel import save_unknown_categories_info
+from .UNFCCC_CRF_reader_devel import save_last_row_info
+
+from .util import code_path, log_path, \
     custom_country_mapping, extracted_data_path, root_path, \
     all_crf_countries, NoCRFFilesError
 
-import sys
-sys.path.append(code_path.name)
-from UNFCCC_reader.get_submissions_info import get_country_code
+#import sys
+#sys.path.append(code_path.name)
+from ..UNFCCC_reader import get_country_code
 
 
 # functions:
@@ -45,7 +45,7 @@ from UNFCCC_reader.get_submissions_info import get_country_code
 
 
 # general approach:
-# main code in a function that reads on table from one file.
+# main UNFCCC_GHG_data in a function that reads on table from one file.
 # return raw pandas DF for use in different functions
 # wrappers around this function to read for a whole country or for test reading where we also
 # write files with missing sectors etc.
@@ -84,7 +84,7 @@ def read_crf_for_country(
     __________
 
     country_codes: str
-        ISO 3-letter country code
+        ISO 3-letter country UNFCCC_GHG_data
 
     submission_year: int
         Year of the submission of the data
@@ -220,7 +220,7 @@ def read_crf_for_country_datalad(
     __________
 
     country_codes: str
-        ISO 3-letter country code
+        ISO 3-letter country UNFCCC_GHG_data
 
     submission_year: int
         Year of the submission of the data
@@ -382,7 +382,7 @@ def read_new_crf_for_year_datalad(
                 output_files = output_files + country_info["output"]
             else:
                 data_read = submission_has_been_read(
-                    country_info["code"], country_info["name"],
+                    country_info["UNFCCC_GHG_data"], country_info["name"],
                     submission_year=submission_year,
                     submission_date=country_info["date"],
                     verbose=False,
@@ -438,7 +438,7 @@ def get_input_and_output_files_for_country(
         country_code = get_country_code(country)
     # now get the country name
     country_name = get_country_name(country_code)
-    country_info["code"] = country_code
+    country_info["UNFCCC_GHG_data"] = country_code
     country_info["name"] = country_name
 
     # determine latest data

+ 3 - 1
code/UNFCCC_CRF_reader/__init__.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/__init__.py

@@ -3,6 +3,8 @@ CRF reader module
 """
 
 #from pathlib import Path
-#from . import crf_specifications
+from . import crf_specifications
 from .UNFCCC_CRF_reader_prod import read_crf_for_country, read_crf_for_country_datalad
 
+__all__ = ["crf_specifications", "read_crf_for_country", "read_crf_for_country_datalad"]
+
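With crf_specifications and the two production reader functions re-exported here, downstream code can use plain absolute imports; a small usage sketch (the country code and submission year are illustrative values, and the exact keyword handling of read_crf_for_country is as defined in UNFCCC_CRF_reader_prod.py):

    from UNFCCC_GHG_data.UNFCCC_CRF_reader import crf_specifications, read_crf_for_country

    spec_2023 = crf_specifications.CRF2023             # table specifications for the 2023 round
    read_crf_for_country("DEU", submission_year=2023)  # illustrative country code and year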

+ 0 - 0
code/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py


+ 0 - 0
code/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py


+ 135 - 23
code/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py

@@ -42,7 +42,7 @@ TODO:
 import numpy as np
 from .util import unit_info
 
-CRF2022 = {
+CRF2023 = {
     "Table1s1": {
         "status": "tested",
         "table": {
@@ -715,6 +715,7 @@ CRF2022 = {
             ['Fossil part of biodiesel', ['1.A.3.b.i', 'OLBiodieselFC'], 4],  # LTU
             ['Other', ['1.A.3.b.i', 'OLOther'], 4],  # UKR, MLT
             ['Other Liquid Fuels', ['1.A.3.b.i', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.i', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.i', 'OMotorFuels'], 4],  # RUS
             ['Lubricants in 2-stroke engines', ['1.A.3.b.i', 'Lubricants'], 4],  # HUN
             ['LNG', ['1.A.3.b.i', 'LNG'], 4],  ## USA
@@ -746,6 +747,7 @@ CRF2022 = {
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Other', ['1.A.3.b.ii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.ii', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.ii', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.ii', 'OMotorFuels'], 4],  # RUS
             ['LNG', ['1.A.3.b.ii', 'LNG'], 4],  ## USA
             ['Gaseous fuels', ['1.A.3.b.ii', 'Gaseous'], 3],
@@ -774,6 +776,7 @@ CRF2022 = {
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.iii', 'OLBiodieselFC'], 4],  # CAN
             ['Other', ['1.A.3.b.iii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.iii', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.iii', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.iii', 'OMotorFuels'], 4],  # RUS
             ['LNG', ['1.A.3.b.iii', 'LNG'], 4],  # USA
             ['GTL', ['1.A.3.b.iii', 'GTL'], 4],  # MCO, new in 2022
@@ -802,6 +805,7 @@ CRF2022 = {
             ['Lubricant Oil', ['1.A.3.b.iv', 'Lubricants'], 4],  # PRT
             ['Other', ['1.A.3.b.iv', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.iv', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.iv', 'OLOther'], 4],  # SWE new in 2023
             ['Lube', ['1.A.3.b.iv', 'Lubricants'], 4],  # MCO
             ['Lubricants in 2-stroke engines', ['1.A.3.b.iv', 'Lubricants'], 4],  # HUN
             ['Lubricants (two-stroke engines)', ['1.A.3.b.iv', 'Lubricants'], 4],  # ESP
@@ -853,7 +857,7 @@ CRF2022 = {
             ['Biomass', ['1.A.3.b.v.6', 'Biomass'], 4],
             ['Other Fossil Fuels (please specify)', ['1.A.3.b.v.6', 'OtherFF'], 4],
             # BEL
-            ['Lubricant Two-Stroke Engines', ['1.A.3.b.v.7', 'Total'], 3],
+            ['Lubricant Two-Stroke Engines', ['1.A.3.b.v.7', 'Lubricants'], 3],
             ['Other Liquid Fuels (please specify)', ['1.A.3.b.v.7', 'OtherLiquid'], 4],
             # ROU
             ['Gaseous Fuels', ['1.A.3.b.v.8', 'Total'], 3],
@@ -925,6 +929,9 @@ CRF2022 = {
             ['Fuel oil C', ['1.A.3.d', 'FuelOilC'], 3],  # JPN
             ['Diesel Oil', ['1.A.3.d', 'OLDiesel'], 3],  # FIN
             ['Other Liquid Fuels', ['1.A.3.d', 'OLOther'], 3],  # ROU, new in 2022
+            ['Heating and Other Gasoil', ['1.A.3.d', 'OLHeatingOtherGasoil'], 3],
+            # ROU, new in 2023
+            ['Liquified Petroleum Gas', ['1.A.3.d', 'OLLPG'], 3],  # ROU, new in 2023
             ['Gaseous fuels', ['1.A.3.d', 'Gaseous'], 2],
             ['Biomass(6)', ['1.A.3.d', 'Biomass'], 2],
             ['Other fossil fuels (please specify)(4)', ['1.A.3.d', 'OtherFF'], 2],
@@ -1137,6 +1144,7 @@ CRF2022 = {
             ['heavy fuel oil', ['1.A.4.c.ii', 'HeavyFuelOil'], 4],  # NOR
             ['Other motor fuels', ['1.A.4.c.ii', 'OMotorFuels'], 4],  # RUS
             ['Biodiesel (5 percent fossil portion)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
+            ['Lubricating Oil (Two-Stroke Engines)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Gaseous fuels', ['1.A.4.c.ii', 'Gaseous'], 3],
             ['Biomass(6)', ['1.A.4.c.ii', 'Biomass'], 3],
             ['Other fossil fuels (please specify)(4)', ['1.A.4.c.ii', 'OtherFF'], 3],
@@ -1448,6 +1456,8 @@ CRF2022 = {
             ['Flaring', ['1.B.1.c.i'], 1],  # UKR, AUS
             ['Flaring of gas', ['1.B.1.c.i'], 1],  # SWE
             ['Coal Dumps', ['1.B.1.c.ii'], 1],  # JPN
+            ['Uncontrolled combustion and burning coal dumps', ['1.B.1.c.ii'], 1],
+            # JPN since 2023
             ['SO2 scrubbing', ['1.B.1.c.iii'], 1],  # SVN
             ['Flaring of coke oven gas', ['1.B.1.c.iv'], 1],  # KAZ
             ['Emisson from Coke Oven Gas Subsystem', ['1.B.1.c.iv'], 1],  # POL
@@ -2280,6 +2290,8 @@ CRF2022 = {
             ['Mechanical-Biological Treatment MBT', ['5.E.2']],  # DEU
             ['Accidental fires', ['5.E.3']],  # DEU, DKE, DNK, DNM
             ['Decomposition of Petroleum-Derived Surfactants', ['5.E.4']],  # JPN
+            ['Decomposition of Fossil-fuel Derived Surfactants', ['5.E.4']],
+            # JPN since 2023
             ['Other non-specified', ['5.E.5']],  # USA
             ['Biogas burning without energy recovery', ['5.E.6']],  # PRT
             ['Sludge spreading', ['5.E.7']],  # ESP
@@ -2462,13 +2474,16 @@ CRF2022 = {
             ['Other (please specify)', ['5.C.2.a.ii'], 2],
             ['agricultural waste', ['5.C.2.a.ii.1'], 3],  # ITA
             ['Agricultural residues', ['5.C.2.a.ii.1'], 3],  # ESP
+            ['Agriculture residues', ['5.C.2.a.ii.1'], 3],  # PRT
             ['Natural residues', ['5.C.2.a.ii.2'], 3],  # CHE
             ['Wood waste', ['5.C.2.a.ii.3'], 3],  # GBR, GBK
             ['Bonfires etc.', ['5.C.2.a.ii.4'], 3],  # DEU
             ['Bonfires', ['5.C.2.a.ii.4'], 3],  # NLD, ISL
             ['Other', ['5.C.2.a.ii.5'], 3],  # EST
             ['Other waste', ['5.C.2.a.ii.5'], 3],  # CZE
+            ['Waste', ['5.C.2.a.ii.5'], 3],  # GBR
             ['Industrial Solid Waste', ['5.C.2.a.ii.6'], 3],  # JPN
+            ['Vine', ['5.C.2.a.ii.7'], 3], # AUT
             ['Non-biogenic', ['5.C.2.b'], 1],
             ['Municipal solid waste', ['5.C.2.b.i'], 2],
             ['Other (please specify)', ['5.C.2.b.ii'], 2],
@@ -2478,6 +2493,7 @@ CRF2022 = {
             ['Bonfires', ['5.C.2.b.ii.4'], 3],  # ISL
             ['Other', ['5.C.2.b.ii.5'], 3],  # EST
             ['Other waste', ['5.C.2.b.ii.5'], 3],  # CZE
+            ['Waste', ['5.C.2.b.ii.5'], 3],  # GBR
             ['Industrial Solid Waste', ['5.C.2.b.ii.6'], 3],  # JPN
         ],
         "entity_mapping": {
@@ -2528,41 +2544,137 @@ CRF2022 = {
         },
     },  # tested
     "Summary1.As1": {  # Summary 1, sheet 1
-        "status": "TODO",
+        "status": "tested",
          "table": {
             "firstrow": 5,
-            "lastrow": 26,
+            "lastrow": 28,
             "header": ['entity', 'unit'],
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
             "stop_cats": ["", np.nan],
-            "unit_info": unit_info["default"],
+            "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
-            ['Total Energy', ['1']],
-            ['A. Fuel combustion activities (sectoral approach)', ['1.A']],
+            ['Total national emissions and removals', ['0']],
+            ['1. Energy', ['1']],
+            ['A. Fuel combustion Reference approach(2)', ['1.A-ref']],
+            ['Sectoral approach(2)', ['1.A']],
             ['1. Energy industries', ['1.A.1']],
-            ['a. Public electricity and heat production', ['1.A.1.a']],
-            ['b. Petroleum refining', ['1.A.1.b']],
-            ['c. Manufacture of solid fuels and other energy industries', ['1.A.1.c']],
             ['2. Manufacturing industries and construction', ['1.A.2']],
-            ['a. Iron and steel', ['1.A.2.a']],
-            ['b. Non-ferrous metals', ['1.A.2.b']],
-            ['c. Chemicals', ['1.A.2.c']],
-            ['d. Pulp, paper and print', ['1.A.2.d']],
-            ['e. Food processing, beverages and tobacco', ['1.A.2.e']],
-            ['f. Non-metallic minerals', ['1.A.2.f']],
-            ['g. Other (please specify)', ['1.A.2.g']],
             ['3. Transport', ['1.A.3']],
-            ['a. Domestic aviation', ['1.A.3.a']],
-            ['b. Road transportation', ['1.A.3.b']],
-            ['c. Railways', ['1.A.3.c']],
-            ['d. Domestic navigation', ['1.A.3.d']],
-            ['e. Other transportation', ['1.A.3.e']],
+            ['4. Other sectors', ['1.A.4']],
+            ['5. Other', ['1.A.5']],
+            ['B. Fugitive emissions from fuels', ['1.B']],
+            ['1. Solid fuels', ['1.B.1']],
+            ['2. Oil and natural gas and other emissions from energy production',
+             ['1.B.2']],
+            ['C. CO2 Transport and storage', ['1.C']],
+            ['2. Industrial processes and product use', ['2']],
+            ['A. Mineral industry', ['2.A']],
+            ['B. Chemical industry', ['2.B']],
+            ['C. Metal industry', ['2.C']],
+            ['D. Non-energy products from fuels and solvent use', ['2.D']],
+            ['E. Electronic industry', ['2.E']],
+            ['F. Product uses as substitutes for ODS', ['2.F']],
+            ['G. Other product manufacture and use', ['2.G']],
+            ['H. Other(3)', ['2.H']],
         ],
         "entity_mapping": {
-            "NOX": "NOx",
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs(1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
+        },
+        "coords_defaults": {
+            "class": "Total",
+        },
+    },  # tested
+    "Summary1.As2": {  # Summary 1, sheet 2
+        "status": "tested",
+         "table": {
+            "firstrow": 5,
+            "lastrow": 34,
+            "header": ['entity', 'entity', 'unit'],
+            "header_fill": [True, False, True],
+            "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
+            "categories": ["category"],
+            "cols_to_ignore": [],
+            "stop_cats": ["", np.nan],
+            "unit_info": unit_info["summary"],
+        },
+        "sector_mapping": [
+            ['3. Agriculture', ['3']],
+            ['A. Enteric fermentation', ['3.A']],
+            ['B. Manure management', ['3.B']],
+            ['C. Rice cultivation', ['3.C']],
+            ['D. Agricultural soils', ['3.D']],
+            ['E. Prescribed burning of savannas', ['3.E']],
+            ['F. Field burning of agricultural residues', ['3.F']],
+            ['G. Liming', ['3.G']],
+            ['H. Urea application', ['3.H']],
+            ['I. Other carbon-contining fertilizers', ['3.I']],
+            ['J. Other', ['3.J']],
+            ['4. Land use, land-use change and forestry (4)', ['4']],
+            ['A. Forest land (4)', ['4.A']],
+            ['B. Cropland (4)', ['4.B']],
+            ['C. Grassland (4)', ['4.C']],
+            ['D. Wetlands (4)', ['4.D']],
+            ['E. Settlements (4)', ['4.E']],
+            ['F. Other land (4)', ['4.F']],
+            ['G. Harvested wood products', ['4.G']],
+            ['H. Other (4)', ['4.H']],
+            ['5. Waste', ['5']],
+            ['A. Solid waste disposal (5)', ['5.A']],
+            ['B. Biological treatment of solid waste (5)', ['5.B']],
+            ['C. Incineration and open burning of waste (5)', ['5.C']],
+            ['D. Wastewater treatment and discharge', ['5.D']],
+            ['E. Other (5)', ['5.E']],
+            ['6. Other (please specify)(6)', ['6']],
+        ],
+        "entity_mapping": {
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs (1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
+        },
+        "coords_defaults": {
+            "class": "Total",
+        },
+    },  # tested
+    "Summary1.As3": {  # Summary 1, sheet 3
+        "status": "tested",
+         "table": {
+            "firstrow": 5,
+            "lastrow": 17,
+            "header": ['entity', 'entity', 'unit'],
+            "header_fill": [True, False, True],
+            "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
+            "categories": ["category"],
+            "cols_to_ignore": [],
+            "stop_cats": ["", np.nan],
+            "unit_info": unit_info["summary"],
+        },
+        "sector_mapping": [
+            ['Memo items:(7)', ['\IGNORE']],
+            ['International bunkers', ['M.Memo.Int']],
+            ['Aviation', ['M.Memo.Int.Avi']],
+            ['Navigation', ['M.Memo.Int.Mar']],
+            ['Multilateral operations', ['M.Memo.Mult']],
+            ['CO2 emissions from biomass', ['M.Memo.Bio']],
+            ['CO2 captured', ['M.Memo.CO2Cap']],
+            ['Long-term storage of C in waste disposal sites', ['M.Memo.LTSW']],
+            ['Indirect N2O', ['M.Memo.IndN2O']],
+            ['Indirect CO2', ['M.Memo.IndCO2']],
+        ],
+        "entity_mapping": {
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs(1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
         },
         "coords_defaults": {
             "class": "Total",

+ 2 - 0
code/UNFCCC_CRF_reader/crf_specifications/__init__.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py

@@ -5,3 +5,5 @@ Define the CRF specifications here for easy access
 from .CRF2021_specification import CRF2021
 from .CRF2022_specification import CRF2022
 from .CRF2023_specification import CRF2023
+
+__all__ = ["CRF2021", "CRF2022", "CRF2023"]

+ 10 - 0
code/UNFCCC_CRF_reader/crf_specifications/util.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py

@@ -29,4 +29,14 @@ unit_info = {
         },
         "default_unit": "kt",
     },
+    "summary": {  # contains fgas mixtures in CO2 eq units
+        "unit_row": 0,
+        "entity_row": "header",
+        "regexp_entity": r".*",
+        "regexp_unit": r"\((.*)\)",
+        "manual_repl_unit": {
+            "(kt CO2 equivalent)": "kt CO2eq",
+        },
+        "default_unit": "kt",
+    },
 }
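Compared to "default", the new "summary" entry takes units from the header row via regexp_unit and maps the f-gas mixture columns' "(kt CO2 equivalent)" label manually. A tiny illustrative sketch of how those two fields combine on example header strings (this helper is an assumption for illustration, not the reader's actual parsing code):

    import re

    manual_repl_unit = {"(kt CO2 equivalent)": "kt CO2eq"}
    regexp_unit = r"\((.*)\)"

    def unit_from_header(cell: str, default_unit: str = "kt") -> str:
        """Illustrative: manual replacements first, then the regexp, then the default unit."""
        if cell in manual_repl_unit:
            return manual_repl_unit[cell]
        match = re.search(regexp_unit, cell)
        return match.group(1) if match else default_unit

    print(unit_from_header("(kt)"))                 # -> "kt"
    print(unit_from_header("(kt CO2 equivalent)"))  # -> "kt CO2eq"
    print(unit_from_header("CO2"))                  # -> "kt" (default)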

+ 2 - 2
code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py

@@ -3,11 +3,11 @@ This script is a wrapper around the read_crf_for_country
 function such that it can be called from datalad
 """
 
-from UNFCCC_CRF_reader_prod import read_crf_for_country
+from .UNFCCC_CRF_reader_prod import read_crf_for_country
 import argparse
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--re_read', help='Read data also if already read before',
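Usage note: because the wrapper now imports read_crf_for_country relatively, it will typically need to be run as a module from the repository root (or with the UNFCCC_GHG_data package installed) rather than as a plain script path; for example, with placeholder country and year values:

    python -m UNFCCC_GHG_data.UNFCCC_CRF_reader.read_UNFCCC_CRF_submission \
        --country DEU --submission_year 2023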

+ 2 - 2
code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py

@@ -4,11 +4,11 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 """
 
-from UNFCCC_CRF_reader_prod import read_crf_for_country_datalad
+from .UNFCCC_CRF_reader_prod import read_crf_for_country_datalad
 import argparse
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 parser.add_argument('--submission_year', help='Submission round to read')
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--re_read', help='Read data also if already read before',

+ 1 - 1
code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py

@@ -3,7 +3,7 @@ This script is a wrapper around the read_crf_for_country
 function such that it can be called from datalad
 """
 
-from UNFCCC_CRF_reader_prod import read_new_crf_for_year
+from .UNFCCC_CRF_reader_prod import read_new_crf_for_year
 import argparse
 
 parser = argparse.ArgumentParser()

+ 1 - 1
code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py

@@ -4,7 +4,7 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 """
 
-from UNFCCC_CRF_reader_prod import read_new_crf_for_year_datalad
+from .UNFCCC_CRF_reader_prod import read_new_crf_for_year_datalad
 from util import NoCRFFilesError
 import argparse
 

+ 7 - 1
code/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py

@@ -3,21 +3,27 @@ This script is a wrapper around the read_year_to_test_specs
 function such that it can be called from datalad
 """
 
-from UNFCCC_CRF_reader_devel import read_year_to_test_specs
+from UNFCCC_GHG_data.UNFCCC_CRF_reader.UNFCCC_CRF_reader_devel import read_year_to_test_specs
 import argparse
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
+parser.add_argument('--totest', help='read tables to test', action='store_true')
 args = parser.parse_args()
 
 
 submission_year = args.submission_year
 data_year = args.data_year
+if args.totest:
+    totest = True
+else:
+    totest = False
 
 read_year_to_test_specs(
     submission_year=submission_year,
     data_year=data_year,
+    totest=totest,
 )
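One way to invoke the test wrapper with the new flag from the repository root (placeholder year values; without --totest it keeps reading only tables whose status is "tested"):

    python -m UNFCCC_GHG_data.UNFCCC_CRF_reader.test_read_UNFCCC_CRF_for_year \
        --submission_year 2023 --data_year 2010 --totest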
 
 

+ 1 - 1
code/UNFCCC_CRF_reader/util.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/util.py

@@ -4,7 +4,7 @@ from pathlib import Path
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 log_path = root_path / "log"
-code_path = root_path / "code"
+code_path = root_path / "UNFCCC_GHG_data"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 

+ 0 - 0
UNFCCC_GHG_data/UNFCCC_downloader/__init__.py


+ 0 - 0
code/UNFCCC_downloader/download_annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py


+ 0 - 0
code/UNFCCC_downloader/download_ndc.py → UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py


+ 0 - 0
code/UNFCCC_downloader/download_non-annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_bur.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_nc.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py


+ 0 - 0
code/UNFCCC_downloader/unfccc_submission_info.py → UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py


+ 5 - 5
code/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py

@@ -86,7 +86,7 @@ cat_codes_manual = {  # conversion to PRIMAP1 format
     'S/N': 'MMULTIOP',
 }
 
-cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,8}).*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[A-Z0-9]{1,8}).*'
 
 time_format = "%Y"
 
@@ -219,7 +219,7 @@ for page in pages_to_read:
     if page in range(232, 235):
         df_current.iloc[
             metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
@@ -253,7 +253,7 @@ for page in pages_to_read:
 
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     # then the regex replacements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
                                                                 regex=True)
 
@@ -311,7 +311,7 @@ for page in pages_to_read_fgases:
         dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
     df_current = df_current.drop(idx_header)
 
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
@@ -350,7 +350,7 @@ for page in pages_to_read_fgases:
 
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     # then the regex repalcements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
                                                                 regex=True)
 

+ 1 - 1
code/UNFCCC_reader/Chile/config_CHL_BUR4.py → UNFCCC_GHG_data/UNFCCC_reader/Chile/config_CHL_BUR4.py

@@ -64,7 +64,7 @@ filter_remove_IPCC2006 = {
 }
 
 
-cat_mapping = { # categories not listed here have the same code as in IPCC 2006 specifications
+cat_mapping = { # categories not listed here have the same UNFCCC_GHG_data as in IPCC 2006 specifications
     '3': 'M.AG',
     '3.A': '3.A.1',
     '3.A.1': '3.A.1.a',

+ 2 - 2
code/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py

@@ -52,7 +52,7 @@ unit_info = {
 }
 cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
                 'Categorías de fuente y sumidero de gases de efecto invernadero.1']
-# columns for category code and original category name
+# columns for category UNFCCC_GHG_data and original category name
 index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
 
 # operations on long format DF
@@ -169,7 +169,7 @@ for year in years_to_read:
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
     # drop the columns which are empty and repetition of the metadata for the second block
     df_current.drop(cols_to_drop, axis=1, inplace=True)
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set multi-index. necessary for the stack operation in the conversion to long format

+ 0 - 0
code/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py


+ 3 - 3
code/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py

@@ -38,7 +38,7 @@ year = 2019
 entity_row = 0
 unit_row = 1
 index_cols = "Categories"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
@@ -51,7 +51,7 @@ cat_codes_manual = {
     #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
 }
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9]{1,4})\s.*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9]{1,4})\s.*'
 
 coords_cols = {
     "category": "category",
@@ -202,7 +202,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 

+ 0 - 0
code/UNFCCC_reader/Mexico/config_MEX_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Mexico/config_MEX_BUR3.py


+ 3 - 3
code/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py

@@ -32,7 +32,7 @@ entity_row = 0
 unit_row = 1
 
 index_cols = "Categorías de fuentes y sumideros de GEI"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 units = {
@@ -53,7 +53,7 @@ cat_codes_manual = {
     '2F6 Otras aplicaciones': '2F6',
 }
 
-cat_code_regexp = r'^\[(?P<code>[a-zA-Z0-9]{1,3})\].*'
+cat_code_regexp = r'^\[(?P<UNFCCC_GHG_data>[a-zA-Z0-9]{1,3})\].*'
 
 coords_cols = {
     "category": "category",
@@ -168,7 +168,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('code')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 

+ 0 - 0
code/UNFCCC_reader/Montenegro/config_MNE_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Montenegro/config_MNE_BUR3.py


+ 1 - 1
code/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -129,7 +129,7 @@ for i, table in enumerate(tables):
         unit_parts = unit.split(" ")
         unit = f"{unit_parts[0]} CO2eq"
 
-    # remove "/n" from category code and name columns
+    # remove "\n" from category code and name columns
     df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace("\n", "")
     df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace("\n", "")
 

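Tables extracted from PDFs (e.g. via camelot, which is in the dependencies) often carry line breaks inside cells; the Montenegro reader strips them from the category code and name columns before further processing. A small sketch of that cleanup step on a toy table:

```python
import pandas as pd

# toy extract with embedded line breaks, as PDF table extraction often produces
df_current_table = pd.DataFrame({
    0: ['1.A.1', '1.A.\n2'],
    1: ['Energy\nIndustries', 'Manufacturing'],
})

# remove newlines from the category code and name columns
df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace('\n', '')
df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace('\n', '')
print(df_current_table)
```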
+ 0 - 0
code/UNFCCC_reader/Morocco/config_MAR_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Morocco/config_MAR_BUR3.py


+ 3 - 3
code/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py

@@ -32,7 +32,7 @@ pages_to_read = range(104, 138)
 
 compression = dict(zlib=True, complevel=9)
 
-# special header as category code and name in one column
+# special header as category code and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 index_cols = ['Catégories']
@@ -58,7 +58,7 @@ cat_codes_manual = {
     '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
 }
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
+cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
 
 coords_terminologies = {
     "area": "ISO3",
@@ -171,7 +171,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('code')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 

+ 0 - 0
code/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py


+ 2 - 2
code/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py

@@ -37,7 +37,7 @@ years_to_read = range(1990, 2019 + 1)
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 cols_to_read = range(1, 2019 - 1990 + 3)
 
-# columns for category code and original category name
+# columns for category code and original category name
 index_cols = ['분야·부문/연도']
 
 sheet_metadata = {
@@ -136,7 +136,7 @@ for sheet in sheets_to_read:
     # read current sheet (one sheet per gas)
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
                                engine="openpyxl")
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category code and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format

+ 2 - 2
code/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py

@@ -32,7 +32,7 @@ years_to_read = range(1990, 2018 + 1)
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 cols_to_read = range(1, 2018 - 1990 + 3)
 
-# columns for category code and original category name
+# columns for category code and original category name
 index_cols = ['분야·부문/연도']
 
 sheet_metadata = {
@@ -131,7 +131,7 @@ for sheet in sheets_to_read:
     # read current sheet (one sheet per gas)
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=144, usecols=cols_to_read,
                                engine="openpyxl")
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category code and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format

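The Korean inventory files keep one sheet per gas; each sheet is read, reduced to rows with category information, stacked into long format, and the gas/unit from the sheet metadata is attached before the per-gas frames are concatenated. A rough sketch of that loop, assuming sheet_metadata maps sheet names to an entity and unit (that structure is an assumption — the contents of sheet_metadata are not shown in this diff — and read_sheet stands in for pd.read_excel):

```python
import pandas as pd

# assumed structure: sheet name -> coordinates to attach (not taken from the reader)
sheet_metadata = {
    'CO2': {'entity': 'CO2', 'unit': 'Gg CO2'},
    'CH4': {'entity': 'CH4', 'unit': 'Gg CH4'},
}

def read_sheet(sheet):
    # stand-in for pd.read_excel(input_folder / inventory_file, sheet_name=sheet, ...)
    return pd.DataFrame({
        '분야·부문/연도': ['1. 에너지', '2. 산업공정'],
        1990: [100.0, 20.0],
        1991: [110.0, 21.0],
    })

df_all = None
for sheet, meta in sheet_metadata.items():
    df_current = read_sheet(sheet)
    # drop rows without category information
    df_current = df_current.dropna(axis=0, how='all', subset=['분야·부문/연도'])
    # stack the year columns into long format
    df_long = df_current.set_index('분야·부문/연도').stack().reset_index()
    df_long.columns = ['category', 'time', 'data']
    # attach gas and unit taken from the sheet metadata
    for coord, value in meta.items():
        df_long[coord] = value
    df_all = df_long if df_all is None else pd.concat([df_all, df_long])

print(df_all.head())
```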
+ 0 - 0
code/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py → UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py


+ 2 - 2
code/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py

@@ -32,7 +32,7 @@ if not output_folder.exists():
 output_filename = 'TWN_inventory_2022_'
 inventory_file = '00_abstract_en.pdf'
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*'
+cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*'
 
 time_format = "%Y"
 
@@ -227,7 +227,7 @@ for table_name in table_defs.keys():
     df_this_table["category"] = df_this_table["category"].replace(
         table_def["cat_codes_manual"])
     # then the regex replacements
-    repl = lambda m: m.group('code')
+    repl = lambda m: m.group('code')
     df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
                                                                       repl, regex=True)
 

+ 3 - 3
code/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -44,7 +44,7 @@ unit_row = 1
 gwp_to_use = "AR4GWP100"
 
 index_cols = "Greenhouse gas source and sink categories"
-# special header as category code and name in one column
+# special header as category code and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 # manual category codes
@@ -54,7 +54,7 @@ cat_codes_manual = {
     'CO2 from Biomass': 'MBIO',
 }
 
-cat_code_regexp = r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*'
+cat_code_regexp = r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*'
 
 coords_cols = {
     "category": "category",
@@ -184,7 +184,7 @@ df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
 # first the manual replacements
 df_inventory_long["category"] = df_inventory_long["category"].replace(cat_codes_manual)
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('code')
 df_inventory_long["category"] = df_inventory_long["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_inventory_long = df_inventory_long.reset_index(drop=True)
 

+ 6 - 0
UNFCCC_GHG_data/UNFCCC_reader/__init__.py

@@ -0,0 +1,6 @@
+# expose some of the functions to the outside as they are used in other readers as well
+# TODO: create a unified util module for all readers
+
+from .get_submissions_info import get_country_code
+
+__all__ = ["get_country_code"]

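With the new __init__.py in place, the helper can be imported from the installed package instead of via script-relative imports. A short usage example, assuming the package has been installed (e.g. via the editable install that the venv task in dodo.py performs); the printed results reflect the expected behaviour of get_country_code, whose full body is not shown in this diff:

```python
from UNFCCC_GHG_data.UNFCCC_reader import get_country_code

print(get_country_code("Kenya"))  # should resolve the name to 'KEN'
print(get_country_code("KEN"))    # a valid ISO3 code should be returned unchanged
```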
+ 1 - 1
code/UNFCCC_reader/country_info.py → UNFCCC_GHG_data/UNFCCC_reader/country_info.py

@@ -8,7 +8,7 @@ from get_submissions_info import get_country_datasets
 # Find the right function and possible input and output files and
 # read the data using datalad run.
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or code')
 args = parser.parse_args()
 country = args.country
 

+ 0 - 0
code/UNFCCC_reader/folder_mapping.json → UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json


+ 0 - 0
code/UNFCCC_reader/folder_mapping.py → UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py


+ 35 - 35
code/UNFCCC_reader/get_submissions_info.py → UNFCCC_GHG_data/UNFCCC_reader/get_submissions_info.py

@@ -9,7 +9,7 @@ import pycountry
 
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
-code_path = root_path / "code" / "UNFCCC_reader"
+code_path = root_path / "UNFCCC_GHG_data" / "UNFCCC_reader"
 # beware, folders below are different than for CRF reader
 downloaded_data_path = root_path / "downloaded_data"
 extracted_data_path = root_path / "extracted_data"
@@ -46,14 +46,14 @@ def get_country_submissions(
         print_sub: bool = True,
 ) -> Dict[str, List[str]]:
     """
-    Input is a three letter ISO code for a country, or the countries name.
-    The function tries to map the country name to an ISO code and then
+    Input is a three letter ISO code for a country, or the country's name.
+    The function tries to map the country name to an ISO code and then
     queries the folder mapping files for folders.
 
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
         print_sub: bool
             If True information on submissions will be written to stdout
@@ -70,7 +70,7 @@ def get_country_submissions(
     country_code = get_country_code(country_name)
 
     if print_sub:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     country_submissions = {}
     if print_sub:
@@ -115,14 +115,14 @@ def get_country_datasets(
         print_ds: bool = True,
 ) -> Dict[str, List[str]]:
     """
-    Input is a three letter ISO code for a country, or the country's name.
-    The function tries to map the country name to an ISO code and then
-    checks the code and data folders for content on the country.
+    Input is a three letter ISO code for a country, or the country's name.
+    The function tries to map the country name to an ISO code and then
+    checks the UNFCCC_GHG_data and data folders for content on the country.
 
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
         print_ds: bool
             If True information on submissions will be written to stdout
@@ -138,11 +138,11 @@ def get_country_datasets(
     data_folder_legacy = legacy_data_path
 
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
 
     if print_ds:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     rep_data = {}
     # data
@@ -181,7 +181,7 @@ def get_country_datasets(
                     # process filename to get submission
                     parts = dataset.split('_')
                     if parts[0] != country_code:
-                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
+                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
                     else:
                         terminology = "_".join(parts[3 : ])
                         key = f"{parts[1]} ({parts[2]}, {terminology})"
@@ -197,9 +197,9 @@ def get_country_datasets(
 
                         code_file = get_code_file(country_code, parts[1])
                         if code_file:
-                            data_info = data_info + f"code: {code_file.name}"
+                            data_info = data_info + f"UNFCCC_GHG_data: {code_file.name}"
                         else:
-                            data_info = data_info + f"code: not found"
+                            data_info = data_info + f"UNFCCC_GHG_data: not found"
 
                         cleaned_datasets_current_folder[key] = data_info
 
@@ -250,7 +250,7 @@ def get_country_datasets(
                     # process filename to get submission
                     parts = dataset.split('_')
                     if parts[0] != country_code:
-                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
+                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
                     else:
                         terminology = "_".join(parts[3 : ])
                         key = f"{parts[1]} ({parts[2]}, {terminology}, legacy)"
@@ -288,13 +288,13 @@ def get_country_code(
         country_name: str,
 )->str:
     """
-    obtain country code. If the input is a code it will be returned, if the input
-    is not a three letter code a search will be performed
+    obtain country code. If the input is a code it will be returned, if the input
+    is not a three letter code a search will be performed
 
     Parameters
     __________
     country_name: str
-        Country code or name to get the three-letter code for.
+        Country code or name to get the three-letter code for.
 
     """
     # First check if it's in the list of custom codes
@@ -302,7 +302,7 @@ def get_country_code(
         country_code = country_name
     else:
         try:
-            # check if it's a 3 letter code
+            # check if it's a 3 letter code
             country = pycountry.countries.get(alpha_3=country_name)
             country_code = country.alpha_3
         except:
@@ -310,7 +310,7 @@ def get_country_code(
                 country = pycountry.countries.search_fuzzy(country_name.replace("_", " "))
             except:
                 raise ValueError(f"Country name {country_name} can not be mapped to "
-                                 f"any country code. Try using the ISO3 code directly.")
+                                 f"any country UNFCCC_GHG_data. Try using the ISO3 UNFCCC_GHG_data directly.")
             if len(country) > 1:
                 country_code = None
                 for current_country in country:
@@ -337,13 +337,13 @@ def get_possible_inputs(
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
         submission: str
             String of the submission
 
         print_info: bool = False
-            If True print information on code found
+            If True print information on code found
 
     Returns
     -------
@@ -352,11 +352,11 @@ def get_possible_inputs(
 
     data_folder = downloaded_data_path
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
 
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     input_files = []
     for item in data_folder.iterdir():
@@ -399,7 +399,7 @@ def get_possible_outputs(
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
         submission: str
             String of the submission
@@ -414,10 +414,10 @@ def get_possible_outputs(
 
     data_folder = extracted_data_path
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     output_files = []
     for item in data_folder.iterdir():
@@ -457,17 +457,17 @@ def get_code_file(
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
         submission: str
             String of the submission
 
         print_info: bool = False
-            If True print information on code found
+            If True print information on code found
 
     Returns
     -------
-        returns a pathlib Path object for the code file
+        returns a pathlib Path object for the code file
     """
 
     code_file_path = None
@@ -477,18 +477,18 @@ def get_code_file(
     if submission[0:3] == "CRF":
         return root_path / "UNFCCC_CRF_reader"
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
 
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     with open(code_path / "folder_mapping.json", "r") as mapping_file:
         folder_mapping = json.load(mapping_file)
 
     if country_code not in folder_mapping:
         if print_info:
-            print("No code available")
+            print("No UNFCCC_GHG_data available")
             print("")
     else:
         country_folder = code_path / folder_mapping[country_code]
@@ -497,13 +497,13 @@ def get_code_file(
         for file in country_folder.iterdir():
             if file.match(code_file_name_candidate):
                 if code_file_path is not None:
-                    raise ValueError(f"Found multiple code candidates: "
+                    raise ValueError(f"Found multiple UNFCCC_GHG_data candidates: "
                                      f"{code_file_path} and file.name. "
                                      f"Please use only one file with name "
                                      f"'read_ISO3_submission_XXX.YYY'.")
                 else:
                     if print_info:
-                        print(f"Found code file {file.relative_to(root_path)}")
+                        print(f"Found UNFCCC_GHG_data file {file.relative_to(root_path)}")
                 code_file_path = file
 
     if code_file_path is not None:

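get_country_code first checks a list of custom codes, then tries the input as an ISO3 code and finally falls back to a fuzzy name search via pycountry. A stripped-down, self-contained sketch of that resolution order — custom_country_mapping and the exact exception handling are assumptions for this sketch, not taken from the module:

```python
import pycountry

custom_country_mapping = {'EUA': 'European Union'}  # assumed; the real module defines its own list

def resolve_country_code(country_name: str) -> str:
    # custom codes are returned as-is
    if country_name in custom_country_mapping:
        return country_name
    try:
        # accept a 3-letter ISO code directly
        return pycountry.countries.get(alpha_3=country_name).alpha_3
    except (AttributeError, LookupError):
        pass
    try:
        # fall back to a fuzzy name search
        candidates = pycountry.countries.search_fuzzy(country_name.replace('_', ' '))
    except LookupError:
        raise ValueError(f"Country name {country_name} can not be mapped to "
                         f"any country code. Try using the ISO3 code directly.")
    return candidates[0].alpha_3

print(resolve_country_code('Republic of Korea'))  # expected: 'KOR'
```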
+ 4 - 4
code/UNFCCC_reader/read_UNFCCC_submission.py → UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py

@@ -14,7 +14,7 @@ from get_submissions_info import get_possible_outputs
 # Find the right function and possible input and output files and
 # read the data using datalad run.
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or code')
 parser.add_argument('--submission', help='Submission to read')
 
 args = parser.parse_args()
@@ -34,7 +34,7 @@ print("")
 script_name = get_code_file(country, submission)
 
 if script_name is not None:
-    print(f"Found code file {script_name}")
+    print(f"Found UNFCCC_GHG_data file {script_name}")
     print("")
 
     # get possible input files
@@ -77,8 +77,8 @@ if script_name is not None:
         explicit=True,
     )
 else:
-    # no code found.
-    print(f"No code found to read {submission} from {country}")
+    # no code found.
+    print(f"No code found to read {submission} from {country}")
     print(f"Use 'doit country_info --country={country} to get "
           f"a list of available submissions and datasets.")
 

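read_UNFCCC_submission.py glues the pieces together: it parses --country and --submission, looks up the matching reading script with get_code_file plus the candidate input and output files, and then runs the script through datalad run so inputs and outputs are recorded. A much simplified sketch of that dispatch, without the datalad wrapping (subprocess is used here purely for illustration; the real script records the run with datalad):

```python
import argparse
import subprocess
import sys

from UNFCCC_GHG_data.UNFCCC_reader.get_submissions_info import get_code_file

parser = argparse.ArgumentParser()
parser.add_argument('--country', help='Country name or code')
parser.add_argument('--submission', help='Submission to read')
args = parser.parse_args()

script_name = get_code_file(args.country, args.submission)
if script_name is not None:
    print(f"Found code file {script_name}")
    # the real script wraps this call in `datalad run` with explicit inputs/outputs
    subprocess.run([sys.executable, str(script_name)], check=True)
else:
    print(f"No code found to read {args.submission} from {args.country}")
```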
+ 8 - 0
UNFCCC_GHG_data/__init__.py

@@ -0,0 +1,8 @@
+####
+
+from . import UNFCCC_reader
+from . import UNFCCC_CRF_reader
+# import UNFCCC_DI_reader
+# import UNFCCC_downloader
+
+__all__ = ["UNFCCC_reader", "UNFCCC_CRF_reader"]

+ 0 - 12
code/requirements.txt

@@ -1,12 +0,0 @@
-bs4
-requests
-pandas
-selenium
-primap2
-countrynames
-pycountry
-datalad
-treelib
-camelot-py
-opencv-python
-ghostscript

+ 25 - 23
dodo.py

@@ -7,10 +7,12 @@ from doit import get_var
 def task_setup_venv():
     """Create virtual environment"""
     return {
-        'file_dep': ['code/requirements.txt'],
+        'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
         'actions': ['python3 -m venv venv',
-                    './venv/bin/pip install --upgrade pip',
-                    './venv/bin/pip install -Ur code/requirements.txt',
+                    './venv/bin/pip install --upgrade pip wheel',
+                    #'./venv/bin/pip install -Ur UNFCCC_GHG_data/requirements.txt',
+                    './venv/bin/pip install --upgrade --upgrade-strategy '
+                    'eager -e .[dev]',
                     'touch venv',],
         'targets': ['venv'],
         'verbosity': 2,
@@ -27,7 +29,7 @@ def task_map_folders():
     Create or update the folder mapping in the given folder
     """
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder={read_config_folder['folder']}"],
         'verbosity': 2,
         'setup': ['setup_venv'],
@@ -41,7 +43,7 @@ def task_update_bur():
         'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
         'actions': ['datalad run -m "Fetch BUR submissions" '
                     '-o downloaded_data/UNFCCC/submissions-bur.csv '
-                    './venv/bin/python code/UNFCCC_downloader/fetch_submissions_bur.py'],
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py'],
         'verbosity': 2,
         'setup': ['setup_venv'],
     }
@@ -55,8 +57,8 @@ def task_download_bur():
         # before download
         'actions': ['datalad run -m "Download BUR submissions" '
                     '-i downloaded_data/UNFCCC/submissions-bur.csv '
-                    './venv/bin/python code/UNFCCC_downloader/download_non-annexI.py --category=BUR',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=BUR',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -70,7 +72,7 @@ def task_update_nc():
         'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
         'actions': ['datalad run -m "Fetch NC submissions" '
                     '-o downloaded_data/UNFCCC/submissions-nc.csv '
-                    './venv/bin/python code/UNFCCC_downloader/fetch_submissions_nc.py'],
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py'],
         'verbosity': 2,
         'setup': ['setup_venv'],
     }
@@ -84,8 +86,8 @@ def task_download_nc():
         # before download
         'actions': ['datalad run -m "Download NC submissions" '
                     '-i downloaded_data/UNFCCC/submissions-nc.csv '
-                    './venv/bin/python code/UNFCCC_downloader/download_non-annexI.py --category=NC',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=NC',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -108,7 +110,7 @@ def task_update_annexi():
         'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
                     "--explicit "
                     f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
-                    f"./venv/bin/python code/UNFCCC_downloader/fetch_submissions_annexI.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py "
                     f"--year={update_aI_config['year']}"],
         'verbosity': 2,
         'setup': ['setup_venv'],
@@ -124,9 +126,9 @@ def task_download_annexi():
         'actions': [f"datalad run -m 'Download AnnexI submissions for "
                     f"{update_aI_config['category']}{update_aI_config['year']}' "
                     f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
-                    f"./venv/bin/python code/UNFCCC_downloader/download_annexI.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py "
                     f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -138,8 +140,8 @@ def task_download_ndc():
     """ Download NDC submissions """
     return {
         'actions': ['datalad run -m "Download NDC submissions" '
-                    './venv/bin/python code/UNFCCC_downloader/download_ndc.py',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -157,11 +159,11 @@ read_config = {
 
 # TODO: make individual task for non-UNFCCC submissions
 def task_read_unfccc_submission():
-    """ Read submission for a country (if code exists) (not for CRF)"""
+    """ Read submission for a country (if UNFCCC_GHG_data exists) (not for CRF)"""
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/read_UNFCCC_submission.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py "
                     f"--country={read_config['country']} --submission={read_config['submission']}",
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=extracted_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -182,11 +184,11 @@ read_config_crf = {
 def task_read_unfccc_crf_submission():
     """ Read CRF submission for a country """
     actions = [
-        f"./venv/bin/python code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
         f"--country={read_config_crf['country']} "
         f"--submission_year={read_config_crf['submission_year']} "
         f"--submission_date={read_config_crf['submission_date']} ",
-        f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
         f"--folder=extracted_data/UNFCCC"
         ]
     if read_config_crf["re_read"] == "True":
@@ -201,9 +203,9 @@ def task_read_unfccc_crf_submission():
 def task_read_new_unfccc_crf_for_year():
     """ Read CRF submission for all countries for given submission year. by default only reads
     data not present yet. Only reads the latest updated submission for each country."""
-    actions = [f"./venv/bin/python code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
+    actions = [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
                f"--submission_year={read_config_crf['submission_year']} ",
-               f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+               f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                f"--folder=extracted_data/UNFCCC"
                ]
     # specifying countries is currently disabled due to problems with command line
@@ -224,7 +226,7 @@ def task_country_info():
     """ Print information on submissions and datasets
     available for given country"""
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/country_info.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/country_info.py "
                     f"--country={read_config['country']}"],
         'verbosity': 2,
         'setup': ['setup_venv'],

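All tasks in dodo.py follow the same shape: a task_* function returning a dict with actions (now pointing into the UNFCCC_GHG_data package), optional file_dep/targets, and a setup dependency on the venv task; configurable tasks read their parameters with get_var. A minimal sketch of a task in that style (the task name and default folder are illustrative, not part of the repository):

```python
from doit import get_var

# command-line configurable parameter, e.g. `doit example_map_folders folder=downloaded_data/UNFCCC`
example_config = {
    'folder': get_var('folder', 'downloaded_data/UNFCCC'),
}

def task_example_map_folders():
    """Illustrative task: refresh the folder mapping for one folder"""
    return {
        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                    f"--folder={example_config['folder']}"],
        'verbosity': 2,
        'setup': ['setup_venv'],
    }
```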
+ 8 - 0
pyproject.toml

@@ -0,0 +1,8 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel",
+    "setuptools_scm[toml]>=3.4"
+]
+build-backend = "setuptools.build_meta"
+

+ 1 - 0
requirements.txt

@@ -0,0 +1 @@
+.

+ 1 - 0
requirements_dev.txt

@@ -0,0 +1 @@
+.[dev]

+ 72 - 0
setup.cfg

@@ -0,0 +1,72 @@
+[metadata]
+name = UNFCCC_GHG_data
+version = 0.2
+author = Johannes Gütschow
+author_email = mail@johannes-guetschow.de
+description = Tools to read GHG data submitted to the UNFCCC using various methods
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/JGuetschow/UNFCCC_non-AnnexI_data
+#project_urls =
+classifiers =
+    Development Status :: 3 - Alpha
+    Intended Audience :: Science/Research
+    Topic :: Scientific/Engineering :: Atmospheric Science
+    License :: OSI Approved :: Apache Software License
+    Natural Language :: English
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+license = Apache Software License 2.0
+license_file = LICENSE
+
+[options]
+packages =
+    UNFCCC_GHG_data
+    UNFCCC_GHG_data.UNFCCC_CRF_reader
+    UNFCCC_GHG_data.UNFCCC_reader
+    UNFCCC_GHG_data.UNFCCC_downloader
+    #UNFCCC_GHG_data.UNFCCC_DI_reader
+    #UNFCCC_GHG_data.datasets
+python_requires = >=3.8
+setup_requires =
+    setuptools_scm
+install_requires =
+    bs4
+    requests
+    pandas
+    selenium
+    primap2
+    countrynames
+    pycountry
+    datalad
+    treelib
+    camelot-py
+    opencv-python
+    ghostscript
+
+[options.extras_require]
+dev =
+    pip
+    wheel
+    bs4
+    requests
+    pandas
+    selenium
+    primap2
+    countrynames
+    pycountry
+    datalad
+    treelib
+    camelot-py
+    opencv-python
+    ghostscript
+    ipykernel
+    jupyter
+
+
+[options.package_data]
+* =
+    *.csv
+    *.nc

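After the editable install that the venv task now performs (pip install -e .[dev]), the metadata declared in setup.cfg can be queried at runtime. A quick check, assuming the package is installed in the active environment:

```python
from importlib.metadata import metadata, version

print(version("UNFCCC_GHG_data"))             # expected: 0.2, as declared in setup.cfg
print(metadata("UNFCCC_GHG_data")["Summary"])  # the description field from setup.cfg
```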
+ 5 - 0
setup.py

@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+
+import setuptools
+
+setuptools.setup()