Browse Source

restructure repo and make it a package. Also work on CRF2023 specs

Johannes Gütschow 1 year ago
parent
commit
361eaac03f
58 changed files with 635 additions and 192 deletions
  1. 2 2
      .gitignore
  2. 202 0
      LICENSE
  3. 1 1
      Makefile
  4. 5 5
      UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py
  5. 24 17
      UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py
  6. 24 13
      UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py
  7. 20 20
      UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py
  8. 3 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/__init__.py
  9. 0 0
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py
  10. 0 0
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py
  11. 135 23
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py
  12. 2 0
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py
  13. 10 0
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py
  14. 2 2
      UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py
  15. 2 2
      UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py
  16. 1 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py
  17. 1 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py
  18. 7 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py
  19. 1 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/util.py
  20. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/__init__.py
  21. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py
  22. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py
  23. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py
  24. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py
  25. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py
  26. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py
  27. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py
  28. 5 5
      UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py
  29. 1 1
      UNFCCC_GHG_data/UNFCCC_reader/Chile/config_CHL_BUR4.py
  30. 2 2
      UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py
  31. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py
  32. 3 3
      UNFCCC_GHG_data/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py
  33. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Mexico/config_MEX_BUR3.py
  34. 3 3
      UNFCCC_GHG_data/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py
  35. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Montenegro/config_MNE_BUR3.py
  36. 1 1
      UNFCCC_GHG_data/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py
  37. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Morocco/config_MAR_BUR3.py
  38. 3 3
      UNFCCC_GHG_data/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py
  39. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py
  40. 2 2
      UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py
  41. 2 2
      UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py
  42. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py
  43. 2 2
      UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py
  44. 3 3
      UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py
  45. 6 0
      UNFCCC_GHG_data/UNFCCC_reader/__init__.py
  46. 1 1
      UNFCCC_GHG_data/UNFCCC_reader/country_info.py
  47. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json
  48. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py
  49. 35 35
      UNFCCC_GHG_data/UNFCCC_reader/get_submissions_info.py
  50. 4 4
      UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py
  51. 8 0
      UNFCCC_GHG_data/__init__.py
  52. 0 12
      code/requirements.txt
  53. 25 23
      dodo.py
  54. 8 0
      pyproject.toml
  55. 1 0
      requirements.txt
  56. 1 0
      requirements_dev.txt
  57. 72 0
      setup.cfg
  58. 5 0
      setup.py

+ 2 - 2
.gitignore

@@ -5,7 +5,7 @@ __pycache__
 /JG_test_code/
 /JG_test_code/
 .doit.db
 .doit.db
 log
 log
-code/datasets
-code/UNFCCC_DI_reader
+UNFCCC_GHG_data/datasets
+UNFCCC_GHG_data/UNFCCC_DI_reader
 datasets/UNFCCC/DI_NAI
 datasets/UNFCCC/DI_NAI
 
 

+ 202 - 0
LICENSE

@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 1 - 1
Makefile

@@ -3,7 +3,7 @@ help:
 	echo Options:
 	echo Options:
 	echo make venv: create virtual environment
 	echo make venv: create virtual environment
 
 
-venv: code/requirements.txt
+venv: UNFCCC_GHG_data
 	[ -d ./venv ] || python3 -m venv venv
 	[ -d ./venv ] || python3 -m venv venv
 	./venv/bin/pip install --upgrade pip
 	./venv/bin/pip install --upgrade pip
 	./venv/bin/pip install -Ur requirements.txt
 	./venv/bin/pip install -Ur requirements.txt

+ 5 - 5
code/UNFCCC_CRF_reader/CRF_raw_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py

@@ -17,16 +17,16 @@ from datetime import date
 root_path = Path(__file__).parents[2].absolute()
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 root_path = root_path.resolve()
 #log_path = root_path / "log"
 #log_path = root_path / "log"
-code_path = root_path / "code"
+code_path = root_path / "UNFCCC_GHG_data"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 dataset_path = root_path / "datasets" / "UNFCCC"
 dataset_path = root_path / "datasets" / "UNFCCC"
 
 
 #sys.path.append(code_path.name)
 #sys.path.append(code_path.name)
 
 
-from util import all_crf_countries
-from UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
-from UNFCCC_CRF_reader_prod import submission_has_been_read
+from .util import all_crf_countries
+from .UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
+from .UNFCCC_CRF_reader_prod import submission_has_been_read
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
@@ -46,7 +46,7 @@ for country in all_crf_countries:
         # check if the latest submission has been read already
         # check if the latest submission has been read already
 
 
         data_read = submission_has_been_read(
         data_read = submission_has_been_read(
-            country_info["code"], country_info["name"],
+            country_info["code"], country_info["name"],
             submission_year=submission_year,
             submission_year=submission_year,
             submission_date=country_info["date"],
             submission_date=country_info["date"],
             verbose=False,
             verbose=False,

+ 24 - 17
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py

@@ -17,8 +17,8 @@ from operator import itemgetter
 from collections import Counter
 from collections import Counter
 from typing import Dict, List, Optional, Tuple, Union
 from typing import Dict, List, Optional, Tuple, Union
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
-import crf_specifications as crf
-from util import downloaded_data_path, NoCRFFilesError, custom_country_mapping
+from . import crf_specifications as crf
+from .util import downloaded_data_path, NoCRFFilesError, custom_country_mapping
 
 
 
 
 ### reading functions
 ### reading functions
@@ -144,7 +144,8 @@ def convert_crf_table_to_pm2if(
         #coords_value_filling=coords_value_filling,
         #coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
         filter_keep=filter_keep,
-        meta_data=meta_data
+        meta_data=meta_data,
+        time_format="%Y",
     )
     )
     return df_table_if
     return df_table_if
 
 
@@ -170,7 +171,7 @@ def read_crf_table(
     __________
     __________
 
 
     country_codes: str or list[str]
     country_codes: str or list[str]
-        ISO 3-letter country code or list of country codes
+        ISO 3-letter country code or list of country codes
 
 
     table: str
     table: str
         name of the table sheet in the CRF xlsx file
         name of the table sheet in the CRF xlsx file
@@ -265,8 +266,8 @@ def read_crf_table(
                 df_all = pd.concat([df_this_file, df_all])
                 df_all = pd.concat([df_this_file, df_all])
                 unknown_rows = unknown_rows + unknown_rows_this_file
                 unknown_rows = unknown_rows + unknown_rows_this_file
                 last_row_info = last_row_info + last_row_info_this_file
                 last_row_info = last_row_info + last_row_info_this_file
-        except:
-            print(f"Year could not be converted to int for file {file}. Skipping file.")
+        except Exception as e:
+            print(f"Error when reading file {file}. Skipping file. Exception: {e}")
 
 
     return df_all, unknown_rows, last_row_info
     return df_all, unknown_rows, last_row_info
 
 
@@ -359,8 +360,15 @@ def read_crf_table_from_file(
     df_header = df_header.replace(r"Unnamed: [0-9]{1,2}", np.nan, regex=True)
     df_header = df_header.replace(r"Unnamed: [0-9]{1,2}", np.nan, regex=True)
     header = []
     header = []
     # fill nans with the last value from the left
     # fill nans with the last value from the left
-    for row in range(0, len(df_header)):
-        header.append(list(df_header.iloc[row].fillna(method="ffill")))
+    if "header_fill" in table_properties:
+        for row in range(0, len(df_header)):
+            if table_properties["header_fill"][row]:
+                header.append(list(df_header.iloc[row].fillna(method="ffill")))
+            else:
+                header.append(list(df_header.iloc[row]))
+    else:
+        for row in range(0, len(df_header)):
+            header.append(list(df_header.iloc[row].fillna(method="ffill")))
 
 
     # combine all non-unit rows into one
     # combine all non-unit rows into one
     entities = None
     entities = None
@@ -391,7 +399,6 @@ def read_crf_table_from_file(
 
 
     df_current.iloc[0] = units
     df_current.iloc[0] = units
     df_current.columns = entities
     df_current.columns = entities
-    #### standardized header is finalized
 
 
     # remove all columns to ignore
     # remove all columns to ignore
     df_current = df_current.drop(columns=table_properties["cols_to_ignore"])
     df_current = df_current.drop(columns=table_properties["cols_to_ignore"])
@@ -533,7 +540,7 @@ def get_crf_files(
     __________
     __________
 
 
     country_codes: str or list[str]
     country_codes: str or list[str]
-        ISO 3-letter country code or list of country codes
+        ISO 3-letter country code or list of country codes
 
 
     submission_year: int
     submission_year: int
         Year of the submission of the data
         Year of the submission of the data
@@ -644,7 +651,7 @@ def get_info_from_crf_filename(
     Returns
     Returns
     _______
     _______
     dict with fields:
     dict with fields:
-        party: the party that submitted the data (3 letter code)
+        party: the party that submitted the data (3 letter code)
         submission_year: year of submission
         submission_year: year of submission
         data_year: year in which the emissions took place
         data_year: year in which the emissions took place
         date: date of the submission
         date: date of the submission
@@ -680,8 +687,8 @@ def filter_filenames(
         List with pathlib.Path objects for the filenames to filter
         List with pathlib.Path objects for the filenames to filter
 
 
     party: Optional[Union[str, List[str]]] (default: None)
     party: Optional[Union[str, List[str]]] (default: None)
-        List of country codes or single country code. If given only files
-        for this(these) country-code(s) will be returned.
+        List of country codes or single country code. If given only files
+        for this(these) country-code(s) will be returned.
 
 
     data_year: Optional[Union[int, List[int]]] (default: None)
     data_year: Optional[Union[int, List[int]]] (default: None)
         List of data years or single year. If given only files for this
         List of data years or single year. If given only files for this
@@ -878,7 +885,7 @@ def filter_category(
         mapping: List
         mapping: List
             mapping for a single category
             mapping for a single category
         country: str
         country: str
-            iso 3-letter code of the country
+            iso 3-letter code of the country
 
 
     Returns
     Returns
     _______
     _______
@@ -918,7 +925,7 @@ def get_latest_date_for_country(
     Parameters
     Parameters
     __________
     __________
     country: str
     country: str
-        3-letter country code
+        3-letter country code
 
 
     submission_year: int
     submission_year: int
         Year of the submission to find the latest date for
         Year of the submission to find the latest date for
@@ -1054,7 +1061,7 @@ def find_latest_date(
 def get_country_name(
 def get_country_name(
         country_code: str,
         country_code: str,
 ) -> str:
 ) -> str:
-    """get country name from code """
+    """get country name from code """
     if country_code in custom_country_mapping:
     if country_code in custom_country_mapping:
         country_name = custom_country_mapping[country_code]
         country_name = custom_country_mapping[country_code]
     else:
     else:
@@ -1062,7 +1069,7 @@ def get_country_name(
             country = pycountry.countries.get(alpha_3=country_code)
             country = pycountry.countries.get(alpha_3=country_code)
             country_name = country.name
             country_name = country.name
         except:
         except:
-            raise ValueError(f"Country code {country_code} can not be mapped to "
+            raise ValueError(f"Country code {country_code} can not be mapped to "
                              f"any country")
                              f"any country")
 
 
     return country_name
     return country_name

+ 24 - 13
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py

@@ -14,21 +14,24 @@ from pathlib import Path
 from datetime import date
 from datetime import date
 
 
 
 
-from util import all_crf_countries
-from util import log_path
-import crf_specifications as crf
-from UNFCCC_CRF_reader_core import get_country_name
-from UNFCCC_CRF_reader_core import get_latest_date_for_country, read_crf_table
-from UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
+from .util import all_crf_countries
+from .util import log_path
+from . import crf_specifications as crf
+from .UNFCCC_CRF_reader_core import get_country_name
+from .UNFCCC_CRF_reader_core import get_latest_date_for_country, read_crf_table
+from .UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
 
 
 def read_year_to_test_specs(
 def read_year_to_test_specs(
         submission_year: int,
         submission_year: int,
         data_year: Optional[int]=None,
         data_year: Optional[int]=None,
+        totest: Optional[bool]=False,
 ) -> xr.Dataset:
 ) -> xr.Dataset:
     """
     """
     Read one xlsx file (so one data year) for each country for a submission year to
     Read one xlsx file (so one data year) for each country for a submission year to
     create log files and extend the specifications
     create log files and extend the specifications
 
 
+    totest: if true only read tables with "totest" status
+
     """
     """
     if data_year is None:
     if data_year is None:
         data_year=2000
         data_year=2000
@@ -37,6 +40,8 @@ def read_year_to_test_specs(
     last_row_info = []
     last_row_info = []
     ds_all = None
     ds_all = None
     print(f"CRF test reading for CRF{submission_year}. Using data year {data_year}")
     print(f"CRF test reading for CRF{submission_year}. Using data year {data_year}")
+    if totest:
+        print("Reading only tables to test.")
     print("#"*80)
     print("#"*80)
     try:
     try:
         crf_spec = getattr(crf, f"CRF{submission_year}")
         crf_spec = getattr(crf, f"CRF{submission_year}")
@@ -44,8 +49,12 @@ def read_year_to_test_specs(
         raise ValueError(f"No terminology exists for submission years {submission_year}, "
         raise ValueError(f"No terminology exists for submission years {submission_year}, "
                          f"{submission_year - 1}")
                          f"{submission_year - 1}")
 
 
-    tables = [table for table in crf_spec.keys()
-              if crf_spec[table]["status"] == "tested"]
+    if totest:
+        tables = [table for table in crf_spec.keys()
+                  if crf_spec[table]["status"] == "totest"]
+    else:
+        tables = [table for table in crf_spec.keys()
+                  if crf_spec[table]["status"] == "tested"]
     print(f"The following tables are available in the " \
     print(f"The following tables are available in the " \
           f"CRF{submission_year} specification: {tables}")
           f"CRF{submission_year} specification: {tables}")
     print("#" * 80)
     print("#" * 80)
@@ -101,9 +110,9 @@ def read_year_to_test_specs(
                         ds_all = ds_table_pm2
                         ds_all = ds_table_pm2
                     else:
                     else:
                         ds_all = ds_all.combine_first(ds_table_pm2)
                         ds_all = ds_all.combine_first(ds_table_pm2)
-                except:
-                    print(f"Error occured when converting table {table} for {country_name} to"
-                          f" PRIMAP2 IF.")
+                except Exception as e:
+                    print(f"Error occured when converting table {table} for"
+                          f" {country_name} to PRIMAP2 IF. Exception: {e}")
                     # TODO: error handling and logging
                     # TODO: error handling and logging
 
 
     # process log messages.
     # process log messages.
@@ -116,8 +125,8 @@ def read_year_to_test_specs(
 
 
     if len(last_row_info) > 0:
     if len(last_row_info) > 0:
         log_location = log_path / f"CRF{submission_year}" \
         log_location = log_path / f"CRF{submission_year}" \
-                       / f"{data_yar}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
-        print(f"Data found in the last row. Savin log to "
+                       / f"{data_year}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
+        print(f"Data found in the last row. Saving log to "
               f"{log_location}")
               f"{log_location}")
         save_last_row_info(last_row_info, log_location)
         save_last_row_info(last_row_info, log_location)
 
 
@@ -125,6 +134,8 @@ def read_year_to_test_specs(
     compression = dict(zlib=True, complevel=9)
     compression = dict(zlib=True, complevel=9)
     output_folder = log_path / f"test_read_CRF{submission_year}"
     output_folder = log_path / f"test_read_CRF{submission_year}"
     output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
     output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
+    if totest:
+        output_filename = output_filename + "_totest"
 
 
     if not output_folder.exists():
     if not output_folder.exists():
         output_folder.mkdir()
         output_folder.mkdir()

+ 20 - 20
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py

@@ -13,24 +13,24 @@ from datetime import date
 #from pathlib import Path
 #from pathlib import Path
 from typing import Optional, List, Dict, Union
 from typing import Optional, List, Dict, Union
 
 
-#from . import crf_specifications as crf
-import crf_specifications as crf
-
-from UNFCCC_CRF_reader_core import read_crf_table
-from UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
-from UNFCCC_CRF_reader_core import get_latest_date_for_country
-from UNFCCC_CRF_reader_core import get_crf_files
-from UNFCCC_CRF_reader_core import get_country_name
-from UNFCCC_CRF_reader_devel import save_unknown_categories_info
-from UNFCCC_CRF_reader_devel import save_last_row_info
-
-from util import code_path, log_path, \
+from . import crf_specifications as crf
+#import crf_specifications as crf
+
+from .UNFCCC_CRF_reader_core import read_crf_table
+from .UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
+from .UNFCCC_CRF_reader_core import get_latest_date_for_country
+from .UNFCCC_CRF_reader_core import get_crf_files
+from .UNFCCC_CRF_reader_core import get_country_name
+from .UNFCCC_CRF_reader_devel import save_unknown_categories_info
+from .UNFCCC_CRF_reader_devel import save_last_row_info
+
+from .util import code_path, log_path, \
     custom_country_mapping, extracted_data_path, root_path, \
     custom_country_mapping, extracted_data_path, root_path, \
     all_crf_countries, NoCRFFilesError
     all_crf_countries, NoCRFFilesError
 
 
-import sys
-sys.path.append(code_path.name)
-from UNFCCC_reader.get_submissions_info import get_country_code
+#import sys
+#sys.path.append(code_path.name)
+from ..UNFCCC_reader import get_country_code
 
 
 
 
 # functions:
 # functions:
@@ -45,7 +45,7 @@ from UNFCCC_reader.get_submissions_info import get_country_code
 
 
 
 
 # general approach:
 # general approach:
-# main code in a function that reads on table from one file.
+# main UNFCCC_GHG_data in a function that reads on table from one file.
 # return raw pandas DF for use in different functions
 # return raw pandas DF for use in different functions
 # wrappers around this function to read for a whole country or for test reading where we also
 # wrappers around this function to read for a whole country or for test reading where we also
 # write files with missing sectors etc.
 # write files with missing sectors etc.
@@ -84,7 +84,7 @@ def read_crf_for_country(
     __________
     __________
 
 
     country_codes: str
     country_codes: str
-        ISO 3-letter country code
+        ISO 3-letter country UNFCCC_GHG_data
 
 
     submission_year: int
     submission_year: int
         Year of the submission of the data
         Year of the submission of the data
@@ -220,7 +220,7 @@ def read_crf_for_country_datalad(
     __________
     __________
 
 
     country_codes: str
     country_codes: str
-        ISO 3-letter country code
+        ISO 3-letter country UNFCCC_GHG_data
 
 
     submission_year: int
     submission_year: int
         Year of the submission of the data
         Year of the submission of the data
@@ -382,7 +382,7 @@ def read_new_crf_for_year_datalad(
                 output_files = output_files + country_info["output"]
                 output_files = output_files + country_info["output"]
             else:
             else:
                 data_read = submission_has_been_read(
                 data_read = submission_has_been_read(
-                    country_info["code"], country_info["name"],
+                    country_info["UNFCCC_GHG_data"], country_info["name"],
                     submission_year=submission_year,
                     submission_year=submission_year,
                     submission_date=country_info["date"],
                     submission_date=country_info["date"],
                     verbose=False,
                     verbose=False,
@@ -438,7 +438,7 @@ def get_input_and_output_files_for_country(
         country_code = get_country_code(country)
         country_code = get_country_code(country)
     # now get the country name
     # now get the country name
     country_name = get_country_name(country_code)
     country_name = get_country_name(country_code)
-    country_info["code"] = country_code
+    country_info["UNFCCC_GHG_data"] = country_code
     country_info["name"] = country_name
     country_info["name"] = country_name
 
 
     # determine latest data
     # determine latest data

+ 3 - 1
code/UNFCCC_CRF_reader/__init__.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/__init__.py

@@ -3,6 +3,8 @@ CRF reader module
 """
 """
 
 
 #from pathlib import Path
 #from pathlib import Path
-#from . import crf_specifications
+from . import crf_specifications
 from .UNFCCC_CRF_reader_prod import read_crf_for_country, read_crf_for_country_datalad
 from .UNFCCC_CRF_reader_prod import read_crf_for_country, read_crf_for_country_datalad
 
 
+__all__ = ["crf_specifications", "read_crf_for_country", "read_crf_for_country_datalad"]
+

+ 0 - 0
code/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py


+ 0 - 0
code/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py


+ 135 - 23
code/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py

@@ -42,7 +42,7 @@ TODO:
 import numpy as np
 import numpy as np
 from .util import unit_info
 from .util import unit_info
 
 
-CRF2022 = {
+CRF2023 = {
     "Table1s1": {
     "Table1s1": {
         "status": "tested",
         "status": "tested",
         "table": {
         "table": {
@@ -715,6 +715,7 @@ CRF2022 = {
             ['Fossil part of biodiesel', ['1.A.3.b.i', 'OLBiodieselFC'], 4],  # LTU
             ['Fossil part of biodiesel', ['1.A.3.b.i', 'OLBiodieselFC'], 4],  # LTU
             ['Other', ['1.A.3.b.i', 'OLOther'], 4],  # UKR, MLT
             ['Other', ['1.A.3.b.i', 'OLOther'], 4],  # UKR, MLT
             ['Other Liquid Fuels', ['1.A.3.b.i', 'OLOther'], 4],  # CYP
             ['Other Liquid Fuels', ['1.A.3.b.i', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.i', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.i', 'OMotorFuels'], 4],  # RUS
             ['Other motor fuels', ['1.A.3.b.i', 'OMotorFuels'], 4],  # RUS
             ['Lubricants in 2-stroke engines', ['1.A.3.b.i', 'Lubricants'], 4],  # HUN
             ['Lubricants in 2-stroke engines', ['1.A.3.b.i', 'Lubricants'], 4],  # HUN
             ['LNG', ['1.A.3.b.i', 'LNG'], 4],  ## USA
             ['LNG', ['1.A.3.b.i', 'LNG'], 4],  ## USA
@@ -746,6 +747,7 @@ CRF2022 = {
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Other', ['1.A.3.b.ii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other', ['1.A.3.b.ii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.ii', 'OLOther'], 4],  # CYP
             ['Other Liquid Fuels', ['1.A.3.b.ii', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.ii', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.ii', 'OMotorFuels'], 4],  # RUS
             ['Other motor fuels', ['1.A.3.b.ii', 'OMotorFuels'], 4],  # RUS
             ['LNG', ['1.A.3.b.ii', 'LNG'], 4],  ## USA
             ['LNG', ['1.A.3.b.ii', 'LNG'], 4],  ## USA
             ['Gaseous fuels', ['1.A.3.b.ii', 'Gaseous'], 3],
             ['Gaseous fuels', ['1.A.3.b.ii', 'Gaseous'], 3],
@@ -774,6 +776,7 @@ CRF2022 = {
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.iii', 'OLBiodieselFC'], 4],  # CAN
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.iii', 'OLBiodieselFC'], 4],  # CAN
             ['Other', ['1.A.3.b.iii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other', ['1.A.3.b.iii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.iii', 'OLOther'], 4],  # CYP
             ['Other Liquid Fuels', ['1.A.3.b.iii', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.iii', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.iii', 'OMotorFuels'], 4],  # RUS
             ['Other motor fuels', ['1.A.3.b.iii', 'OMotorFuels'], 4],  # RUS
             ['LNG', ['1.A.3.b.iii', 'LNG'], 4],  # USA
             ['LNG', ['1.A.3.b.iii', 'LNG'], 4],  # USA
             ['GTL', ['1.A.3.b.iii', 'GTL'], 4],  # MCO, new in 2022
             ['GTL', ['1.A.3.b.iii', 'GTL'], 4],  # MCO, new in 2022
@@ -802,6 +805,7 @@ CRF2022 = {
             ['Lubricant Oil', ['1.A.3.b.iv', 'Lubricants'], 4],  # PRT
             ['Lubricant Oil', ['1.A.3.b.iv', 'Lubricants'], 4],  # PRT
             ['Other', ['1.A.3.b.iv', 'OLOther'], 4],  # UKR (and probably others)
             ['Other', ['1.A.3.b.iv', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.iv', 'OLOther'], 4],  # CYP
             ['Other Liquid Fuels', ['1.A.3.b.iv', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.iv', 'OLOther'], 4],  # SWE new in 2023
             ['Lube', ['1.A.3.b.iv', 'Lubricants'], 4],  # MCO
             ['Lube', ['1.A.3.b.iv', 'Lubricants'], 4],  # MCO
             ['Lubricants in 2-stroke engines', ['1.A.3.b.iv', 'Lubricants'], 4],  # HUN
             ['Lubricants in 2-stroke engines', ['1.A.3.b.iv', 'Lubricants'], 4],  # HUN
             ['Lubricants (two-stroke engines)', ['1.A.3.b.iv', 'Lubricants'], 4],  # ESP
             ['Lubricants (two-stroke engines)', ['1.A.3.b.iv', 'Lubricants'], 4],  # ESP
@@ -853,7 +857,7 @@ CRF2022 = {
             ['Biomass', ['1.A.3.b.v.6', 'Biomass'], 4],
             ['Biomass', ['1.A.3.b.v.6', 'Biomass'], 4],
             ['Other Fossil Fuels (please specify)', ['1.A.3.b.v.6', 'OtherFF'], 4],
             ['Other Fossil Fuels (please specify)', ['1.A.3.b.v.6', 'OtherFF'], 4],
             # BEL
             # BEL
-            ['Lubricant Two-Stroke Engines', ['1.A.3.b.v.7', 'Total'], 3],
+            ['Lubricant Two-Stroke Engines', ['1.A.3.b.v.7', 'Lubricants'], 3],
             ['Other Liquid Fuels (please specify)', ['1.A.3.b.v.7', 'OtherLiquid'], 4],
             ['Other Liquid Fuels (please specify)', ['1.A.3.b.v.7', 'OtherLiquid'], 4],
             # ROU
             # ROU
             ['Gaseous Fuels', ['1.A.3.b.v.8', 'Total'], 3],
             ['Gaseous Fuels', ['1.A.3.b.v.8', 'Total'], 3],
@@ -925,6 +929,9 @@ CRF2022 = {
             ['Fuel oil C', ['1.A.3.d', 'FuelOilC'], 3],  # JPN
             ['Fuel oil C', ['1.A.3.d', 'FuelOilC'], 3],  # JPN
             ['Diesel Oil', ['1.A.3.d', 'OLDiesel'], 3],  # FIN
             ['Diesel Oil', ['1.A.3.d', 'OLDiesel'], 3],  # FIN
             ['Other Liquid Fuels', ['1.A.3.d', 'OLOther'], 3],  # ROU, new in 2022
             ['Other Liquid Fuels', ['1.A.3.d', 'OLOther'], 3],  # ROU, new in 2022
+            ['Heating and Other Gasoil', ['1.A.3.d', 'OLHeatingOtherGasoil'], 3],
+            # ROU, new in 2023
+            ['Liquified Petroleum Gas', ['1.A.3.d', 'OLLPG'], 3],  # ROU, new in 2023
             ['Gaseous fuels', ['1.A.3.d', 'Gaseous'], 2],
             ['Gaseous fuels', ['1.A.3.d', 'Gaseous'], 2],
             ['Biomass(6)', ['1.A.3.d', 'Biomass'], 2],
             ['Biomass(6)', ['1.A.3.d', 'Biomass'], 2],
             ['Other fossil fuels (please specify)(4)', ['1.A.3.d', 'OtherFF'], 2],
             ['Other fossil fuels (please specify)(4)', ['1.A.3.d', 'OtherFF'], 2],
@@ -1137,6 +1144,7 @@ CRF2022 = {
             ['heavy fuel oil', ['1.A.4.c.ii', 'HeavyFuelOil'], 4],  # NOR
             ['heavy fuel oil', ['1.A.4.c.ii', 'HeavyFuelOil'], 4],  # NOR
             ['Other motor fuels', ['1.A.4.c.ii', 'OMotorFuels'], 4],  # RUS
             ['Other motor fuels', ['1.A.4.c.ii', 'OMotorFuels'], 4],  # RUS
             ['Biodiesel (5 percent fossil portion)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Biodiesel (5 percent fossil portion)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
+            ['Lubricating Oil (Two-Stroke Engines)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Gaseous fuels', ['1.A.4.c.ii', 'Gaseous'], 3],
             ['Gaseous fuels', ['1.A.4.c.ii', 'Gaseous'], 3],
             ['Biomass(6)', ['1.A.4.c.ii', 'Biomass'], 3],
             ['Biomass(6)', ['1.A.4.c.ii', 'Biomass'], 3],
             ['Other fossil fuels (please specify)(4)', ['1.A.4.c.ii', 'OtherFF'], 3],
             ['Other fossil fuels (please specify)(4)', ['1.A.4.c.ii', 'OtherFF'], 3],
@@ -1448,6 +1456,8 @@ CRF2022 = {
             ['Flaring', ['1.B.1.c.i'], 1],  # UKR, AUS
             ['Flaring', ['1.B.1.c.i'], 1],  # UKR, AUS
             ['Flaring of gas', ['1.B.1.c.i'], 1],  # SWE
             ['Flaring of gas', ['1.B.1.c.i'], 1],  # SWE
             ['Coal Dumps', ['1.B.1.c.ii'], 1],  # JPN
             ['Coal Dumps', ['1.B.1.c.ii'], 1],  # JPN
+            ['Uncontrolled combustion and burning coal dumps', ['1.B.1.c.ii'], 1],
+            # JPN since 2023
             ['SO2 scrubbing', ['1.B.1.c.iii'], 1],  # SVN
             ['SO2 scrubbing', ['1.B.1.c.iii'], 1],  # SVN
             ['Flaring of coke oven gas', ['1.B.1.c.iv'], 1],  # KAZ
             ['Flaring of coke oven gas', ['1.B.1.c.iv'], 1],  # KAZ
             ['Emisson from Coke Oven Gas Subsystem', ['1.B.1.c.iv'], 1],  # POL
             ['Emisson from Coke Oven Gas Subsystem', ['1.B.1.c.iv'], 1],  # POL
@@ -2280,6 +2290,8 @@ CRF2022 = {
             ['Mechanical-Biological Treatment MBT', ['5.E.2']],  # DEU
             ['Mechanical-Biological Treatment MBT', ['5.E.2']],  # DEU
             ['Accidental fires', ['5.E.3']],  # DEU, DKE, DNK, DNM
             ['Accidental fires', ['5.E.3']],  # DEU, DKE, DNK, DNM
             ['Decomposition of Petroleum-Derived Surfactants', ['5.E.4']],  # JPN
             ['Decomposition of Petroleum-Derived Surfactants', ['5.E.4']],  # JPN
+            ['Decomposition of Fossil-fuel Derived Surfactants', ['5.E.4']],
+            # JPN since 2023
             ['Other non-specified', ['5.E.5']],  # USA
             ['Other non-specified', ['5.E.5']],  # USA
             ['Biogas burning without energy recovery', ['5.E.6']],  # PRT
             ['Biogas burning without energy recovery', ['5.E.6']],  # PRT
             ['Sludge spreading', ['5.E.7']],  # ESP
             ['Sludge spreading', ['5.E.7']],  # ESP
@@ -2462,13 +2474,16 @@ CRF2022 = {
             ['Other (please specify)', ['5.C.2.a.ii'], 2],
             ['Other (please specify)', ['5.C.2.a.ii'], 2],
             ['agricultural waste', ['5.C.2.a.ii.1'], 3],  # ITA
             ['agricultural waste', ['5.C.2.a.ii.1'], 3],  # ITA
             ['Agricultural residues', ['5.C.2.a.ii.1'], 3],  # ESP
             ['Agricultural residues', ['5.C.2.a.ii.1'], 3],  # ESP
+            ['Agriculture residues', ['5.C.2.a.ii.1'], 3],  # PRT
             ['Natural residues', ['5.C.2.a.ii.2'], 3],  # CHE
             ['Natural residues', ['5.C.2.a.ii.2'], 3],  # CHE
             ['Wood waste', ['5.C.2.a.ii.3'], 3],  # GBR, GBK
             ['Wood waste', ['5.C.2.a.ii.3'], 3],  # GBR, GBK
             ['Bonfires etc.', ['5.C.2.a.ii.4'], 3],  # DEU
             ['Bonfires etc.', ['5.C.2.a.ii.4'], 3],  # DEU
             ['Bonfires', ['5.C.2.a.ii.4'], 3],  # NLD, ISL
             ['Bonfires', ['5.C.2.a.ii.4'], 3],  # NLD, ISL
             ['Other', ['5.C.2.a.ii.5'], 3],  # EST
             ['Other', ['5.C.2.a.ii.5'], 3],  # EST
             ['Other waste', ['5.C.2.a.ii.5'], 3],  # CZE
             ['Other waste', ['5.C.2.a.ii.5'], 3],  # CZE
+            ['Waste', ['5.C.2.a.ii.5'], 3],  # GBR
             ['Industrial Solid Waste', ['5.C.2.a.ii.6'], 3],  # JPN
             ['Industrial Solid Waste', ['5.C.2.a.ii.6'], 3],  # JPN
+            ['Vine', ['5.C.2.a.ii.7'], 3], # AUT
             ['Non-biogenic', ['5.C.2.b'], 1],
             ['Non-biogenic', ['5.C.2.b'], 1],
             ['Municipal solid waste', ['5.C.2.b.i'], 2],
             ['Municipal solid waste', ['5.C.2.b.i'], 2],
             ['Other (please specify)', ['5.C.2.b.ii'], 2],
             ['Other (please specify)', ['5.C.2.b.ii'], 2],
@@ -2478,6 +2493,7 @@ CRF2022 = {
             ['Bonfires', ['5.C.2.b.ii.4'], 3],  # ISL
             ['Bonfires', ['5.C.2.b.ii.4'], 3],  # ISL
             ['Other', ['5.C.2.b.ii.5'], 3],  # EST
             ['Other', ['5.C.2.b.ii.5'], 3],  # EST
             ['Other waste', ['5.C.2.b.ii.5'], 3],  # CZE
             ['Other waste', ['5.C.2.b.ii.5'], 3],  # CZE
+            ['Waste', ['5.C.2.b.ii.5'], 3],  # GBR
             ['Industrial Solid Waste', ['5.C.2.b.ii.6'], 3],  # JPN
             ['Industrial Solid Waste', ['5.C.2.b.ii.6'], 3],  # JPN
         ],
         ],
         "entity_mapping": {
         "entity_mapping": {
@@ -2528,41 +2544,137 @@ CRF2022 = {
         },
         },
     },  # tested
     },  # tested
     "Summary1.As1": {  # Summary 1, sheet 1
     "Summary1.As1": {  # Summary 1, sheet 1
-        "status": "TODO",
+        "status": "tested",
          "table": {
          "table": {
             "firstrow": 5,
             "firstrow": 5,
-            "lastrow": 26,
+            "lastrow": 28,
             "header": ['entity', 'unit'],
             "header": ['entity', 'unit'],
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "categories": ["category"],
             "cols_to_ignore": [],
             "cols_to_ignore": [],
             "stop_cats": ["", np.nan],
             "stop_cats": ["", np.nan],
-            "unit_info": unit_info["default"],
+            "unit_info": unit_info["summary"],
         },
         },
         "sector_mapping": [
         "sector_mapping": [
-            ['Total Energy', ['1']],
-            ['A. Fuel combustion activities (sectoral approach)', ['1.A']],
+            ['Total national emissions and removals', ['0']],
+            ['1. Energy', ['1']],
+            ['A. Fuel combustion Reference approach(2)', ['1.A-ref']],
+            ['Sectoral approach(2)', ['1.A']],
             ['1. Energy industries', ['1.A.1']],
             ['1. Energy industries', ['1.A.1']],
-            ['a. Public electricity and heat production', ['1.A.1.a']],
-            ['b. Petroleum refining', ['1.A.1.b']],
-            ['c. Manufacture of solid fuels and other energy industries', ['1.A.1.c']],
             ['2. Manufacturing industries and construction', ['1.A.2']],
             ['2. Manufacturing industries and construction', ['1.A.2']],
-            ['a. Iron and steel', ['1.A.2.a']],
-            ['b. Non-ferrous metals', ['1.A.2.b']],
-            ['c. Chemicals', ['1.A.2.c']],
-            ['d. Pulp, paper and print', ['1.A.2.d']],
-            ['e. Food processing, beverages and tobacco', ['1.A.2.e']],
-            ['f. Non-metallic minerals', ['1.A.2.f']],
-            ['g. Other (please specify)', ['1.A.2.g']],
             ['3. Transport', ['1.A.3']],
             ['3. Transport', ['1.A.3']],
-            ['a. Domestic aviation', ['1.A.3.a']],
-            ['b. Road transportation', ['1.A.3.b']],
-            ['c. Railways', ['1.A.3.c']],
-            ['d. Domestic navigation', ['1.A.3.d']],
-            ['e. Other transportation', ['1.A.3.e']],
+            ['4. Other sectors', ['1.A.4']],
+            ['5. Other', ['1.A.5']],
+            ['B. Fugitive emissions from fuels', ['1.B']],
+            ['1. Solid fuels', ['1.B.1']],
+            ['2. Oil and natural gas and other emissions from energy production',
+             ['1.B.2']],
+            ['C. CO2 Transport and storage', ['1.C']],
+            ['2. Industrial processes and product use', ['2']],
+            ['A. Mineral industry', ['2.A']],
+            ['B. Chemical industry', ['2.B']],
+            ['C. Metal industry', ['2.C']],
+            ['D. Non-energy products from fuels and solvent use', ['2.D']],
+            ['E. Electronic industry', ['2.E']],
+            ['F. Product uses as substitutes for ODS', ['2.F']],
+            ['G. Other product manufacture and use', ['2.G']],
+            ['H. Other(3)', ['2.H']],
         ],
         ],
         "entity_mapping": {
         "entity_mapping": {
-            "NOX": "NOx",
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs(1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
+        },
+        "coords_defaults": {
+            "class": "Total",
+        },
+    },  # tested
+    "Summary1.As2": {  # Summary 1, sheet 2
+        "status": "tested",
+         "table": {
+            "firstrow": 5,
+            "lastrow": 34,
+            "header": ['entity', 'entity', 'unit'],
+            "header_fill": [True, False, True],
+            "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
+            "categories": ["category"],
+            "cols_to_ignore": [],
+            "stop_cats": ["", np.nan],
+            "unit_info": unit_info["summary"],
+        },
+        "sector_mapping": [
+            ['3. Agriculture', ['3']],
+            ['A. Enteric fermentation', ['3.A']],
+            ['B. Manure management', ['3.B']],
+            ['C. Rice cultivation', ['3.C']],
+            ['D. Agricultural soils', ['3.D']],
+            ['E. Prescribed burning of savannas', ['3.E']],
+            ['F. Field burning of agricultural residues', ['3.F']],
+            ['G. Liming', ['3.G']],
+            ['H. Urea application', ['3.H']],
+            ['I. Other carbon-contining fertilizers', ['3.I']],
+            ['J. Other', ['3.J']],
+            ['4. Land use, land-use change and forestry (4)', ['4']],
+            ['A. Forest land (4)', ['4.A']],
+            ['B. Cropland (4)', ['4.B']],
+            ['C. Grassland (4)', ['4.C']],
+            ['D. Wetlands (4)', ['4.D']],
+            ['E. Settlements (4)', ['4.E']],
+            ['F. Other land (4)', ['4.F']],
+            ['G. Harvested wood products', ['4.G']],
+            ['H. Other (4)', ['4.H']],
+            ['5. Waste', ['5']],
+            ['A. Solid waste disposal (5)', ['5.A']],
+            ['B. Biological treatment of solid waste (5)', ['5.B']],
+            ['C. Incineration and open burning of waste (5)', ['5.C']],
+            ['D. Wastewater treatment and discharge', ['5.D']],
+            ['E. Other (5)', ['5.E']],
+            ['6. Other (please specify)(6)', ['6']],
+        ],
+        "entity_mapping": {
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs (1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
+        },
+        "coords_defaults": {
+            "class": "Total",
+        },
+    },  # tested
+    "Summary1.As3": {  # Summary 1, sheet 3
+        "status": "tested",
+         "table": {
+            "firstrow": 5,
+            "lastrow": 17,
+            "header": ['entity', 'entity', 'unit'],
+            "header_fill": [True, False, True],
+            "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
+            "categories": ["category"],
+            "cols_to_ignore": [],
+            "stop_cats": ["", np.nan],
+            "unit_info": unit_info["summary"],
+        },
+        "sector_mapping": [
+            ['Memo items:(7)', ['\IGNORE']],
+            ['International bunkers', ['M.Memo.Int']],
+            ['Aviation', ['M.Memo.Int.Avi']],
+            ['Navigation', ['M.Memo.Int.Mar']],
+            ['Multilateral operations', ['M.Memo.Mult']],
+            ['CO2 emissions from biomass', ['M.Memo.Bio']],
+            ['CO2 captured', ['M.Memo.CO2Cap']],
+            ['Long-term storage of C in waste disposal sites', ['M.Memo.LTSW']],
+            ['Indirect N2O', ['M.Memo.IndN2O']],
+            ['Indirect CO2', ['M.Memo.IndCO2']],
+        ],
+        "entity_mapping": {
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs(1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
         },
         },
         "coords_defaults": {
         "coords_defaults": {
             "class": "Total",
             "class": "Total",

+ 2 - 0
code/UNFCCC_CRF_reader/crf_specifications/__init__.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py

@@ -5,3 +5,5 @@ Define the CRF specifications here for easy access
 from .CRF2021_specification import CRF2021
 from .CRF2021_specification import CRF2021
 from .CRF2022_specification import CRF2022
 from .CRF2022_specification import CRF2022
 from .CRF2023_specification import CRF2023
 from .CRF2023_specification import CRF2023
+
+__all__ = ["CRF2021", "CRF2022", "CRF2023"]

+ 10 - 0
code/UNFCCC_CRF_reader/crf_specifications/util.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py

@@ -29,4 +29,14 @@ unit_info = {
         },
         },
         "default_unit": "kt",
         "default_unit": "kt",
     },
     },
+    "summary": {  # contains fgas mixtures in CO2 eq units
+        "unit_row": 0,
+        "entity_row": "header",
+        "regexp_entity": r".*",
+        "regexp_unit": r"\((.*)\)",
+        "manual_repl_unit": {
+            "(kt CO2 equivalent)": "kt CO2eq",
+        },
+        "default_unit": "kt",
+    },
 }
 }

+ 2 - 2
code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py

@@ -3,11 +3,11 @@ This script is a wrapper around the read_crf_for_country
 function such that it can be called from datalad
 function such that it can be called from datalad
 """
 """
 
 
-from UNFCCC_CRF_reader_prod import read_crf_for_country
+from .UNFCCC_CRF_reader_prod import read_crf_for_country
 import argparse
 import argparse
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--re_read', help='Read data also if already read before',
 parser.add_argument('--re_read', help='Read data also if already read before',

+ 2 - 2
code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py

@@ -4,11 +4,11 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 not in the venv.
 """
 """
 
 
-from UNFCCC_CRF_reader_prod import read_crf_for_country_datalad
+from .UNFCCC_CRF_reader_prod import read_crf_for_country_datalad
 import argparse
 import argparse
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 parser.add_argument('--submission_year', help='Submission round to read')
 parser.add_argument('--submission_year', help='Submission round to read')
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--re_read', help='Read data also if already read before',
 parser.add_argument('--re_read', help='Read data also if already read before',

+ 1 - 1
code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py

@@ -3,7 +3,7 @@ This script is a wrapper around the read_crf_for_country
 function such that it can be called from datalad
 function such that it can be called from datalad
 """
 """
 
 
-from UNFCCC_CRF_reader_prod import read_new_crf_for_year
+from .UNFCCC_CRF_reader_prod import read_new_crf_for_year
 import argparse
 import argparse
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()

+ 1 - 1
code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py

@@ -4,7 +4,7 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 not in the venv.
 """
 """
 
 
-from UNFCCC_CRF_reader_prod import read_new_crf_for_year_datalad
+from .UNFCCC_CRF_reader_prod import read_new_crf_for_year_datalad
 from util import NoCRFFilesError
 from util import NoCRFFilesError
 import argparse
 import argparse
 
 

+ 7 - 1
code/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py

@@ -3,21 +3,27 @@ This script is a wrapper around the read_year_to_test_specs
 function such that it can be called from datalad
 function such that it can be called from datalad
 """
 """
 
 
-from UNFCCC_CRF_reader_devel import read_year_to_test_specs
+from UNFCCC_GHG_data.UNFCCC_CRF_reader.UNFCCC_CRF_reader_devel import read_year_to_test_specs
 import argparse
 import argparse
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
 parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
+parser.add_argument('--totest', help='read tables to test', action='store_true')
 args = parser.parse_args()
 args = parser.parse_args()
 
 
 
 
 submission_year = args.submission_year
 submission_year = args.submission_year
 data_year = args.data_year
 data_year = args.data_year
+if args.totest:
+    totest = True
+else:
+    totest = False
 
 
 read_year_to_test_specs(
 read_year_to_test_specs(
     submission_year=submission_year,
     submission_year=submission_year,
     data_year=data_year,
     data_year=data_year,
+    totest=totest,
 )
 )
 
 
 
 

+ 1 - 1
code/UNFCCC_CRF_reader/util.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/util.py

@@ -4,7 +4,7 @@ from pathlib import Path
 root_path = Path(__file__).parents[2].absolute()
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 root_path = root_path.resolve()
 log_path = root_path / "log"
 log_path = root_path / "log"
-code_path = root_path / "code"
+code_path = root_path / "UNFCCC_GHG_data"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 
 

+ 0 - 0
UNFCCC_GHG_data/UNFCCC_downloader/__init__.py


+ 0 - 0
code/UNFCCC_downloader/download_annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py


+ 0 - 0
code/UNFCCC_downloader/download_ndc.py → UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py


+ 0 - 0
code/UNFCCC_downloader/download_non-annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_bur.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_nc.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py


+ 0 - 0
code/UNFCCC_downloader/unfccc_submission_info.py → UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py


+ 5 - 5
code/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py

@@ -86,7 +86,7 @@ cat_codes_manual = {  # conversion to PRIMAP1 format
     'S/N': 'MMULTIOP',
     'S/N': 'MMULTIOP',
 }
 }
 
 
-cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,8}).*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[A-Z0-9]{1,8}).*'
 
 
 time_format = "%Y"
 time_format = "%Y"
 
 
@@ -219,7 +219,7 @@ for page in pages_to_read:
     if page in range(232, 235):
     if page in range(232, 235):
         df_current.iloc[
         df_current.iloc[
             metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
             metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
     # set index. necessary for the stack operation in the conversion to long format
@@ -253,7 +253,7 @@ for page in pages_to_read:
 
 
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     # then the regex replacements
     # then the regex replacements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
                                                                 regex=True)
                                                                 regex=True)
 
 
@@ -311,7 +311,7 @@ for page in pages_to_read_fgases:
         dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
         dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
     df_current = df_current.drop(idx_header)
     df_current = df_current.drop(idx_header)
 
 
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
     # set index. necessary for the stack operation in the conversion to long format
@@ -350,7 +350,7 @@ for page in pages_to_read_fgases:
 
 
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     # then the regex repalcements
     # then the regex repalcements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
                                                                 regex=True)
                                                                 regex=True)
 
 

+ 1 - 1
code/UNFCCC_reader/Chile/config_CHL_BUR4.py → UNFCCC_GHG_data/UNFCCC_reader/Chile/config_CHL_BUR4.py

@@ -64,7 +64,7 @@ filter_remove_IPCC2006 = {
 }
 }
 
 
 
 
-cat_mapping = { # categories not listed here have the same code as in IPCC 2006 specifications
+cat_mapping = { # categories not listed here have the same UNFCCC_GHG_data as in IPCC 2006 specifications
     '3': 'M.AG',
     '3': 'M.AG',
     '3.A': '3.A.1',
     '3.A': '3.A.1',
     '3.A.1': '3.A.1.a',
     '3.A.1': '3.A.1.a',

+ 2 - 2
code/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py

@@ -52,7 +52,7 @@ unit_info = {
 }
 }
 cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
 cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
                 'Categorías de fuente y sumidero de gases de efecto invernadero.1']
                 'Categorías de fuente y sumidero de gases de efecto invernadero.1']
-# columns for category code and original category name
+# columns for category UNFCCC_GHG_data and original category name
 index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
 index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
 
 
 # operations on long format DF
 # operations on long format DF
@@ -169,7 +169,7 @@ for year in years_to_read:
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
     # drop the columns which are empty and repetition of the metadata for the second block
     # drop the columns which are empty and repetition of the metadata for the second block
     df_current.drop(cols_to_drop, axis=1, inplace=True)
     df_current.drop(cols_to_drop, axis=1, inplace=True)
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set multi-index. necessary for the stack operation in the conversion to long format
     # set multi-index. necessary for the stack operation in the conversion to long format

+ 0 - 0
code/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py


+ 3 - 3
code/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py

@@ -38,7 +38,7 @@ year = 2019
 entity_row = 0
 entity_row = 0
 unit_row = 1
 unit_row = 1
 index_cols = "Categories"
 index_cols = "Categories"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
 
 
@@ -51,7 +51,7 @@ cat_codes_manual = {
     #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
     #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
 }
 }
 
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9]{1,4})\s.*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9]{1,4})\s.*'
 
 
 coords_cols = {
 coords_cols = {
     "category": "category",
     "category": "category",
@@ -202,7 +202,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 df_all = df_all.reset_index(drop=True)
 
 

+ 0 - 0
code/UNFCCC_reader/Mexico/config_MEX_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Mexico/config_MEX_BUR3.py


+ 3 - 3
code/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py

@@ -32,7 +32,7 @@ entity_row = 0
 unit_row = 1
 unit_row = 1
 
 
 index_cols = "Categorías de fuentes y sumideros de GEI"
 index_cols = "Categorías de fuentes y sumideros de GEI"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
 units = {
 units = {
@@ -53,7 +53,7 @@ cat_codes_manual = {
     '2F6 Otras aplicaciones': '2F6',
     '2F6 Otras aplicaciones': '2F6',
 }
 }
 
 
-cat_code_regexp = r'^\[(?P<code>[a-zA-Z0-9]{1,3})\].*'
+cat_code_regexp = r'^\[(?P<UNFCCC_GHG_data>[a-zA-Z0-9]{1,3})\].*'
 
 
 coords_cols = {
 coords_cols = {
     "category": "category",
     "category": "category",
@@ -168,7 +168,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 df_all = df_all.reset_index(drop=True)
 
 

+ 0 - 0
code/UNFCCC_reader/Montenegro/config_MNE_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Montenegro/config_MNE_BUR3.py


+ 1 - 1
code/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -129,7 +129,7 @@ for i, table in enumerate(tables):
         unit_parts = unit.split(" ")
         unit_parts = unit.split(" ")
         unit = f"{unit_parts[0]} CO2eq"
         unit = f"{unit_parts[0]} CO2eq"
 
 
-    # remove "/n" from category code and name columns
+    # remove "/n" from category UNFCCC_GHG_data and name columns
     df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace("\n", "")
     df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace("\n", "")
     df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace("\n", "")
     df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace("\n", "")
 
 

+ 0 - 0
code/UNFCCC_reader/Morocco/config_MAR_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Morocco/config_MAR_BUR3.py


+ 3 - 3
code/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py

@@ -32,7 +32,7 @@ pages_to_read = range(104, 138)
 
 
 compression = dict(zlib=True, complevel=9)
 compression = dict(zlib=True, complevel=9)
 
 
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
 index_cols = ['Catégories']
 index_cols = ['Catégories']
@@ -58,7 +58,7 @@ cat_codes_manual = {
     '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
     '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
 }
 }
 
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
 
 
 coords_terminologies = {
 coords_terminologies = {
     "area": "ISO3",
     "area": "ISO3",
@@ -171,7 +171,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 df_all = df_all.reset_index(drop=True)
 
 

+ 0 - 0
code/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py


+ 2 - 2
code/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py

@@ -37,7 +37,7 @@ years_to_read = range(1990, 2019 + 1)
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 cols_to_read = range(1, 2019 - 1990 + 3)
 cols_to_read = range(1, 2019 - 1990 + 3)
 
 
-# columns for category code and original category name
+# columns for category UNFCCC_GHG_data and original category name
 index_cols = ['분야·부문/연도']
 index_cols = ['분야·부문/연도']
 
 
 sheet_metadata = {
 sheet_metadata = {
@@ -136,7 +136,7 @@ for sheet in sheets_to_read:
     # read current sheet (one sheet per gas)
     # read current sheet (one sheet per gas)
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
                                engine="openpyxl")
                                engine="openpyxl")
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
     # set index. necessary for the stack operation in the conversion to long format

+ 2 - 2
code/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py

@@ -32,7 +32,7 @@ years_to_read = range(1990, 2018 + 1)
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 cols_to_read = range(1, 2018 - 1990 + 3)
 cols_to_read = range(1, 2018 - 1990 + 3)
 
 
-# columns for category code and original category name
+# columns for category UNFCCC_GHG_data and original category name
 index_cols = ['분야·부문/연도']
 index_cols = ['분야·부문/연도']
 
 
 sheet_metadata = {
 sheet_metadata = {
@@ -131,7 +131,7 @@ for sheet in sheets_to_read:
     # read current sheet (one sheet per gas)
     # read current sheet (one sheet per gas)
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=144, usecols=cols_to_read,
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=144, usecols=cols_to_read,
                                engine="openpyxl")
                                engine="openpyxl")
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
     # set index. necessary for the stack operation in the conversion to long format

+ 0 - 0
code/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py → UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py


+ 2 - 2
code/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py

@@ -32,7 +32,7 @@ if not output_folder.exists():
 output_filename = 'TWN_inventory_2022_'
 output_filename = 'TWN_inventory_2022_'
 inventory_file = '00_abstract_en.pdf'
 inventory_file = '00_abstract_en.pdf'
 
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9\.]{1,7})\s.*'
 
 
 time_format = "%Y"
 time_format = "%Y"
 
 
@@ -227,7 +227,7 @@ for table_name in table_defs.keys():
     df_this_table["category"] = df_this_table["category"].replace(
     df_this_table["category"] = df_this_table["category"].replace(
         table_def["cat_codes_manual"])
         table_def["cat_codes_manual"])
     # then the regex replacements
     # then the regex replacements
-    repl = lambda m: m.group('code')
+    repl = lambda m: m.group('UNFCCC_GHG_data')
     df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
     df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
                                                                       repl, regex=True)
                                                                       repl, regex=True)
 
 

+ 3 - 3
code/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -44,7 +44,7 @@ unit_row = 1
 gwp_to_use = "AR4GWP100"
 gwp_to_use = "AR4GWP100"
 
 
 index_cols = "Greenhouse gas source and sink categories"
 index_cols = "Greenhouse gas source and sink categories"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
 # manual category codes
 # manual category codes
@@ -54,7 +54,7 @@ cat_codes_manual = {
     'CO2 from Biomass': 'MBIO',
     'CO2 from Biomass': 'MBIO',
 }
 }
 
 
-cat_code_regexp = r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*'
+cat_code_regexp = r'^(?P<UNFCCC_GHG_data>[a-zA-Z0-9]{1,4})[\s\.].*'
 
 
 coords_cols = {
 coords_cols = {
     "category": "category",
     "category": "category",
@@ -184,7 +184,7 @@ df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
 # first the manual replacements
 # first the manual replacements
 df_inventory_long["category"] = df_inventory_long["category"].replace(cat_codes_manual)
 df_inventory_long["category"] = df_inventory_long["category"].replace(cat_codes_manual)
 # then the regex replacements
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_inventory_long["category"] = df_inventory_long["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_inventory_long["category"] = df_inventory_long["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_inventory_long = df_inventory_long.reset_index(drop=True)
 df_inventory_long = df_inventory_long.reset_index(drop=True)
 
 

+ 6 - 0
UNFCCC_GHG_data/UNFCCC_reader/__init__.py

@@ -0,0 +1,6 @@
+# expose some of the functions to the outside as they are used in other readers as well
+# TODO: create a unified util module for all readers
+
+from .get_submissions_info import get_country_code
+
+__all__ = ["get_country_code"]

+ 1 - 1
code/UNFCCC_reader/country_info.py → UNFCCC_GHG_data/UNFCCC_reader/country_info.py

@@ -8,7 +8,7 @@ from get_submissions_info import get_country_datasets
 # Find the right function and possible input and output files and
 # Find the right function and possible input and output files and
 # read the data using datalad run.
 # read the data using datalad run.
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 args = parser.parse_args()
 args = parser.parse_args()
 country = args.country
 country = args.country
 
 

+ 0 - 0
code/UNFCCC_reader/folder_mapping.json → UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json


+ 0 - 0
code/UNFCCC_reader/folder_mapping.py → UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py


+ 35 - 35
code/UNFCCC_reader/get_submissions_info.py → UNFCCC_GHG_data/UNFCCC_reader/get_submissions_info.py

@@ -9,7 +9,7 @@ import pycountry
 
 
 root_path = Path(__file__).parents[2].absolute()
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 root_path = root_path.resolve()
-code_path = root_path / "code" / "UNFCCC_reader"
+code_path = root_path / "UNFCCC_GHG_data" / "UNFCCC_reader"
 # beware, folders below are different than for CRF reader
 # beware, folders below are different than for CRF reader
 downloaded_data_path = root_path / "downloaded_data"
 downloaded_data_path = root_path / "downloaded_data"
 extracted_data_path = root_path / "extracted_data"
 extracted_data_path = root_path / "extracted_data"
@@ -46,14 +46,14 @@ def get_country_submissions(
         print_sub: bool = True,
         print_sub: bool = True,
 ) -> Dict[str, List[str]]:
 ) -> Dict[str, List[str]]:
     """
     """
-    Input is a three letter ISO code for a country, or the countries name.
-    The function tries to map the country name to an ISO code and then
+    Input is a three letter ISO UNFCCC_GHG_data for a country, or the countries name.
+    The function tries to map the country name to an ISO UNFCCC_GHG_data and then
     queries the folder mapping files for folders.
     queries the folder mapping files for folders.
 
 
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
 
         print_sub: bool
         print_sub: bool
             If True information on submissions will be written to stdout
             If True information on submissions will be written to stdout
@@ -70,7 +70,7 @@ def get_country_submissions(
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
 
 
     if print_sub:
     if print_sub:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
 
     country_submissions = {}
     country_submissions = {}
     if print_sub:
     if print_sub:
@@ -115,14 +115,14 @@ def get_country_datasets(
         print_ds: bool = True,
         print_ds: bool = True,
 ) -> Dict[str, List[str]]:
 ) -> Dict[str, List[str]]:
     """
     """
-    Input is a three letter ISO code for a country, or the country's name.
-    The function tries to map the country name to an ISO code and then
-    checks the code and data folders for content on the country.
+    Input is a three letter ISO UNFCCC_GHG_data for a country, or the country's name.
+    The function tries to map the country name to an ISO UNFCCC_GHG_data and then
+    checks the UNFCCC_GHG_data and data folders for content on the country.
 
 
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
 
         print_ds: bool
         print_ds: bool
             If True information on submissions will be written to stdout
             If True information on submissions will be written to stdout
@@ -138,11 +138,11 @@ def get_country_datasets(
     data_folder_legacy = legacy_data_path
     data_folder_legacy = legacy_data_path
 
 
 
 
-    # obtain country code
+    # obtain country UNFCCC_GHG_data
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
 
 
     if print_ds:
     if print_ds:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
 
     rep_data = {}
     rep_data = {}
     # data
     # data
@@ -181,7 +181,7 @@ def get_country_datasets(
                     # process filename to get submission
                     # process filename to get submission
                     parts = dataset.split('_')
                     parts = dataset.split('_')
                     if parts[0] != country_code:
                     if parts[0] != country_code:
-                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
+                        cleaned_datasets_current_folder[f'Wrong UNFCCC_GHG_data: {parts[0]}'] = dataset
                     else:
                     else:
                         terminology = "_".join(parts[3 : ])
                         terminology = "_".join(parts[3 : ])
                         key = f"{parts[1]} ({parts[2]}, {terminology})"
                         key = f"{parts[1]} ({parts[2]}, {terminology})"
@@ -197,9 +197,9 @@ def get_country_datasets(
 
 
                         code_file = get_code_file(country_code, parts[1])
                         code_file = get_code_file(country_code, parts[1])
                         if code_file:
                         if code_file:
-                            data_info = data_info + f"code: {code_file.name}"
+                            data_info = data_info + f"UNFCCC_GHG_data: {code_file.name}"
                         else:
                         else:
-                            data_info = data_info + f"code: not found"
+                            data_info = data_info + f"UNFCCC_GHG_data: not found"
 
 
                         cleaned_datasets_current_folder[key] = data_info
                         cleaned_datasets_current_folder[key] = data_info
 
 
@@ -250,7 +250,7 @@ def get_country_datasets(
                     # process filename to get submission
                     # process filename to get submission
                     parts = dataset.split('_')
                     parts = dataset.split('_')
                     if parts[0] != country_code:
                     if parts[0] != country_code:
-                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
+                        cleaned_datasets_current_folder[f'Wrong UNFCCC_GHG_data: {parts[0]}'] = dataset
                     else:
                     else:
                         terminology = "_".join(parts[3 : ])
                         terminology = "_".join(parts[3 : ])
                         key = f"{parts[1]} ({parts[2]}, {terminology}, legacy)"
                         key = f"{parts[1]} ({parts[2]}, {terminology}, legacy)"
@@ -288,13 +288,13 @@ def get_country_code(
         country_name: str,
         country_name: str,
 )->str:
 )->str:
     """
     """
-    obtain country code. If the input is a code it will be returned, if the input
-    is not a three letter code a search will be performed
+    obtain country UNFCCC_GHG_data. If the input is a UNFCCC_GHG_data it will be returned, if the input
+    is not a three letter UNFCCC_GHG_data a search will be performed
 
 
     Parameters
     Parameters
     __________
     __________
     country_name: str
     country_name: str
-        Country code or name to get the three-letter code for.
+        Country UNFCCC_GHG_data or name to get the three-letter UNFCCC_GHG_data for.
 
 
     """
     """
     # First check if it's in the list of custom codes
     # First check if it's in the list of custom codes
@@ -302,7 +302,7 @@ def get_country_code(
         country_code = country_name
         country_code = country_name
     else:
     else:
         try:
         try:
-            # check if it's a 3 letter code
+            # check if it's a 3 letter UNFCCC_GHG_data
             country = pycountry.countries.get(alpha_3=country_name)
             country = pycountry.countries.get(alpha_3=country_name)
             country_code = country.alpha_3
             country_code = country.alpha_3
         except:
         except:
@@ -310,7 +310,7 @@ def get_country_code(
                 country = pycountry.countries.search_fuzzy(country_name.replace("_", " "))
                 country = pycountry.countries.search_fuzzy(country_name.replace("_", " "))
             except:
             except:
                 raise ValueError(f"Country name {country_name} can not be mapped to "
                 raise ValueError(f"Country name {country_name} can not be mapped to "
-                                 f"any country code. Try using the ISO3 code directly.")
+                                 f"any country UNFCCC_GHG_data. Try using the ISO3 UNFCCC_GHG_data directly.")
             if len(country) > 1:
             if len(country) > 1:
                 country_code = None
                 country_code = None
                 for current_country in country:
                 for current_country in country:
@@ -337,13 +337,13 @@ def get_possible_inputs(
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
 
         submission: str
         submission: str
             String of the submission
             String of the submission
 
 
         print_info: bool = False
         print_info: bool = False
-            If True print information on code found
+            If True print information on UNFCCC_GHG_data found
 
 
     Returns
     Returns
     -------
     -------
@@ -352,11 +352,11 @@ def get_possible_inputs(
 
 
     data_folder = downloaded_data_path
     data_folder = downloaded_data_path
 
 
-    # obtain country code
+    # obtain country UNFCCC_GHG_data
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
 
 
     if print_info:
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
 
     input_files = []
     input_files = []
     for item in data_folder.iterdir():
     for item in data_folder.iterdir():
@@ -399,7 +399,7 @@ def get_possible_outputs(
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
 
         submission: str
         submission: str
             String of the submission
             String of the submission
@@ -414,10 +414,10 @@ def get_possible_outputs(
 
 
     data_folder = extracted_data_path
     data_folder = extracted_data_path
 
 
-    # obtain country code
+    # obtain country UNFCCC_GHG_data
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
     if print_info:
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
 
     output_files = []
     output_files = []
     for item in data_folder.iterdir():
     for item in data_folder.iterdir():
@@ -457,17 +457,17 @@ def get_code_file(
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
 
         submission: str
         submission: str
             String of the submission
             String of the submission
 
 
         print_info: bool = False
         print_info: bool = False
-            If True print information on code found
+            If True print information on code found
 
 
     Returns
     Returns
     -------
     -------
-        returns a pathlib Path object for the code file
+        returns a pathlib Path object for the code file
     """
     """
 
 
     code_file_path = None
     code_file_path = None
@@ -477,18 +477,18 @@ def get_code_file(
     if submission[0:3] == "CRF":
     if submission[0:3] == "CRF":
         return root_path / "UNFCCC_CRF_reader"
         return root_path / "UNFCCC_CRF_reader"
 
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
 
 
     if print_info:
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO code {country_code}")
 
 
     with open(code_path / "folder_mapping.json", "r") as mapping_file:
     with open(code_path / "folder_mapping.json", "r") as mapping_file:
         folder_mapping = json.load(mapping_file)
         folder_mapping = json.load(mapping_file)
 
 
     if country_code not in folder_mapping:
     if country_code not in folder_mapping:
         if print_info:
         if print_info:
-            print("No code available")
+            print("No code available")
             print("")
             print("")
     else:
     else:
         country_folder = code_path / folder_mapping[country_code]
         country_folder = code_path / folder_mapping[country_code]
@@ -497,13 +497,13 @@ def get_code_file(
         for file in country_folder.iterdir():
         for file in country_folder.iterdir():
             if file.match(code_file_name_candidate):
             if file.match(code_file_name_candidate):
                 if code_file_path is not None:
                 if code_file_path is not None:
-                    raise ValueError(f"Found multiple code candidates: "
+                    raise ValueError(f"Found multiple code candidates: "
                                      f"{code_file_path} and file.name. "
                                      f"{code_file_path} and file.name. "
                                      f"Please use only one file with name "
                                      f"Please use only one file with name "
                                      f"'read_ISO3_submission_XXX.YYY'.")
                                      f"'read_ISO3_submission_XXX.YYY'.")
                 else:
                 else:
                     if print_info:
                     if print_info:
-                        print(f"Found code file {file.relative_to(root_path)}")
+                        print(f"Found code file {file.relative_to(root_path)}")
                 code_file_path = file
                 code_file_path = file
 
 
     if code_file_path is not None:
     if code_file_path is not None:

+ 4 - 4
code/UNFCCC_reader/read_UNFCCC_submission.py → UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py

@@ -14,7 +14,7 @@ from get_submissions_info import get_possible_outputs
 # Find the right function and possible input and output files and
 # Find the right function and possible input and output files and
 # read the data using datalad run.
 # read the data using datalad run.
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or code')
 parser.add_argument('--submission', help='Submission to read')
 parser.add_argument('--submission', help='Submission to read')
 
 
 args = parser.parse_args()
 args = parser.parse_args()
@@ -34,7 +34,7 @@ print("")
 script_name = get_code_file(country, submission)
 script_name = get_code_file(country, submission)
 
 
 if script_name is not None:
 if script_name is not None:
-    print(f"Found code file {script_name}")
+    print(f"Found code file {script_name}")
     print("")
     print("")
 
 
     # get possible input files
     # get possible input files
@@ -77,8 +77,8 @@ if script_name is not None:
         explicit=True,
         explicit=True,
     )
     )
 else:
 else:
-    # no code found.
-    print(f"No code found to read {submission} from {country}")
+    # no code found.
+    print(f"No code found to read {submission} from {country}")
     print(f"Use 'doit country_info --country={country} to get "
     print(f"Use 'doit country_info --country={country} to get "
           f"a list of available submissions and datasets.")
           f"a list of available submissions and datasets.")
 
 

+ 8 - 0
UNFCCC_GHG_data/__init__.py

@@ -0,0 +1,8 @@
+####
+
+from . import UNFCCC_reader
+from . import UNFCCC_CRF_reader
+# import UNFCCC_DI_reader
+# import UNFCCC_downloader
+
+__all__ = ["UNFCCC_reader", "UNFCCC_CRF_reader"]

+ 0 - 12
code/requirements.txt

@@ -1,12 +0,0 @@
-bs4
-requests
-pandas
-selenium
-primap2
-countrynames
-pycountry
-datalad
-treelib
-camelot-py
-opencv-python
-ghostscript

+ 25 - 23
dodo.py

@@ -7,10 +7,12 @@ from doit import get_var
 def task_setup_venv():
 def task_setup_venv():
     """Create virtual environment"""
     """Create virtual environment"""
     return {
     return {
-        'file_dep': ['code/requirements.txt'],
+        'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
         'actions': ['python3 -m venv venv',
         'actions': ['python3 -m venv venv',
-                    './venv/bin/pip install --upgrade pip',
-                    './venv/bin/pip install -Ur code/requirements.txt',
+                    './venv/bin/pip install --upgrade pip wheel',
+                    #'./venv/bin/pip install -Ur UNFCCC_GHG_data/requirements.txt',
+                    './venv/bin/pip install --upgrade --upgrade-strategy '
+                    'eager -e .[dev]',
                     'touch venv',],
                     'touch venv',],
         'targets': ['venv'],
         'targets': ['venv'],
         'verbosity': 2,
         'verbosity': 2,
@@ -27,7 +29,7 @@ def task_map_folders():
     Create or update the folder mapping in the given folder
     Create or update the folder mapping in the given folder
     """
     """
     return {
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder={read_config_folder['folder']}"],
                     f"--folder={read_config_folder['folder']}"],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
@@ -41,7 +43,7 @@ def task_update_bur():
         'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
         'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
         'actions': ['datalad run -m "Fetch BUR submissions" '
         'actions': ['datalad run -m "Fetch BUR submissions" '
                     '-o downloaded_data/UNFCCC/submissions-bur.csv '
                     '-o downloaded_data/UNFCCC/submissions-bur.csv '
-                    './venv/bin/python code/UNFCCC_downloader/fetch_submissions_bur.py'],
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py'],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
     }
     }
@@ -55,8 +57,8 @@ def task_download_bur():
         # before download
         # before download
         'actions': ['datalad run -m "Download BUR submissions" '
         'actions': ['datalad run -m "Download BUR submissions" '
                     '-i downloaded_data/UNFCCC/submissions-bur.csv '
                     '-i downloaded_data/UNFCCC/submissions-bur.csv '
-                    './venv/bin/python code/UNFCCC_downloader/download_non-annexI.py --category=BUR',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=BUR',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     f"--folder=downloaded_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -70,7 +72,7 @@ def task_update_nc():
         'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
         'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
         'actions': ['datalad run -m "Fetch NC submissions" '
         'actions': ['datalad run -m "Fetch NC submissions" '
                     '-o downloaded_data/UNFCCC/submissions-nc.csv '
                     '-o downloaded_data/UNFCCC/submissions-nc.csv '
-                    './venv/bin/python code/UNFCCC_downloader/fetch_submissions_nc.py'],
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py'],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
     }
     }
@@ -84,8 +86,8 @@ def task_download_nc():
         # before download
         # before download
         'actions': ['datalad run -m "Download NC submissions" '
         'actions': ['datalad run -m "Download NC submissions" '
                     '-i downloaded_data/UNFCCC/submissions-nc.csv '
                     '-i downloaded_data/UNFCCC/submissions-nc.csv '
-                    './venv/bin/python code/UNFCCC_downloader/download_non-annexI.py --category=NC',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=NC',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     f"--folder=downloaded_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -108,7 +110,7 @@ def task_update_annexi():
         'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
         'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
                     "--explicit "
                     "--explicit "
                     f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
                     f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
-                    f"./venv/bin/python code/UNFCCC_downloader/fetch_submissions_annexI.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py "
                     f"--year={update_aI_config['year']}"],
                     f"--year={update_aI_config['year']}"],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
@@ -124,9 +126,9 @@ def task_download_annexi():
         'actions': [f"datalad run -m 'Download AnnexI submissions for "
         'actions': [f"datalad run -m 'Download AnnexI submissions for "
                     f"{update_aI_config['category']}{update_aI_config['year']}' "
                     f"{update_aI_config['category']}{update_aI_config['year']}' "
                     f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
                     f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
-                    f"./venv/bin/python code/UNFCCC_downloader/download_annexI.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py "
                     f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
                     f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     f"--folder=downloaded_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -138,8 +140,8 @@ def task_download_ndc():
     """ Download NDC submissions """
     """ Download NDC submissions """
     return {
     return {
         'actions': ['datalad run -m "Download NDC submissions" '
         'actions': ['datalad run -m "Download NDC submissions" '
-                    './venv/bin/python code/UNFCCC_downloader/download_ndc.py',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     f"--folder=downloaded_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -157,11 +159,11 @@ read_config = {
 
 
 # TODO: make individual task for non-UNFCCC submissions
 # TODO: make individual task for non-UNFCCC submissions
 def task_read_unfccc_submission():
 def task_read_unfccc_submission():
-    """ Read submission for a country (if code exists) (not for CRF)"""
+    """ Read submission for a country (if code exists) (not for CRF)"""
     return {
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/read_UNFCCC_submission.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py "
                     f"--country={read_config['country']} --submission={read_config['submission']}",
                     f"--country={read_config['country']} --submission={read_config['submission']}",
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=extracted_data/UNFCCC"
                     f"--folder=extracted_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -182,11 +184,11 @@ read_config_crf = {
 def task_read_unfccc_crf_submission():
 def task_read_unfccc_crf_submission():
     """ Read CRF submission for a country """
     """ Read CRF submission for a country """
     actions = [
     actions = [
-        f"./venv/bin/python code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
         f"--country={read_config_crf['country']} "
         f"--country={read_config_crf['country']} "
         f"--submission_year={read_config_crf['submission_year']} "
         f"--submission_year={read_config_crf['submission_year']} "
         f"--submission_date={read_config_crf['submission_date']} ",
         f"--submission_date={read_config_crf['submission_date']} ",
-        f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
         f"--folder=extracted_data/UNFCCC"
         f"--folder=extracted_data/UNFCCC"
         ]
         ]
     if read_config_crf["re_read"] == "True":
     if read_config_crf["re_read"] == "True":
@@ -201,9 +203,9 @@ def task_read_unfccc_crf_submission():
 def task_read_new_unfccc_crf_for_year():
 def task_read_new_unfccc_crf_for_year():
     """ Read CRF submission for all countries for given submission year. by default only reads
     """ Read CRF submission for all countries for given submission year. by default only reads
     data not present yet. Only reads the latest updated submission for each country."""
     data not present yet. Only reads the latest updated submission for each country."""
-    actions = [f"./venv/bin/python code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
+    actions = [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
                f"--submission_year={read_config_crf['submission_year']} ",
                f"--submission_year={read_config_crf['submission_year']} ",
-               f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+               f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                f"--folder=extracted_data/UNFCCC"
                f"--folder=extracted_data/UNFCCC"
                ]
                ]
     # specifying countries is currently disabled duo to problems with command line
     # specifying countries is currently disabled due to problems with command line
@@ -224,7 +226,7 @@ def task_country_info():
     """ Print information on submissions and datasets
     """ Print information on submissions and datasets
     available for given country"""
     available for given country"""
     return {
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/country_info.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/country_info.py "
                     f"--country={read_config['country']}"],
                     f"--country={read_config['country']}"],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],

+ 8 - 0
pyproject.toml

@@ -0,0 +1,8 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel",
+    "setuptools_scm[toml]>=3.4"
+]
+build-backend = "setuptools.build_meta"
+

+ 1 - 0
requirements.txt

@@ -0,0 +1 @@
+.

+ 1 - 0
requirements_dev.txt

@@ -0,0 +1 @@
+.[dev]

+ 72 - 0
setup.cfg

@@ -0,0 +1,72 @@
+[metadata]
+name = UNFCCC_GHG_data
+version = 0.2
+author = Johannes Gütschow
+author_email = mail@johannes-guetschow.de
+description = Tools to read GHG data submitted to the UNFCCC using various methods
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/JGuetschow/UNFCCC_non-AnnexI_data
+#project_urls =
+classifiers =
+    Development Status :: 3 - Alpha
+    Intended Audience :: Science/Research
+    Topic :: Scientific/Engineering :: Atmospheric Science
+    License :: OSI Approved :: Apache Software License
+    Natural Language :: English
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+license = Apache Software License 2.0
+license_file = LICENSE
+
+[options]
+packages =
+    UNFCCC_GHG_data
+    UNFCCC_GHG_data.UNFCCC_CRF_reader
+    UNFCCC_GHG_data.UNFCCC_reader
+    UNFCCC_GHG_data.UNFCCC_downloader
+    #UNFCCC_GHG_data.UNFCCC_DI_reader
+    #UNFCCC_GHG_data.datasets
+python_requires = >=3.8
+setup_requires =
+    setuptools_scm
+install_requires =
+    bs4
+    requests
+    pandas
+    selenium
+    primap2
+    countrynames
+    pycountry
+    datalad
+    treelib
+    camelot-py
+    opencv-python
+    ghostscript
+
+[options.extras_require]
+dev =
+    pip
+    wheel
+    bs4
+    requests
+    pandas
+    selenium
+    primap2
+    countrynames
+    pycountry
+    datalad
+    treelib
+    camelot-py
+    opencv-python
+    ghostscript
+    ipykernel
+    jupyter
+
+
+[options.package_data]
+* =
+    *.csv
+    *.nc

+ 5 - 0
setup.py

@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+
+import setuptools
+
+setuptools.setup()