Browse Source

restructure repo and make it a package. Also work on CRF2023 specs

Johannes Gütschow 1 year ago
parent
commit
361eaac03f
58 changed files with 635 additions and 192 deletions
  1. 2 2
      .gitignore
  2. 202 0
      LICENSE
  3. 1 1
      Makefile
  4. 5 5
      UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py
  5. 24 17
      UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py
  6. 24 13
      UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py
  7. 20 20
      UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py
  8. 3 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/__init__.py
  9. 0 0
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py
  10. 0 0
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py
  11. 135 23
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py
  12. 2 0
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py
  13. 10 0
      UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py
  14. 2 2
      UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py
  15. 2 2
      UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py
  16. 1 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py
  17. 1 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py
  18. 7 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py
  19. 1 1
      UNFCCC_GHG_data/UNFCCC_CRF_reader/util.py
  20. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/__init__.py
  21. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py
  22. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py
  23. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py
  24. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py
  25. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py
  26. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py
  27. 0 0
      UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py
  28. 5 5
      UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py
  29. 1 1
      UNFCCC_GHG_data/UNFCCC_reader/Chile/config_CHL_BUR4.py
  30. 2 2
      UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py
  31. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py
  32. 3 3
      UNFCCC_GHG_data/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py
  33. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Mexico/config_MEX_BUR3.py
  34. 3 3
      UNFCCC_GHG_data/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py
  35. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Montenegro/config_MNE_BUR3.py
  36. 1 1
      UNFCCC_GHG_data/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py
  37. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Morocco/config_MAR_BUR3.py
  38. 3 3
      UNFCCC_GHG_data/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py
  39. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py
  40. 2 2
      UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py
  41. 2 2
      UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py
  42. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py
  43. 2 2
      UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py
  44. 3 3
      UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py
  45. 6 0
      UNFCCC_GHG_data/UNFCCC_reader/__init__.py
  46. 1 1
      UNFCCC_GHG_data/UNFCCC_reader/country_info.py
  47. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json
  48. 0 0
      UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py
  49. 35 35
      UNFCCC_GHG_data/UNFCCC_reader/get_submissions_info.py
  50. 4 4
      UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py
  51. 8 0
      UNFCCC_GHG_data/__init__.py
  52. 0 12
      code/requirements.txt
  53. 25 23
      dodo.py
  54. 8 0
      pyproject.toml
  55. 1 0
      requirements.txt
  56. 1 0
      requirements_dev.txt
  57. 72 0
      setup.cfg
  58. 5 0
      setup.py

+ 2 - 2
.gitignore

@@ -5,7 +5,7 @@ __pycache__
 /JG_test_code/
 /JG_test_code/
 .doit.db
 .doit.db
 log
 log
-code/datasets
-code/UNFCCC_DI_reader
+UNFCCC_GHG_data/datasets
+UNFCCC_GHG_data/UNFCCC_DI_reader
 datasets/UNFCCC/DI_NAI
 datasets/UNFCCC/DI_NAI
 
 

+ 202 - 0
LICENSE

@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 1 - 1
Makefile

@@ -3,7 +3,7 @@ help:
 	echo Options:
 	echo Options:
 	echo make venv: create virtual environment
 	echo make venv: create virtual environment
 
 
-venv: code/requirements.txt
+venv: UNFCCC_GHG_data
 	[ -d ./venv ] || python3 -m venv venv
 	[ -d ./venv ] || python3 -m venv venv
 	./venv/bin/pip install --upgrade pip
 	./venv/bin/pip install --upgrade pip
 	./venv/bin/pip install -Ur requirements.txt
 	./venv/bin/pip install -Ur requirements.txt

+ 5 - 5
code/UNFCCC_CRF_reader/CRF_raw_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py

@@ -17,16 +17,16 @@ from datetime import date
 root_path = Path(__file__).parents[2].absolute()
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 root_path = root_path.resolve()
 #log_path = root_path / "log"
 #log_path = root_path / "log"
-code_path = root_path / "code"
+code_path = root_path / "UNFCCC_GHG_data"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 dataset_path = root_path / "datasets" / "UNFCCC"
 dataset_path = root_path / "datasets" / "UNFCCC"
 
 
 #sys.path.append(code_path.name)
 #sys.path.append(code_path.name)
 
 
-from util import all_crf_countries
-from UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
-from UNFCCC_CRF_reader_prod import submission_has_been_read
+from .util import all_crf_countries
+from .UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
+from .UNFCCC_CRF_reader_prod import submission_has_been_read
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
@@ -46,7 +46,7 @@ for country in all_crf_countries:
         # check if the latest submission has been read already
         # check if the latest submission has been read already
 
 
         data_read = submission_has_been_read(
         data_read = submission_has_been_read(
-            country_info["code"], country_info["name"],
+            country_info["code"], country_info["name"],
             submission_year=submission_year,
             submission_year=submission_year,
             submission_date=country_info["date"],
             submission_date=country_info["date"],
             verbose=False,
             verbose=False,

+ 24 - 17
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py

@@ -17,8 +17,8 @@ from operator import itemgetter
 from collections import Counter
 from collections import Counter
 from typing import Dict, List, Optional, Tuple, Union
 from typing import Dict, List, Optional, Tuple, Union
 from datetime import datetime, timedelta
 from datetime import datetime, timedelta
-import crf_specifications as crf
-from util import downloaded_data_path, NoCRFFilesError, custom_country_mapping
+from . import crf_specifications as crf
+from .util import downloaded_data_path, NoCRFFilesError, custom_country_mapping
 
 
 
 
 ### reading functions
 ### reading functions
@@ -144,7 +144,8 @@ def convert_crf_table_to_pm2if(
         #coords_value_filling=coords_value_filling,
         #coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
         filter_keep=filter_keep,
-        meta_data=meta_data
+        meta_data=meta_data,
+        time_format="%Y",
     )
     )
     return df_table_if
     return df_table_if
 
 
@@ -170,7 +171,7 @@ def read_crf_table(
     __________
     __________
 
 
     country_codes: str or list[str]
     country_codes: str or list[str]
-        ISO 3-letter country code or list of country codes
+        ISO 3-letter country code or list of country codes
 
 
     table: str
     table: str
         name of the table sheet in the CRF xlsx file
         name of the table sheet in the CRF xlsx file
@@ -265,8 +266,8 @@ def read_crf_table(
                 df_all = pd.concat([df_this_file, df_all])
                 df_all = pd.concat([df_this_file, df_all])
                 unknown_rows = unknown_rows + unknown_rows_this_file
                 unknown_rows = unknown_rows + unknown_rows_this_file
                 last_row_info = last_row_info + last_row_info_this_file
                 last_row_info = last_row_info + last_row_info_this_file
-        except:
-            print(f"Year could not be converted to int for file {file}. Skipping file.")
+        except Exception as e:
+            print(f"Error when reading file {file}. Skipping file. Exception: {e}")
 
 
     return df_all, unknown_rows, last_row_info
     return df_all, unknown_rows, last_row_info
 
 
@@ -359,8 +360,15 @@ def read_crf_table_from_file(
     df_header = df_header.replace(r"Unnamed: [0-9]{1,2}", np.nan, regex=True)
     df_header = df_header.replace(r"Unnamed: [0-9]{1,2}", np.nan, regex=True)
     header = []
     header = []
     # fill nans with the last value from the left
     # fill nans with the last value from the left
-    for row in range(0, len(df_header)):
-        header.append(list(df_header.iloc[row].fillna(method="ffill")))
+    if "header_fill" in table_properties:
+        for row in range(0, len(df_header)):
+            if table_properties["header_fill"][row]:
+                header.append(list(df_header.iloc[row].fillna(method="ffill")))
+            else:
+                header.append(list(df_header.iloc[row]))
+    else:
+        for row in range(0, len(df_header)):
+            header.append(list(df_header.iloc[row].fillna(method="ffill")))
 
 
     # combine all non-unit rows into one
     # combine all non-unit rows into one
     entities = None
     entities = None
@@ -391,7 +399,6 @@ def read_crf_table_from_file(
 
 
     df_current.iloc[0] = units
     df_current.iloc[0] = units
     df_current.columns = entities
     df_current.columns = entities
-    #### standardized header is finalized
 
 
     # remove all columns to ignore
     # remove all columns to ignore
     df_current = df_current.drop(columns=table_properties["cols_to_ignore"])
     df_current = df_current.drop(columns=table_properties["cols_to_ignore"])
@@ -533,7 +540,7 @@ def get_crf_files(
     __________
     __________
 
 
     country_codes: str or list[str]
     country_codes: str or list[str]
-        ISO 3-letter country code or list of country codes
+        ISO 3-letter country code or list of country codes
 
 
     submission_year: int
     submission_year: int
         Year of the submission of the data
         Year of the submission of the data
@@ -644,7 +651,7 @@ def get_info_from_crf_filename(
     Returns
     Returns
     _______
     _______
     dict with fields:
     dict with fields:
-        party: the party that submitted the data (3 letter code)
+        party: the party that submitted the data (3 letter code)
         submission_year: year of submission
         submission_year: year of submission
         data_year: year in which the emissions took place
         data_year: year in which the emissions took place
         date: date of the submission
         date: date of the submission
@@ -680,8 +687,8 @@ def filter_filenames(
         List with pathlib.Path objects for the filenames to filter
         List with pathlib.Path objects for the filenames to filter
 
 
     party: Optional[Union[str, List[str]]] (default: None)
     party: Optional[Union[str, List[str]]] (default: None)
-        List of country codes or single country code. If given only files
-        for this(these) country-code(s) will be returned.
+        List of country codes or single country code. If given only files
+        for this(these) country-code(s) will be returned.
 
 
     data_year: Optional[Union[int, List[int]]] (default: None)
     data_year: Optional[Union[int, List[int]]] (default: None)
         List of data years or single year. If given only files for this
         List of data years or single year. If given only files for this
@@ -878,7 +885,7 @@ def filter_category(
         mapping: List
         mapping: List
             mapping for a single category
             mapping for a single category
         country: str
         country: str
-            iso 3-letter code of the country
+            iso 3-letter code of the country
 
 
     Returns
     Returns
     _______
     _______
@@ -918,7 +925,7 @@ def get_latest_date_for_country(
     Parameters
     Parameters
     __________
     __________
     country: str
     country: str
-        3-letter country code
+        3-letter country code
 
 
     submission_year: int
     submission_year: int
         Year of the submission to find the latest date for
         Year of the submission to find the latest date for
@@ -1054,7 +1061,7 @@ def find_latest_date(
 def get_country_name(
 def get_country_name(
         country_code: str,
         country_code: str,
 ) -> str:
 ) -> str:
-    """get country name from code """
+    """get country name from code """
     if country_code in custom_country_mapping:
     if country_code in custom_country_mapping:
         country_name = custom_country_mapping[country_code]
         country_name = custom_country_mapping[country_code]
     else:
     else:
@@ -1062,7 +1069,7 @@ def get_country_name(
             country = pycountry.countries.get(alpha_3=country_code)
             country = pycountry.countries.get(alpha_3=country_code)
             country_name = country.name
             country_name = country.name
         except:
         except:
-            raise ValueError(f"Country code {country_code} can not be mapped to "
+            raise ValueError(f"Country code {country_code} can not be mapped to "
                              f"any country")
                              f"any country")
 
 
     return country_name
     return country_name

+ 24 - 13
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py

@@ -14,21 +14,24 @@ from pathlib import Path
 from datetime import date
 from datetime import date
 
 
 
 
-from util import all_crf_countries
-from util import log_path
-import crf_specifications as crf
-from UNFCCC_CRF_reader_core import get_country_name
-from UNFCCC_CRF_reader_core import get_latest_date_for_country, read_crf_table
-from UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
+from .util import all_crf_countries
+from .util import log_path
+from . import crf_specifications as crf
+from .UNFCCC_CRF_reader_core import get_country_name
+from .UNFCCC_CRF_reader_core import get_latest_date_for_country, read_crf_table
+from .UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
 
 
 def read_year_to_test_specs(
 def read_year_to_test_specs(
         submission_year: int,
         submission_year: int,
         data_year: Optional[int]=None,
         data_year: Optional[int]=None,
+        totest: Optional[bool]=False,
 ) -> xr.Dataset:
 ) -> xr.Dataset:
     """
     """
     Read one xlsx file (so one data year) for each country for a submission year to
     Read one xlsx file (so one data year) for each country for a submission year to
     create log files and extend the specifications
     create log files and extend the specifications
 
 
+    totest: if true only read tables with "totest" status
+
     """
     """
     if data_year is None:
     if data_year is None:
         data_year=2000
         data_year=2000
@@ -37,6 +40,8 @@ def read_year_to_test_specs(
     last_row_info = []
     last_row_info = []
     ds_all = None
     ds_all = None
     print(f"CRF test reading for CRF{submission_year}. Using data year {data_year}")
     print(f"CRF test reading for CRF{submission_year}. Using data year {data_year}")
+    if totest:
+        print("Reading only tables to test.")
     print("#"*80)
     print("#"*80)
     try:
     try:
         crf_spec = getattr(crf, f"CRF{submission_year}")
         crf_spec = getattr(crf, f"CRF{submission_year}")
@@ -44,8 +49,12 @@ def read_year_to_test_specs(
         raise ValueError(f"No terminology exists for submission years {submission_year}, "
         raise ValueError(f"No terminology exists for submission years {submission_year}, "
                          f"{submission_year - 1}")
                          f"{submission_year - 1}")
 
 
-    tables = [table for table in crf_spec.keys()
-              if crf_spec[table]["status"] == "tested"]
+    if totest:
+        tables = [table for table in crf_spec.keys()
+                  if crf_spec[table]["status"] == "totest"]
+    else:
+        tables = [table for table in crf_spec.keys()
+                  if crf_spec[table]["status"] == "tested"]
     print(f"The following tables are available in the " \
     print(f"The following tables are available in the " \
           f"CRF{submission_year} specification: {tables}")
           f"CRF{submission_year} specification: {tables}")
     print("#" * 80)
     print("#" * 80)
@@ -101,9 +110,9 @@ def read_year_to_test_specs(
                         ds_all = ds_table_pm2
                         ds_all = ds_table_pm2
                     else:
                     else:
                         ds_all = ds_all.combine_first(ds_table_pm2)
                         ds_all = ds_all.combine_first(ds_table_pm2)
-                except:
-                    print(f"Error occured when converting table {table} for {country_name} to"
-                          f" PRIMAP2 IF.")
+                except Exception as e:
+                    print(f"Error occured when converting table {table} for"
+                          f" {country_name} to PRIMAP2 IF. Exception: {e}")
                     # TODO: error handling and logging
                     # TODO: error handling and logging
 
 
     # process log messages.
     # process log messages.
@@ -116,8 +125,8 @@ def read_year_to_test_specs(
 
 
     if len(last_row_info) > 0:
     if len(last_row_info) > 0:
         log_location = log_path / f"CRF{submission_year}" \
         log_location = log_path / f"CRF{submission_year}" \
-                       / f"{data_yar}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
-        print(f"Data found in the last row. Savin log to "
+                       / f"{data_year}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
+        print(f"Data found in the last row. Saving log to "
               f"{log_location}")
               f"{log_location}")
         save_last_row_info(last_row_info, log_location)
         save_last_row_info(last_row_info, log_location)
 
 
@@ -125,6 +134,8 @@ def read_year_to_test_specs(
     compression = dict(zlib=True, complevel=9)
     compression = dict(zlib=True, complevel=9)
     output_folder = log_path / f"test_read_CRF{submission_year}"
     output_folder = log_path / f"test_read_CRF{submission_year}"
     output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
     output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
+    if totest:
+        output_filename = output_filename + "_totest"
 
 
     if not output_folder.exists():
     if not output_folder.exists():
         output_folder.mkdir()
         output_folder.mkdir()

+ 20 - 20
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py

@@ -13,24 +13,24 @@ from datetime import date
 #from pathlib import Path
 #from pathlib import Path
 from typing import Optional, List, Dict, Union
 from typing import Optional, List, Dict, Union
 
 
-#from . import crf_specifications as crf
-import crf_specifications as crf
-
-from UNFCCC_CRF_reader_core import read_crf_table
-from UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
-from UNFCCC_CRF_reader_core import get_latest_date_for_country
-from UNFCCC_CRF_reader_core import get_crf_files
-from UNFCCC_CRF_reader_core import get_country_name
-from UNFCCC_CRF_reader_devel import save_unknown_categories_info
-from UNFCCC_CRF_reader_devel import save_last_row_info
-
-from util import code_path, log_path, \
+from . import crf_specifications as crf
+#import crf_specifications as crf
+
+from .UNFCCC_CRF_reader_core import read_crf_table
+from .UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
+from .UNFCCC_CRF_reader_core import get_latest_date_for_country
+from .UNFCCC_CRF_reader_core import get_crf_files
+from .UNFCCC_CRF_reader_core import get_country_name
+from .UNFCCC_CRF_reader_devel import save_unknown_categories_info
+from .UNFCCC_CRF_reader_devel import save_last_row_info
+
+from .util import code_path, log_path, \
     custom_country_mapping, extracted_data_path, root_path, \
     custom_country_mapping, extracted_data_path, root_path, \
     all_crf_countries, NoCRFFilesError
     all_crf_countries, NoCRFFilesError
 
 
-import sys
-sys.path.append(code_path.name)
-from UNFCCC_reader.get_submissions_info import get_country_code
+#import sys
+#sys.path.append(code_path.name)
+from ..UNFCCC_reader import get_country_code
 
 
 
 
 # functions:
 # functions:
@@ -45,7 +45,7 @@ from UNFCCC_reader.get_submissions_info import get_country_code
 
 
 
 
 # general approach:
 # general approach:
-# main code in a function that reads on table from one file.
+# main UNFCCC_GHG_data in a function that reads on table from one file.
 # return raw pandas DF for use in different functions
 # return raw pandas DF for use in different functions
 # wrappers around this function to read for a whole country or for test reading where we also
 # wrappers around this function to read for a whole country or for test reading where we also
 # write files with missing sectors etc.
 # write files with missing sectors etc.
@@ -84,7 +84,7 @@ def read_crf_for_country(
     __________
     __________
 
 
     country_codes: str
     country_codes: str
-        ISO 3-letter country code
+        ISO 3-letter country UNFCCC_GHG_data
 
 
     submission_year: int
     submission_year: int
         Year of the submission of the data
         Year of the submission of the data
@@ -220,7 +220,7 @@ def read_crf_for_country_datalad(
     __________
     __________
 
 
     country_codes: str
     country_codes: str
-        ISO 3-letter country code
+        ISO 3-letter country UNFCCC_GHG_data
 
 
     submission_year: int
     submission_year: int
         Year of the submission of the data
         Year of the submission of the data
@@ -382,7 +382,7 @@ def read_new_crf_for_year_datalad(
                 output_files = output_files + country_info["output"]
                 output_files = output_files + country_info["output"]
             else:
             else:
                 data_read = submission_has_been_read(
                 data_read = submission_has_been_read(
-                    country_info["code"], country_info["name"],
+                    country_info["UNFCCC_GHG_data"], country_info["name"],
                     submission_year=submission_year,
                     submission_year=submission_year,
                     submission_date=country_info["date"],
                     submission_date=country_info["date"],
                     verbose=False,
                     verbose=False,
@@ -438,7 +438,7 @@ def get_input_and_output_files_for_country(
         country_code = get_country_code(country)
         country_code = get_country_code(country)
     # now get the country name
     # now get the country name
     country_name = get_country_name(country_code)
     country_name = get_country_name(country_code)
-    country_info["code"] = country_code
+    country_info["UNFCCC_GHG_data"] = country_code
     country_info["name"] = country_name
     country_info["name"] = country_name
 
 
     # determine latest data
     # determine latest data

+ 3 - 1
code/UNFCCC_CRF_reader/__init__.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/__init__.py

@@ -3,6 +3,8 @@ CRF reader module
 """
 """
 
 
 #from pathlib import Path
 #from pathlib import Path
-#from . import crf_specifications
+from . import crf_specifications
 from .UNFCCC_CRF_reader_prod import read_crf_for_country, read_crf_for_country_datalad
 from .UNFCCC_CRF_reader_prod import read_crf_for_country, read_crf_for_country_datalad
 
 
+__all__ = ["crf_specifications", "read_crf_for_country", "read_crf_for_country_datalad"]
+

+ 0 - 0
code/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py


+ 0 - 0
code/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py


+ 135 - 23
code/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py

@@ -42,7 +42,7 @@ TODO:
 import numpy as np
 import numpy as np
 from .util import unit_info
 from .util import unit_info
 
 
-CRF2022 = {
+CRF2023 = {
     "Table1s1": {
     "Table1s1": {
         "status": "tested",
         "status": "tested",
         "table": {
         "table": {
@@ -715,6 +715,7 @@ CRF2022 = {
             ['Fossil part of biodiesel', ['1.A.3.b.i', 'OLBiodieselFC'], 4],  # LTU
             ['Fossil part of biodiesel', ['1.A.3.b.i', 'OLBiodieselFC'], 4],  # LTU
             ['Other', ['1.A.3.b.i', 'OLOther'], 4],  # UKR, MLT
             ['Other', ['1.A.3.b.i', 'OLOther'], 4],  # UKR, MLT
             ['Other Liquid Fuels', ['1.A.3.b.i', 'OLOther'], 4],  # CYP
             ['Other Liquid Fuels', ['1.A.3.b.i', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.i', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.i', 'OMotorFuels'], 4],  # RUS
             ['Other motor fuels', ['1.A.3.b.i', 'OMotorFuels'], 4],  # RUS
             ['Lubricants in 2-stroke engines', ['1.A.3.b.i', 'Lubricants'], 4],  # HUN
             ['Lubricants in 2-stroke engines', ['1.A.3.b.i', 'Lubricants'], 4],  # HUN
             ['LNG', ['1.A.3.b.i', 'LNG'], 4],  ## USA
             ['LNG', ['1.A.3.b.i', 'LNG'], 4],  ## USA
@@ -746,6 +747,7 @@ CRF2022 = {
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Other', ['1.A.3.b.ii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other', ['1.A.3.b.ii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.ii', 'OLOther'], 4],  # CYP
             ['Other Liquid Fuels', ['1.A.3.b.ii', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.ii', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.ii', 'OMotorFuels'], 4],  # RUS
             ['Other motor fuels', ['1.A.3.b.ii', 'OMotorFuels'], 4],  # RUS
             ['LNG', ['1.A.3.b.ii', 'LNG'], 4],  ## USA
             ['LNG', ['1.A.3.b.ii', 'LNG'], 4],  ## USA
             ['Gaseous fuels', ['1.A.3.b.ii', 'Gaseous'], 3],
             ['Gaseous fuels', ['1.A.3.b.ii', 'Gaseous'], 3],
@@ -774,6 +776,7 @@ CRF2022 = {
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.iii', 'OLBiodieselFC'], 4],  # CAN
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.iii', 'OLBiodieselFC'], 4],  # CAN
             ['Other', ['1.A.3.b.iii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other', ['1.A.3.b.iii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.iii', 'OLOther'], 4],  # CYP
             ['Other Liquid Fuels', ['1.A.3.b.iii', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.iii', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.iii', 'OMotorFuels'], 4],  # RUS
             ['Other motor fuels', ['1.A.3.b.iii', 'OMotorFuels'], 4],  # RUS
             ['LNG', ['1.A.3.b.iii', 'LNG'], 4],  # USA
             ['LNG', ['1.A.3.b.iii', 'LNG'], 4],  # USA
             ['GTL', ['1.A.3.b.iii', 'GTL'], 4],  # MCO, new in 2022
             ['GTL', ['1.A.3.b.iii', 'GTL'], 4],  # MCO, new in 2022
@@ -802,6 +805,7 @@ CRF2022 = {
             ['Lubricant Oil', ['1.A.3.b.iv', 'Lubricants'], 4],  # PRT
             ['Lubricant Oil', ['1.A.3.b.iv', 'Lubricants'], 4],  # PRT
             ['Other', ['1.A.3.b.iv', 'OLOther'], 4],  # UKR (and probably others)
             ['Other', ['1.A.3.b.iv', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.iv', 'OLOther'], 4],  # CYP
             ['Other Liquid Fuels', ['1.A.3.b.iv', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.iv', 'OLOther'], 4],  # SWE new in 2023
             ['Lube', ['1.A.3.b.iv', 'Lubricants'], 4],  # MCO
             ['Lube', ['1.A.3.b.iv', 'Lubricants'], 4],  # MCO
             ['Lubricants in 2-stroke engines', ['1.A.3.b.iv', 'Lubricants'], 4],  # HUN
             ['Lubricants in 2-stroke engines', ['1.A.3.b.iv', 'Lubricants'], 4],  # HUN
             ['Lubricants (two-stroke engines)', ['1.A.3.b.iv', 'Lubricants'], 4],  # ESP
             ['Lubricants (two-stroke engines)', ['1.A.3.b.iv', 'Lubricants'], 4],  # ESP
@@ -853,7 +857,7 @@ CRF2022 = {
             ['Biomass', ['1.A.3.b.v.6', 'Biomass'], 4],
             ['Biomass', ['1.A.3.b.v.6', 'Biomass'], 4],
             ['Other Fossil Fuels (please specify)', ['1.A.3.b.v.6', 'OtherFF'], 4],
             ['Other Fossil Fuels (please specify)', ['1.A.3.b.v.6', 'OtherFF'], 4],
             # BEL
             # BEL
-            ['Lubricant Two-Stroke Engines', ['1.A.3.b.v.7', 'Total'], 3],
+            ['Lubricant Two-Stroke Engines', ['1.A.3.b.v.7', 'Lubricants'], 3],
             ['Other Liquid Fuels (please specify)', ['1.A.3.b.v.7', 'OtherLiquid'], 4],
             ['Other Liquid Fuels (please specify)', ['1.A.3.b.v.7', 'OtherLiquid'], 4],
             # ROU
             # ROU
             ['Gaseous Fuels', ['1.A.3.b.v.8', 'Total'], 3],
             ['Gaseous Fuels', ['1.A.3.b.v.8', 'Total'], 3],
@@ -925,6 +929,9 @@ CRF2022 = {
             ['Fuel oil C', ['1.A.3.d', 'FuelOilC'], 3],  # JPN
             ['Fuel oil C', ['1.A.3.d', 'FuelOilC'], 3],  # JPN
             ['Diesel Oil', ['1.A.3.d', 'OLDiesel'], 3],  # FIN
             ['Diesel Oil', ['1.A.3.d', 'OLDiesel'], 3],  # FIN
             ['Other Liquid Fuels', ['1.A.3.d', 'OLOther'], 3],  # ROU, new in 2022
             ['Other Liquid Fuels', ['1.A.3.d', 'OLOther'], 3],  # ROU, new in 2022
+            ['Heating and Other Gasoil', ['1.A.3.d', 'OLHeatingOtherGasoil'], 3],
+            # ROU, new in 2023
+            ['Liquified Petroleum Gas', ['1.A.3.d', 'OLLPG'], 3],  # ROU, new in 2023
             ['Gaseous fuels', ['1.A.3.d', 'Gaseous'], 2],
             ['Gaseous fuels', ['1.A.3.d', 'Gaseous'], 2],
             ['Biomass(6)', ['1.A.3.d', 'Biomass'], 2],
             ['Biomass(6)', ['1.A.3.d', 'Biomass'], 2],
             ['Other fossil fuels (please specify)(4)', ['1.A.3.d', 'OtherFF'], 2],
             ['Other fossil fuels (please specify)(4)', ['1.A.3.d', 'OtherFF'], 2],
@@ -1137,6 +1144,7 @@ CRF2022 = {
             ['heavy fuel oil', ['1.A.4.c.ii', 'HeavyFuelOil'], 4],  # NOR
             ['heavy fuel oil', ['1.A.4.c.ii', 'HeavyFuelOil'], 4],  # NOR
             ['Other motor fuels', ['1.A.4.c.ii', 'OMotorFuels'], 4],  # RUS
             ['Other motor fuels', ['1.A.4.c.ii', 'OMotorFuels'], 4],  # RUS
             ['Biodiesel (5 percent fossil portion)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Biodiesel (5 percent fossil portion)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
+            ['Lubricating Oil (Two-Stroke Engines)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Gaseous fuels', ['1.A.4.c.ii', 'Gaseous'], 3],
             ['Gaseous fuels', ['1.A.4.c.ii', 'Gaseous'], 3],
             ['Biomass(6)', ['1.A.4.c.ii', 'Biomass'], 3],
             ['Biomass(6)', ['1.A.4.c.ii', 'Biomass'], 3],
             ['Other fossil fuels (please specify)(4)', ['1.A.4.c.ii', 'OtherFF'], 3],
             ['Other fossil fuels (please specify)(4)', ['1.A.4.c.ii', 'OtherFF'], 3],
@@ -1448,6 +1456,8 @@ CRF2022 = {
             ['Flaring', ['1.B.1.c.i'], 1],  # UKR, AUS
             ['Flaring', ['1.B.1.c.i'], 1],  # UKR, AUS
             ['Flaring of gas', ['1.B.1.c.i'], 1],  # SWE
             ['Flaring of gas', ['1.B.1.c.i'], 1],  # SWE
             ['Coal Dumps', ['1.B.1.c.ii'], 1],  # JPN
             ['Coal Dumps', ['1.B.1.c.ii'], 1],  # JPN
+            ['Uncontrolled combustion and burning coal dumps', ['1.B.1.c.ii'], 1],
+            # JPN since 2023
             ['SO2 scrubbing', ['1.B.1.c.iii'], 1],  # SVN
             ['SO2 scrubbing', ['1.B.1.c.iii'], 1],  # SVN
             ['Flaring of coke oven gas', ['1.B.1.c.iv'], 1],  # KAZ
             ['Flaring of coke oven gas', ['1.B.1.c.iv'], 1],  # KAZ
             ['Emisson from Coke Oven Gas Subsystem', ['1.B.1.c.iv'], 1],  # POL
             ['Emisson from Coke Oven Gas Subsystem', ['1.B.1.c.iv'], 1],  # POL
@@ -2280,6 +2290,8 @@ CRF2022 = {
             ['Mechanical-Biological Treatment MBT', ['5.E.2']],  # DEU
             ['Mechanical-Biological Treatment MBT', ['5.E.2']],  # DEU
             ['Accidental fires', ['5.E.3']],  # DEU, DKE, DNK, DNM
             ['Accidental fires', ['5.E.3']],  # DEU, DKE, DNK, DNM
             ['Decomposition of Petroleum-Derived Surfactants', ['5.E.4']],  # JPN
             ['Decomposition of Petroleum-Derived Surfactants', ['5.E.4']],  # JPN
+            ['Decomposition of Fossil-fuel Derived Surfactants', ['5.E.4']],
+            # JPN since 2023
             ['Other non-specified', ['5.E.5']],  # USA
             ['Other non-specified', ['5.E.5']],  # USA
             ['Biogas burning without energy recovery', ['5.E.6']],  # PRT
             ['Biogas burning without energy recovery', ['5.E.6']],  # PRT
             ['Sludge spreading', ['5.E.7']],  # ESP
             ['Sludge spreading', ['5.E.7']],  # ESP
@@ -2462,13 +2474,16 @@ CRF2022 = {
             ['Other (please specify)', ['5.C.2.a.ii'], 2],
             ['Other (please specify)', ['5.C.2.a.ii'], 2],
             ['agricultural waste', ['5.C.2.a.ii.1'], 3],  # ITA
             ['agricultural waste', ['5.C.2.a.ii.1'], 3],  # ITA
             ['Agricultural residues', ['5.C.2.a.ii.1'], 3],  # ESP
             ['Agricultural residues', ['5.C.2.a.ii.1'], 3],  # ESP
+            ['Agriculture residues', ['5.C.2.a.ii.1'], 3],  # PRT
             ['Natural residues', ['5.C.2.a.ii.2'], 3],  # CHE
             ['Natural residues', ['5.C.2.a.ii.2'], 3],  # CHE
             ['Wood waste', ['5.C.2.a.ii.3'], 3],  # GBR, GBK
             ['Wood waste', ['5.C.2.a.ii.3'], 3],  # GBR, GBK
             ['Bonfires etc.', ['5.C.2.a.ii.4'], 3],  # DEU
             ['Bonfires etc.', ['5.C.2.a.ii.4'], 3],  # DEU
             ['Bonfires', ['5.C.2.a.ii.4'], 3],  # NLD, ISL
             ['Bonfires', ['5.C.2.a.ii.4'], 3],  # NLD, ISL
             ['Other', ['5.C.2.a.ii.5'], 3],  # EST
             ['Other', ['5.C.2.a.ii.5'], 3],  # EST
             ['Other waste', ['5.C.2.a.ii.5'], 3],  # CZE
             ['Other waste', ['5.C.2.a.ii.5'], 3],  # CZE
+            ['Waste', ['5.C.2.a.ii.5'], 3],  # GBR
             ['Industrial Solid Waste', ['5.C.2.a.ii.6'], 3],  # JPN
             ['Industrial Solid Waste', ['5.C.2.a.ii.6'], 3],  # JPN
+            ['Vine', ['5.C.2.a.ii.7'], 3], # AUT
             ['Non-biogenic', ['5.C.2.b'], 1],
             ['Non-biogenic', ['5.C.2.b'], 1],
             ['Municipal solid waste', ['5.C.2.b.i'], 2],
             ['Municipal solid waste', ['5.C.2.b.i'], 2],
             ['Other (please specify)', ['5.C.2.b.ii'], 2],
             ['Other (please specify)', ['5.C.2.b.ii'], 2],
@@ -2478,6 +2493,7 @@ CRF2022 = {
             ['Bonfires', ['5.C.2.b.ii.4'], 3],  # ISL
             ['Bonfires', ['5.C.2.b.ii.4'], 3],  # ISL
             ['Other', ['5.C.2.b.ii.5'], 3],  # EST
             ['Other', ['5.C.2.b.ii.5'], 3],  # EST
             ['Other waste', ['5.C.2.b.ii.5'], 3],  # CZE
             ['Other waste', ['5.C.2.b.ii.5'], 3],  # CZE
+            ['Waste', ['5.C.2.b.ii.5'], 3],  # GBR
             ['Industrial Solid Waste', ['5.C.2.b.ii.6'], 3],  # JPN
             ['Industrial Solid Waste', ['5.C.2.b.ii.6'], 3],  # JPN
         ],
         ],
         "entity_mapping": {
         "entity_mapping": {
@@ -2528,41 +2544,137 @@ CRF2022 = {
         },
         },
     },  # tested
     },  # tested
     "Summary1.As1": {  # Summary 1, sheet 1
     "Summary1.As1": {  # Summary 1, sheet 1
-        "status": "TODO",
+        "status": "tested",
          "table": {
          "table": {
             "firstrow": 5,
             "firstrow": 5,
-            "lastrow": 26,
+            "lastrow": 28,
             "header": ['entity', 'unit'],
             "header": ['entity', 'unit'],
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "categories": ["category"],
             "cols_to_ignore": [],
             "cols_to_ignore": [],
             "stop_cats": ["", np.nan],
             "stop_cats": ["", np.nan],
-            "unit_info": unit_info["default"],
+            "unit_info": unit_info["summary"],
         },
         },
         "sector_mapping": [
         "sector_mapping": [
-            ['Total Energy', ['1']],
-            ['A. Fuel combustion activities (sectoral approach)', ['1.A']],
+            ['Total national emissions and removals', ['0']],
+            ['1. Energy', ['1']],
+            ['A. Fuel combustion Reference approach(2)', ['1.A-ref']],
+            ['Sectoral approach(2)', ['1.A']],
             ['1. Energy industries', ['1.A.1']],
             ['1. Energy industries', ['1.A.1']],
-            ['a. Public electricity and heat production', ['1.A.1.a']],
-            ['b. Petroleum refining', ['1.A.1.b']],
-            ['c. Manufacture of solid fuels and other energy industries', ['1.A.1.c']],
             ['2. Manufacturing industries and construction', ['1.A.2']],
             ['2. Manufacturing industries and construction', ['1.A.2']],
-            ['a. Iron and steel', ['1.A.2.a']],
-            ['b. Non-ferrous metals', ['1.A.2.b']],
-            ['c. Chemicals', ['1.A.2.c']],
-            ['d. Pulp, paper and print', ['1.A.2.d']],
-            ['e. Food processing, beverages and tobacco', ['1.A.2.e']],
-            ['f. Non-metallic minerals', ['1.A.2.f']],
-            ['g. Other (please specify)', ['1.A.2.g']],
             ['3. Transport', ['1.A.3']],
             ['3. Transport', ['1.A.3']],
-            ['a. Domestic aviation', ['1.A.3.a']],
-            ['b. Road transportation', ['1.A.3.b']],
-            ['c. Railways', ['1.A.3.c']],
-            ['d. Domestic navigation', ['1.A.3.d']],
-            ['e. Other transportation', ['1.A.3.e']],
+            ['4. Other sectors', ['1.A.4']],
+            ['5. Other', ['1.A.5']],
+            ['B. Fugitive emissions from fuels', ['1.B']],
+            ['1. Solid fuels', ['1.B.1']],
+            ['2. Oil and natural gas and other emissions from energy production',
+             ['1.B.2']],
+            ['C. CO2 Transport and storage', ['1.C']],
+            ['2. Industrial processes and product use', ['2']],
+            ['A. Mineral industry', ['2.A']],
+            ['B. Chemical industry', ['2.B']],
+            ['C. Metal industry', ['2.C']],
+            ['D. Non-energy products from fuels and solvent use', ['2.D']],
+            ['E. Electronic industry', ['2.E']],
+            ['F. Product uses as substitutes for ODS', ['2.F']],
+            ['G. Other product manufacture and use', ['2.G']],
+            ['H. Other(3)', ['2.H']],
         ],
         ],
         "entity_mapping": {
         "entity_mapping": {
-            "NOX": "NOx",
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs(1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
+        },
+        "coords_defaults": {
+            "class": "Total",
+        },
+    },  # tested
+    "Summary1.As2": {  # Summary 1, sheet 2
+        "status": "tested",
+         "table": {
+            "firstrow": 5,
+            "lastrow": 34,
+            "header": ['entity', 'entity', 'unit'],
+            "header_fill": [True, False, True],
+            "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
+            "categories": ["category"],
+            "cols_to_ignore": [],
+            "stop_cats": ["", np.nan],
+            "unit_info": unit_info["summary"],
+        },
+        "sector_mapping": [
+            ['3. Agriculture', ['3']],
+            ['A. Enteric fermentation', ['3.A']],
+            ['B. Manure management', ['3.B']],
+            ['C. Rice cultivation', ['3.C']],
+            ['D. Agricultural soils', ['3.D']],
+            ['E. Prescribed burning of savannas', ['3.E']],
+            ['F. Field burning of agricultural residues', ['3.F']],
+            ['G. Liming', ['3.G']],
+            ['H. Urea application', ['3.H']],
+            ['I. Other carbon-contining fertilizers', ['3.I']],
+            ['J. Other', ['3.J']],
+            ['4. Land use, land-use change and forestry (4)', ['4']],
+            ['A. Forest land (4)', ['4.A']],
+            ['B. Cropland (4)', ['4.B']],
+            ['C. Grassland (4)', ['4.C']],
+            ['D. Wetlands (4)', ['4.D']],
+            ['E. Settlements (4)', ['4.E']],
+            ['F. Other land (4)', ['4.F']],
+            ['G. Harvested wood products', ['4.G']],
+            ['H. Other (4)', ['4.H']],
+            ['5. Waste', ['5']],
+            ['A. Solid waste disposal (5)', ['5.A']],
+            ['B. Biological treatment of solid waste (5)', ['5.B']],
+            ['C. Incineration and open burning of waste (5)', ['5.C']],
+            ['D. Wastewater treatment and discharge', ['5.D']],
+            ['E. Other (5)', ['5.E']],
+            ['6. Other (please specify)(6)', ['6']],
+        ],
+        "entity_mapping": {
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs (1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
+        },
+        "coords_defaults": {
+            "class": "Total",
+        },
+    },  # tested
+    "Summary1.As3": {  # Summary 1, sheet 3
+        "status": "tested",
+         "table": {
+            "firstrow": 5,
+            "lastrow": 17,
+            "header": ['entity', 'entity', 'unit'],
+            "header_fill": [True, False, True],
+            "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
+            "categories": ["category"],
+            "cols_to_ignore": [],
+            "stop_cats": ["", np.nan],
+            "unit_info": unit_info["summary"],
+        },
+        "sector_mapping": [
+            ['Memo items:(7)', ['\IGNORE']],
+            ['International bunkers', ['M.Memo.Int']],
+            ['Aviation', ['M.Memo.Int.Avi']],
+            ['Navigation', ['M.Memo.Int.Mar']],
+            ['Multilateral operations', ['M.Memo.Mult']],
+            ['CO2 emissions from biomass', ['M.Memo.Bio']],
+            ['CO2 captured', ['M.Memo.CO2Cap']],
+            ['Long-term storage of C in waste disposal sites', ['M.Memo.LTSW']],
+            ['Indirect N2O', ['M.Memo.IndN2O']],
+            ['Indirect CO2', ['M.Memo.IndCO2']],
+        ],
+        "entity_mapping": {
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs(1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
         },
         },
         "coords_defaults": {
         "coords_defaults": {
             "class": "Total",
             "class": "Total",

+ 2 - 0
code/UNFCCC_CRF_reader/crf_specifications/__init__.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py

@@ -5,3 +5,5 @@ Define the CRF specifications here for easy access
 from .CRF2021_specification import CRF2021
 from .CRF2021_specification import CRF2021
 from .CRF2022_specification import CRF2022
 from .CRF2022_specification import CRF2022
 from .CRF2023_specification import CRF2023
 from .CRF2023_specification import CRF2023
+
+__all__ = ["CRF2021", "CRF2022", "CRF2023"]

+ 10 - 0
code/UNFCCC_CRF_reader/crf_specifications/util.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py

@@ -29,4 +29,14 @@ unit_info = {
         },
         },
         "default_unit": "kt",
         "default_unit": "kt",
     },
     },
+    "summary": {  # contains fgas mixtures in CO2 eq units
+        "unit_row": 0,
+        "entity_row": "header",
+        "regexp_entity": r".*",
+        "regexp_unit": r"\((.*)\)",
+        "manual_repl_unit": {
+            "(kt CO2 equivalent)": "kt CO2eq",
+        },
+        "default_unit": "kt",
+    },
 }
 }

+ 2 - 2
code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py

@@ -3,11 +3,11 @@ This script is a wrapper around the read_crf_for_country
 function such that it can be called from datalad
 function such that it can be called from datalad
 """
 """
 
 
-from UNFCCC_CRF_reader_prod import read_crf_for_country
+from .UNFCCC_CRF_reader_prod import read_crf_for_country
 import argparse
 import argparse
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--re_read', help='Read data also if already read before',
 parser.add_argument('--re_read', help='Read data also if already read before',

+ 2 - 2
code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py

@@ -4,11 +4,11 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 not in the venv.
 """
 """
 
 
-from UNFCCC_CRF_reader_prod import read_crf_for_country_datalad
+from .UNFCCC_CRF_reader_prod import read_crf_for_country_datalad
 import argparse
 import argparse
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 parser.add_argument('--submission_year', help='Submission round to read')
 parser.add_argument('--submission_year', help='Submission round to read')
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--re_read', help='Read data also if already read before',
 parser.add_argument('--re_read', help='Read data also if already read before',

+ 1 - 1
code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py

@@ -3,7 +3,7 @@ This script is a wrapper around the read_crf_for_country
 function such that it can be called from datalad
 function such that it can be called from datalad
 """
 """
 
 
-from UNFCCC_CRF_reader_prod import read_new_crf_for_year
+from .UNFCCC_CRF_reader_prod import read_new_crf_for_year
 import argparse
 import argparse
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()

+ 1 - 1
code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py

@@ -4,7 +4,7 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 not in the venv.
 """
 """
 
 
-from UNFCCC_CRF_reader_prod import read_new_crf_for_year_datalad
+from .UNFCCC_CRF_reader_prod import read_new_crf_for_year_datalad
 from util import NoCRFFilesError
 from util import NoCRFFilesError
 import argparse
 import argparse
 
 

+ 7 - 1
code/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py

@@ -3,21 +3,27 @@ This script is a wrapper around the read_year_to_test_specs
 function such that it can be called from datalad
 function such that it can be called from datalad
 """
 """
 
 
-from UNFCCC_CRF_reader_devel import read_year_to_test_specs
+from UNFCCC_GHG_data.UNFCCC_CRF_reader.UNFCCC_CRF_reader_devel import read_year_to_test_specs
 import argparse
 import argparse
 
 
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
 parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
+parser.add_argument('--totest', help='read tables to test', action='store_true')
 args = parser.parse_args()
 args = parser.parse_args()
 
 
 
 
 submission_year = args.submission_year
 submission_year = args.submission_year
 data_year = args.data_year
 data_year = args.data_year
+if args.totest:
+    totest = True
+else:
+    totest = False
 
 
 read_year_to_test_specs(
 read_year_to_test_specs(
     submission_year=submission_year,
     submission_year=submission_year,
     data_year=data_year,
     data_year=data_year,
+    totest=totest,
 )
 )
 
 
 
 

+ 1 - 1
code/UNFCCC_CRF_reader/util.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/util.py

@@ -4,7 +4,7 @@ from pathlib import Path
 root_path = Path(__file__).parents[2].absolute()
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 root_path = root_path.resolve()
 log_path = root_path / "log"
 log_path = root_path / "log"
-code_path = root_path / "code"
+code_path = root_path / "UNFCCC_GHG_data"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 
 

+ 0 - 0
UNFCCC_GHG_data/UNFCCC_downloader/__init__.py


+ 0 - 0
code/UNFCCC_downloader/download_annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py


+ 0 - 0
code/UNFCCC_downloader/download_ndc.py → UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py


+ 0 - 0
code/UNFCCC_downloader/download_non-annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_bur.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_nc.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py


+ 0 - 0
code/UNFCCC_downloader/unfccc_submission_info.py → UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py


+ 5 - 5
code/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py

@@ -86,7 +86,7 @@ cat_codes_manual = {  # conversion to PRIMAP1 format
     'S/N': 'MMULTIOP',
     'S/N': 'MMULTIOP',
 }
 }
 
 
-cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,8}).*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[A-Z0-9]{1,8}).*'
 
 
 time_format = "%Y"
 time_format = "%Y"
 
 
@@ -219,7 +219,7 @@ for page in pages_to_read:
     if page in range(232, 235):
     if page in range(232, 235):
         df_current.iloc[
         df_current.iloc[
             metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
             metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
     # set index. necessary for the stack operation in the conversion to long format
@@ -253,7 +253,7 @@ for page in pages_to_read:
 
 
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     # then the regex replacements
     # then the regex replacements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
                                                                 regex=True)
                                                                 regex=True)
 
 
@@ -311,7 +311,7 @@ for page in pages_to_read_fgases:
         dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
         dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
     df_current = df_current.drop(idx_header)
     df_current = df_current.drop(idx_header)
 
 
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
     # set index. necessary for the stack operation in the conversion to long format
@@ -350,7 +350,7 @@ for page in pages_to_read_fgases:
 
 
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     # then the regex repalcements
     # then the regex repalcements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
                                                                 regex=True)
                                                                 regex=True)
 
 

+ 1 - 1
code/UNFCCC_reader/Chile/config_CHL_BUR4.py → UNFCCC_GHG_data/UNFCCC_reader/Chile/config_CHL_BUR4.py

@@ -64,7 +64,7 @@ filter_remove_IPCC2006 = {
 }
 }
 
 
 
 
-cat_mapping = { # categories not listed here have the same code as in IPCC 2006 specifications
+cat_mapping = { # categories not listed here have the same UNFCCC_GHG_data as in IPCC 2006 specifications
     '3': 'M.AG',
     '3': 'M.AG',
     '3.A': '3.A.1',
     '3.A': '3.A.1',
     '3.A.1': '3.A.1.a',
     '3.A.1': '3.A.1.a',

+ 2 - 2
code/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py

@@ -52,7 +52,7 @@ unit_info = {
 }
 }
 cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
 cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
                 'Categorías de fuente y sumidero de gases de efecto invernadero.1']
                 'Categorías de fuente y sumidero de gases de efecto invernadero.1']
-# columns for category code and original category name
+# columns for category UNFCCC_GHG_data and original category name
 index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
 index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
 
 
 # operations on long format DF
 # operations on long format DF
@@ -169,7 +169,7 @@ for year in years_to_read:
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
     # drop the columns which are empty and repetition of the metadata for the second block
     # drop the columns which are empty and repetition of the metadata for the second block
     df_current.drop(cols_to_drop, axis=1, inplace=True)
     df_current.drop(cols_to_drop, axis=1, inplace=True)
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set multi-index. necessary for the stack operation in the conversion to long format
     # set multi-index. necessary for the stack operation in the conversion to long format

+ 0 - 0
code/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py


+ 3 - 3
code/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py

@@ -38,7 +38,7 @@ year = 2019
 entity_row = 0
 entity_row = 0
 unit_row = 1
 unit_row = 1
 index_cols = "Categories"
 index_cols = "Categories"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
 
 
@@ -51,7 +51,7 @@ cat_codes_manual = {
     #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
     #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
 }
 }
 
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9]{1,4})\s.*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9]{1,4})\s.*'
 
 
 coords_cols = {
 coords_cols = {
     "category": "category",
     "category": "category",
@@ -202,7 +202,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 df_all = df_all.reset_index(drop=True)
 
 

+ 0 - 0
code/UNFCCC_reader/Mexico/config_MEX_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Mexico/config_MEX_BUR3.py


+ 3 - 3
code/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py

@@ -32,7 +32,7 @@ entity_row = 0
 unit_row = 1
 unit_row = 1
 
 
 index_cols = "Categorías de fuentes y sumideros de GEI"
 index_cols = "Categorías de fuentes y sumideros de GEI"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
 units = {
 units = {
@@ -53,7 +53,7 @@ cat_codes_manual = {
     '2F6 Otras aplicaciones': '2F6',
     '2F6 Otras aplicaciones': '2F6',
 }
 }
 
 
-cat_code_regexp = r'^\[(?P<code>[a-zA-Z0-9]{1,3})\].*'
+cat_code_regexp = r'^\[(?P<UNFCCC_GHG_data>[a-zA-Z0-9]{1,3})\].*'
 
 
 coords_cols = {
 coords_cols = {
     "category": "category",
     "category": "category",
@@ -168,7 +168,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 df_all = df_all.reset_index(drop=True)
 
 

+ 0 - 0
code/UNFCCC_reader/Montenegro/config_MNE_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Montenegro/config_MNE_BUR3.py


+ 1 - 1
code/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -129,7 +129,7 @@ for i, table in enumerate(tables):
         unit_parts = unit.split(" ")
         unit_parts = unit.split(" ")
         unit = f"{unit_parts[0]} CO2eq"
         unit = f"{unit_parts[0]} CO2eq"
 
 
-    # remove "/n" from category code and name columns
+    # remove "/n" from category UNFCCC_GHG_data and name columns
     df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace("\n", "")
     df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace("\n", "")
     df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace("\n", "")
     df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace("\n", "")
 
 

+ 0 - 0
code/UNFCCC_reader/Morocco/config_MAR_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Morocco/config_MAR_BUR3.py


+ 3 - 3
code/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py

@@ -32,7 +32,7 @@ pages_to_read = range(104, 138)
 
 
 compression = dict(zlib=True, complevel=9)
 compression = dict(zlib=True, complevel=9)
 
 
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
 index_cols = ['Catégories']
 index_cols = ['Catégories']
@@ -58,7 +58,7 @@ cat_codes_manual = {
     '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
     '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
 }
 }
 
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
 
 
 coords_terminologies = {
 coords_terminologies = {
     "area": "ISO3",
     "area": "ISO3",
@@ -171,7 +171,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 df_all = df_all.reset_index(drop=True)
 
 

+ 0 - 0
code/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py


+ 2 - 2
code/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py

@@ -37,7 +37,7 @@ years_to_read = range(1990, 2019 + 1)
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 cols_to_read = range(1, 2019 - 1990 + 3)
 cols_to_read = range(1, 2019 - 1990 + 3)
 
 
-# columns for category code and original category name
+# columns for category UNFCCC_GHG_data and original category name
 index_cols = ['분야·부문/연도']
 index_cols = ['분야·부문/연도']
 
 
 sheet_metadata = {
 sheet_metadata = {
@@ -136,7 +136,7 @@ for sheet in sheets_to_read:
     # read current sheet (one sheet per gas)
     # read current sheet (one sheet per gas)
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
                                engine="openpyxl")
                                engine="openpyxl")
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
     # set index. necessary for the stack operation in the conversion to long format

+ 2 - 2
code/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py

@@ -32,7 +32,7 @@ years_to_read = range(1990, 2018 + 1)
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 cols_to_read = range(1, 2018 - 1990 + 3)
 cols_to_read = range(1, 2018 - 1990 + 3)
 
 
-# columns for category code and original category name
+# columns for category UNFCCC_GHG_data and original category name
 index_cols = ['분야·부문/연도']
 index_cols = ['분야·부문/연도']
 
 
 sheet_metadata = {
 sheet_metadata = {
@@ -131,7 +131,7 @@ for sheet in sheets_to_read:
     # read current sheet (one sheet per gas)
     # read current sheet (one sheet per gas)
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=144, usecols=cols_to_read,
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=144, usecols=cols_to_read,
                                engine="openpyxl")
                                engine="openpyxl")
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
     # set index. necessary for the stack operation in the conversion to long format

+ 0 - 0
code/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py → UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py


+ 2 - 2
code/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py

@@ -32,7 +32,7 @@ if not output_folder.exists():
 output_filename = 'TWN_inventory_2022_'
 output_filename = 'TWN_inventory_2022_'
 inventory_file = '00_abstract_en.pdf'
 inventory_file = '00_abstract_en.pdf'
 
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9\.]{1,7})\s.*'
 
 
 time_format = "%Y"
 time_format = "%Y"
 
 
@@ -227,7 +227,7 @@ for table_name in table_defs.keys():
     df_this_table["category"] = df_this_table["category"].replace(
     df_this_table["category"] = df_this_table["category"].replace(
         table_def["cat_codes_manual"])
         table_def["cat_codes_manual"])
     # then the regex replacements
     # then the regex replacements
-    repl = lambda m: m.group('code')
+    repl = lambda m: m.group('UNFCCC_GHG_data')
     df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
     df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
                                                                       repl, regex=True)
                                                                       repl, regex=True)
 
 

+ 3 - 3
code/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -44,7 +44,7 @@ unit_row = 1
 gwp_to_use = "AR4GWP100"
 gwp_to_use = "AR4GWP100"
 
 
 index_cols = "Greenhouse gas source and sink categories"
 index_cols = "Greenhouse gas source and sink categories"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
 # manual category codes
 # manual category codes
@@ -54,7 +54,7 @@ cat_codes_manual = {
     'CO2 from Biomass': 'MBIO',
     'CO2 from Biomass': 'MBIO',
 }
 }
 
 
-cat_code_regexp = r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*'
+cat_code_regexp = r'^(?P<UNFCCC_GHG_data>[a-zA-Z0-9]{1,4})[\s\.].*'
 
 
 coords_cols = {
 coords_cols = {
     "category": "category",
     "category": "category",
@@ -184,7 +184,7 @@ df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
 # first the manual replacements
 # first the manual replacements
 df_inventory_long["category"] = df_inventory_long["category"].replace(cat_codes_manual)
 df_inventory_long["category"] = df_inventory_long["category"].replace(cat_codes_manual)
 # then the regex replacements
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_inventory_long["category"] = df_inventory_long["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_inventory_long["category"] = df_inventory_long["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_inventory_long = df_inventory_long.reset_index(drop=True)
 df_inventory_long = df_inventory_long.reset_index(drop=True)
 
 

+ 6 - 0
UNFCCC_GHG_data/UNFCCC_reader/__init__.py

@@ -0,0 +1,6 @@
+# expose some of the functions to the outside as they are used in other readers as well
+# TODO: create a unified util module for all readers
+
+from .get_submissions_info import get_country_code
+
+__all__ = ["get_country_code"]

+ 1 - 1
code/UNFCCC_reader/country_info.py → UNFCCC_GHG_data/UNFCCC_reader/country_info.py

@@ -8,7 +8,7 @@ from get_submissions_info import get_country_datasets
 # Find the right function and possible input and output files and
 # Find the right function and possible input and output files and
 # read the data using datalad run.
 # read the data using datalad run.
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 args = parser.parse_args()
 args = parser.parse_args()
 country = args.country
 country = args.country
 
 

+ 0 - 0
code/UNFCCC_reader/folder_mapping.json → UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json


+ 0 - 0
code/UNFCCC_reader/folder_mapping.py → UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py


+ 35 - 35
code/UNFCCC_reader/get_submissions_info.py → UNFCCC_GHG_data/UNFCCC_reader/get_submissions_info.py

@@ -9,7 +9,7 @@ import pycountry
 
 
 root_path = Path(__file__).parents[2].absolute()
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 root_path = root_path.resolve()
-code_path = root_path / "code" / "UNFCCC_reader"
+code_path = root_path / "UNFCCC_GHG_data" / "UNFCCC_reader"
 # beware, folders below are different than for CRF reader
 # beware, folders below are different than for CRF reader
 downloaded_data_path = root_path / "downloaded_data"
 downloaded_data_path = root_path / "downloaded_data"
 extracted_data_path = root_path / "extracted_data"
 extracted_data_path = root_path / "extracted_data"
@@ -46,14 +46,14 @@ def get_country_submissions(
         print_sub: bool = True,
         print_sub: bool = True,
 ) -> Dict[str, List[str]]:
 ) -> Dict[str, List[str]]:
     """
     """
-    Input is a three letter ISO code for a country, or the countries name.
-    The function tries to map the country name to an ISO code and then
+    Input is a three letter ISO UNFCCC_GHG_data for a country, or the countries name.
+    The function tries to map the country name to an ISO UNFCCC_GHG_data and then
     queries the folder mapping files for folders.
     queries the folder mapping files for folders.
 
 
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
 
         print_sub: bool
         print_sub: bool
             If True information on submissions will be written to stdout
             If True information on submissions will be written to stdout
@@ -70,7 +70,7 @@ def get_country_submissions(
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
 
 
     if print_sub:
     if print_sub:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
 
     country_submissions = {}
     country_submissions = {}
     if print_sub:
     if print_sub:
@@ -115,14 +115,14 @@ def get_country_datasets(
         print_ds: bool = True,
         print_ds: bool = True,
 ) -> Dict[str, List[str]]:
 ) -> Dict[str, List[str]]:
     """
     """
-    Input is a three letter ISO code for a country, or the country's name.
-    The function tries to map the country name to an ISO code and then
-    checks the code and data folders for content on the country.
+    Input is a three letter ISO UNFCCC_GHG_data for a country, or the country's name.
+    The function tries to map the country name to an ISO UNFCCC_GHG_data and then
+    checks the UNFCCC_GHG_data and data folders for content on the country.
 
 
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
 
         print_ds: bool
         print_ds: bool
             If True information on submissions will be written to stdout
             If True information on submissions will be written to stdout
@@ -138,11 +138,11 @@ def get_country_datasets(
     data_folder_legacy = legacy_data_path
     data_folder_legacy = legacy_data_path
 
 
 
 
-    # obtain country code
+    # obtain country UNFCCC_GHG_data
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
 
 
     if print_ds:
     if print_ds:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
 
     rep_data = {}
     rep_data = {}
     # data
     # data
@@ -181,7 +181,7 @@ def get_country_datasets(
                     # process filename to get submission
                     # process filename to get submission
                     parts = dataset.split('_')
                     parts = dataset.split('_')
                     if parts[0] != country_code:
                     if parts[0] != country_code:
-                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
+                        cleaned_datasets_current_folder[f'Wrong UNFCCC_GHG_data: {parts[0]}'] = dataset
                     else:
                     else:
                         terminology = "_".join(parts[3 : ])
                         terminology = "_".join(parts[3 : ])
                         key = f"{parts[1]} ({parts[2]}, {terminology})"
                         key = f"{parts[1]} ({parts[2]}, {terminology})"
@@ -197,9 +197,9 @@ def get_country_datasets(
 
 
                         code_file = get_code_file(country_code, parts[1])
                         code_file = get_code_file(country_code, parts[1])
                         if code_file:
                         if code_file:
-                            data_info = data_info + f"code: {code_file.name}"
+                            data_info = data_info + f"UNFCCC_GHG_data: {code_file.name}"
                         else:
                         else:
-                            data_info = data_info + f"code: not found"
+                            data_info = data_info + f"UNFCCC_GHG_data: not found"
 
 
                         cleaned_datasets_current_folder[key] = data_info
                         cleaned_datasets_current_folder[key] = data_info
 
 
@@ -250,7 +250,7 @@ def get_country_datasets(
                     # process filename to get submission
                     # process filename to get submission
                     parts = dataset.split('_')
                     parts = dataset.split('_')
                     if parts[0] != country_code:
                     if parts[0] != country_code:
-                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
+                        cleaned_datasets_current_folder[f'Wrong UNFCCC_GHG_data: {parts[0]}'] = dataset
                     else:
                     else:
                         terminology = "_".join(parts[3 : ])
                         terminology = "_".join(parts[3 : ])
                         key = f"{parts[1]} ({parts[2]}, {terminology}, legacy)"
                         key = f"{parts[1]} ({parts[2]}, {terminology}, legacy)"
@@ -288,13 +288,13 @@ def get_country_code(
         country_name: str,
         country_name: str,
 )->str:
 )->str:
     """
     """
-    obtain country code. If the input is a code it will be returned, if the input
-    is not a three letter code a search will be performed
+    obtain country UNFCCC_GHG_data. If the input is a UNFCCC_GHG_data it will be returned, if the input
+    is not a three letter UNFCCC_GHG_data a search will be performed
 
 
     Parameters
     Parameters
     __________
     __________
     country_name: str
     country_name: str
-        Country code or name to get the three-letter code for.
+        Country UNFCCC_GHG_data or name to get the three-letter UNFCCC_GHG_data for.
 
 
     """
     """
     # First check if it's in the list of custom codes
     # First check if it's in the list of custom codes
@@ -302,7 +302,7 @@ def get_country_code(
         country_code = country_name
         country_code = country_name
     else:
     else:
         try:
         try:
-            # check if it's a 3 letter code
+            # check if it's a 3 letter UNFCCC_GHG_data
             country = pycountry.countries.get(alpha_3=country_name)
             country = pycountry.countries.get(alpha_3=country_name)
             country_code = country.alpha_3
             country_code = country.alpha_3
         except:
         except:
@@ -310,7 +310,7 @@ def get_country_code(
                 country = pycountry.countries.search_fuzzy(country_name.replace("_", " "))
                 country = pycountry.countries.search_fuzzy(country_name.replace("_", " "))
             except:
             except:
                 raise ValueError(f"Country name {country_name} can not be mapped to "
                 raise ValueError(f"Country name {country_name} can not be mapped to "
-                                 f"any country code. Try using the ISO3 code directly.")
+                                 f"any country UNFCCC_GHG_data. Try using the ISO3 UNFCCC_GHG_data directly.")
             if len(country) > 1:
             if len(country) > 1:
                 country_code = None
                 country_code = None
                 for current_country in country:
                 for current_country in country:
@@ -337,13 +337,13 @@ def get_possible_inputs(
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
 
         submission: str
         submission: str
             String of the submission
             String of the submission
 
 
         print_info: bool = False
         print_info: bool = False
-            If True print information on code found
+            If True print information on UNFCCC_GHG_data found
 
 
     Returns
     Returns
     -------
     -------
@@ -352,11 +352,11 @@ def get_possible_inputs(
 
 
     data_folder = downloaded_data_path
     data_folder = downloaded_data_path
 
 
-    # obtain country code
+    # obtain country UNFCCC_GHG_data
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
 
 
     if print_info:
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
 
     input_files = []
     input_files = []
     for item in data_folder.iterdir():
     for item in data_folder.iterdir():
@@ -399,7 +399,7 @@ def get_possible_outputs(
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
 
         submission: str
         submission: str
             String of the submission
             String of the submission
@@ -414,10 +414,10 @@ def get_possible_outputs(
 
 
     data_folder = extracted_data_path
     data_folder = extracted_data_path
 
 
-    # obtain country code
+    # obtain country UNFCCC_GHG_data
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
     if print_info:
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
 
     output_files = []
     output_files = []
     for item in data_folder.iterdir():
     for item in data_folder.iterdir():
@@ -457,17 +457,17 @@ def get_code_file(
     Parameters
     Parameters
     ----------
     ----------
         country_name: str
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
 
         submission: str
         submission: str
             String of the submission
             String of the submission
 
 
         print_info: bool = False
         print_info: bool = False
-            If True print information on code found
+            If True print information on code found
 
 
     Returns
     Returns
     -------
     -------
-        returns a pathlib Path object for the code file
+        returns a pathlib Path object for the code file
     """
     """
 
 
     code_file_path = None
     code_file_path = None
@@ -477,18 +477,18 @@ def get_code_file(
     if submission[0:3] == "CRF":
     if submission[0:3] == "CRF":
         return root_path / "UNFCCC_CRF_reader"
         return root_path / "UNFCCC_CRF_reader"
 
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
     country_code = get_country_code(country_name)
 
 
     if print_info:
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO code {country_code}")
 
 
     with open(code_path / "folder_mapping.json", "r") as mapping_file:
     with open(code_path / "folder_mapping.json", "r") as mapping_file:
         folder_mapping = json.load(mapping_file)
         folder_mapping = json.load(mapping_file)
 
 
     if country_code not in folder_mapping:
     if country_code not in folder_mapping:
         if print_info:
         if print_info:
-            print("No code available")
+            print("No code available")
             print("")
             print("")
     else:
     else:
         country_folder = code_path / folder_mapping[country_code]
         country_folder = code_path / folder_mapping[country_code]
@@ -497,13 +497,13 @@ def get_code_file(
         for file in country_folder.iterdir():
         for file in country_folder.iterdir():
             if file.match(code_file_name_candidate):
             if file.match(code_file_name_candidate):
                 if code_file_path is not None:
                 if code_file_path is not None:
-                    raise ValueError(f"Found multiple code candidates: "
+                    raise ValueError(f"Found multiple code candidates: "
                                      f"{code_file_path} and file.name. "
                                      f"{code_file_path} and file.name. "
                                      f"Please use only one file with name "
                                      f"Please use only one file with name "
                                      f"'read_ISO3_submission_XXX.YYY'.")
                                      f"'read_ISO3_submission_XXX.YYY'.")
                 else:
                 else:
                     if print_info:
                     if print_info:
-                        print(f"Found code file {file.relative_to(root_path)}")
+                        print(f"Found code file {file.relative_to(root_path)}")
                 code_file_path = file
                 code_file_path = file
 
 
     if code_file_path is not None:
     if code_file_path is not None:

+ 4 - 4
code/UNFCCC_reader/read_UNFCCC_submission.py → UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py

@@ -14,7 +14,7 @@ from get_submissions_info import get_possible_outputs
 # Find the right function and possible input and output files and
 # Find the right function and possible input and output files and
 # read the data using datalad run.
 # read the data using datalad run.
 parser = argparse.ArgumentParser()
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or code')
 parser.add_argument('--submission', help='Submission to read')
 parser.add_argument('--submission', help='Submission to read')
 
 
 args = parser.parse_args()
 args = parser.parse_args()
@@ -34,7 +34,7 @@ print("")
 script_name = get_code_file(country, submission)
 script_name = get_code_file(country, submission)
 
 
 if script_name is not None:
 if script_name is not None:
-    print(f"Found code file {script_name}")
+    print(f"Found code file {script_name}")
     print("")
     print("")
 
 
     # get possible input files
     # get possible input files
@@ -77,8 +77,8 @@ if script_name is not None:
         explicit=True,
         explicit=True,
     )
     )
 else:
 else:
-    # no code found.
-    print(f"No code found to read {submission} from {country}")
+    # no code found.
+    print(f"No code found to read {submission} from {country}")
     print(f"Use 'doit country_info --country={country} to get "
     print(f"Use 'doit country_info --country={country} to get "
           f"a list of available submissions and datasets.")
           f"a list of available submissions and datasets.")
 
 

+ 8 - 0
UNFCCC_GHG_data/__init__.py

@@ -0,0 +1,8 @@
+####
+
+from . import UNFCCC_reader
+from . import UNFCCC_CRF_reader
+# import UNFCCC_DI_reader
+# import UNFCCC_downloader
+
+__all__ = ["UNFCCC_reader", "UNFCCC_CRF_reader"]

+ 0 - 12
code/requirements.txt

@@ -1,12 +0,0 @@
-bs4
-requests
-pandas
-selenium
-primap2
-countrynames
-pycountry
-datalad
-treelib
-camelot-py
-opencv-python
-ghostscript

+ 25 - 23
dodo.py

@@ -7,10 +7,12 @@ from doit import get_var
 def task_setup_venv():
 def task_setup_venv():
     """Create virtual environment"""
     """Create virtual environment"""
     return {
     return {
-        'file_dep': ['code/requirements.txt'],
+        'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
         'actions': ['python3 -m venv venv',
         'actions': ['python3 -m venv venv',
-                    './venv/bin/pip install --upgrade pip',
-                    './venv/bin/pip install -Ur code/requirements.txt',
+                    './venv/bin/pip install --upgrade pip wheel',
+                    #'./venv/bin/pip install -Ur UNFCCC_GHG_data/requirements.txt',
+                    './venv/bin/pip install --upgrade --upgrade-strategy '
+                    'eager -e .[dev]',
                     'touch venv',],
                     'touch venv',],
         'targets': ['venv'],
         'targets': ['venv'],
         'verbosity': 2,
         'verbosity': 2,
@@ -27,7 +29,7 @@ def task_map_folders():
     Create or update the folder mapping in the given folder
     Create or update the folder mapping in the given folder
     """
     """
     return {
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder={read_config_folder['folder']}"],
                     f"--folder={read_config_folder['folder']}"],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
@@ -41,7 +43,7 @@ def task_update_bur():
         'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
         'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
         'actions': ['datalad run -m "Fetch BUR submissions" '
         'actions': ['datalad run -m "Fetch BUR submissions" '
                     '-o downloaded_data/UNFCCC/submissions-bur.csv '
                     '-o downloaded_data/UNFCCC/submissions-bur.csv '
-                    './venv/bin/python code/UNFCCC_downloader/fetch_submissions_bur.py'],
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py'],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
     }
     }
@@ -55,8 +57,8 @@ def task_download_bur():
         # before download
         # before download
         'actions': ['datalad run -m "Download BUR submissions" '
         'actions': ['datalad run -m "Download BUR submissions" '
                     '-i downloaded_data/UNFCCC/submissions-bur.csv '
                     '-i downloaded_data/UNFCCC/submissions-bur.csv '
-                    './venv/bin/python code/UNFCCC_downloader/download_non-annexI.py --category=BUR',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=BUR',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     f"--folder=downloaded_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -70,7 +72,7 @@ def task_update_nc():
         'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
         'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
         'actions': ['datalad run -m "Fetch NC submissions" '
         'actions': ['datalad run -m "Fetch NC submissions" '
                     '-o downloaded_data/UNFCCC/submissions-nc.csv '
                     '-o downloaded_data/UNFCCC/submissions-nc.csv '
-                    './venv/bin/python code/UNFCCC_downloader/fetch_submissions_nc.py'],
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py'],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
     }
     }
@@ -84,8 +86,8 @@ def task_download_nc():
         # before download
         # before download
         'actions': ['datalad run -m "Download NC submissions" '
         'actions': ['datalad run -m "Download NC submissions" '
                     '-i downloaded_data/UNFCCC/submissions-nc.csv '
                     '-i downloaded_data/UNFCCC/submissions-nc.csv '
-                    './venv/bin/python code/UNFCCC_downloader/download_non-annexI.py --category=NC',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=NC',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     f"--folder=downloaded_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -108,7 +110,7 @@ def task_update_annexi():
         'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
         'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
                     "--explicit "
                     "--explicit "
                     f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
                     f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
-                    f"./venv/bin/python code/UNFCCC_downloader/fetch_submissions_annexI.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py "
                     f"--year={update_aI_config['year']}"],
                     f"--year={update_aI_config['year']}"],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],
@@ -124,9 +126,9 @@ def task_download_annexi():
         'actions': [f"datalad run -m 'Download AnnexI submissions for "
         'actions': [f"datalad run -m 'Download AnnexI submissions for "
                     f"{update_aI_config['category']}{update_aI_config['year']}' "
                     f"{update_aI_config['category']}{update_aI_config['year']}' "
                     f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
                     f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
-                    f"./venv/bin/python code/UNFCCC_downloader/download_annexI.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py "
                     f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
                     f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     f"--folder=downloaded_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -138,8 +140,8 @@ def task_download_ndc():
     """ Download NDC submissions """
     """ Download NDC submissions """
     return {
     return {
         'actions': ['datalad run -m "Download NDC submissions" '
         'actions': ['datalad run -m "Download NDC submissions" '
-                    './venv/bin/python code/UNFCCC_downloader/download_ndc.py',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     f"--folder=downloaded_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -157,11 +159,11 @@ read_config = {
 
 
 # TODO: make individual task for non-UNFCCC submissions
 # TODO: make individual task for non-UNFCCC submissions
 def task_read_unfccc_submission():
 def task_read_unfccc_submission():
-    """ Read submission for a country (if code exists) (not for CRF)"""
+    """ Read submission for a country (if code exists) (not for CRF)"""
     return {
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/read_UNFCCC_submission.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py "
                     f"--country={read_config['country']} --submission={read_config['submission']}",
                     f"--country={read_config['country']} --submission={read_config['submission']}",
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=extracted_data/UNFCCC"
                     f"--folder=extracted_data/UNFCCC"
                     ],
                     ],
         'verbosity': 2,
         'verbosity': 2,
@@ -182,11 +184,11 @@ read_config_crf = {
 def task_read_unfccc_crf_submission():
 def task_read_unfccc_crf_submission():
     """ Read CRF submission for a country """
     """ Read CRF submission for a country """
     actions = [
     actions = [
-        f"./venv/bin/python code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
         f"--country={read_config_crf['country']} "
         f"--country={read_config_crf['country']} "
         f"--submission_year={read_config_crf['submission_year']} "
         f"--submission_year={read_config_crf['submission_year']} "
         f"--submission_date={read_config_crf['submission_date']} ",
         f"--submission_date={read_config_crf['submission_date']} ",
-        f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
         f"--folder=extracted_data/UNFCCC"
         f"--folder=extracted_data/UNFCCC"
         ]
         ]
     if read_config_crf["re_read"] == "True":
     if read_config_crf["re_read"] == "True":
@@ -201,9 +203,9 @@ def task_read_unfccc_crf_submission():
 def task_read_new_unfccc_crf_for_year():
 def task_read_new_unfccc_crf_for_year():
     """ Read CRF submission for all countries for given submission year. by default only reads
     """ Read CRF submission for all countries for given submission year. by default only reads
     data not present yet. Only reads the latest updated submission for each country."""
     data not present yet. Only reads the latest updated submission for each country."""
-    actions = [f"./venv/bin/python code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
+    actions = [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
                f"--submission_year={read_config_crf['submission_year']} ",
                f"--submission_year={read_config_crf['submission_year']} ",
-               f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+               f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                f"--folder=extracted_data/UNFCCC"
                f"--folder=extracted_data/UNFCCC"
                ]
                ]
     # specifying countries is currently disabled duo to problems with command line
     # specifying countries is currently disabled due to problems with command line
@@ -224,7 +226,7 @@ def task_country_info():
     """ Print information on submissions and datasets
     """ Print information on submissions and datasets
     available for given country"""
     available for given country"""
     return {
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/country_info.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/country_info.py "
                     f"--country={read_config['country']}"],
                     f"--country={read_config['country']}"],
         'verbosity': 2,
         'verbosity': 2,
         'setup': ['setup_venv'],
         'setup': ['setup_venv'],

+ 8 - 0
pyproject.toml

@@ -0,0 +1,8 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel",
+    "setuptools_scm[toml]>=3.4"
+]
+build-backend = "setuptools.build_meta"
+

+ 1 - 0
requirements.txt

@@ -0,0 +1 @@
+.

+ 1 - 0
requirements_dev.txt

@@ -0,0 +1 @@
+.[dev]

+ 72 - 0
setup.cfg

@@ -0,0 +1,72 @@
+[metadata]
+name = UNFCCC_GHG_data
+version = 0.2
+author = Johannes Gütschow
+author_email = mail@johannes-guetschow.de
+description = Tools to read GHG data submitted to the UNFCCC using various methods
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/JGuetschow/UNFCCC_non-AnnexI_data
+#project_urls =
+classifiers =
+    Development Status :: 3 - Alpha
+    Intended Audience :: Science/Research
+    Topic :: Scientific/Engineering :: Atmospheric Science
+    License :: OSI Approved :: Apache Software License
+    Natural Language :: English
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+license = Apache Software License 2.0
+license_file = LICENSE
+
+[options]
+packages =
+    UNFCCC_GHG_data
+    UNFCCC_GHG_data.UNFCCC_CRF_reader
+    UNFCCC_GHG_data.UNFCCC_reader
+    UNFCCC_GHG_data.UNFCCC_downloader
+    #UNFCCC_GHG_data.UNFCCC_DI_reader
+    #UNFCCC_GHG_data.datasets
+python_requires = >=3.8
+setup_requires =
+    setuptools_scm
+install_requires =
+    bs4
+    requests
+    pandas
+    selenium
+    primap2
+    countrynames
+    pycountry
+    datalad
+    treelib
+    camelot-py
+    opencv-python
+    ghostscript
+
+[options.extras_require]
+dev =
+    pip
+    wheel
+    bs4
+    requests
+    pandas
+    selenium
+    primap2
+    countrynames
+    pycountry
+    datalad
+    treelib
+    camelot-py
+    opencv-python
+    ghostscript
+    ipykernel
+    jupyter
+
+
+[options.package_data]
+* =
+    *.csv
+    *.nc

+ 5 - 0
setup.py

@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+
+import setuptools
+
+setuptools.setup()