
restructure repo and make it a package. Also work on CRF2023 specs

Johannes Gütschow, 1 year ago
parent
commit
361eaac03f
58 changed files with 635 additions and 192 deletions
  1. .gitignore (+2 -2)
  2. LICENSE (+202 -0)
  3. Makefile (+1 -1)
  4. UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py (+5 -5)
  5. UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py (+24 -17)
  6. UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py (+24 -13)
  7. UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py (+20 -20)
  8. UNFCCC_GHG_data/UNFCCC_CRF_reader/__init__.py (+3 -1)
  9. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py (+0 -0)
  10. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py (+0 -0)
  11. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py (+135 -23)
  12. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py (+2 -0)
  13. UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py (+10 -0)
  14. UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py (+2 -2)
  15. UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py (+2 -2)
  16. UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py (+1 -1)
  17. UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py (+1 -1)
  18. UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py (+7 -1)
  19. UNFCCC_GHG_data/UNFCCC_CRF_reader/util.py (+1 -1)
  20. UNFCCC_GHG_data/UNFCCC_downloader/__init__.py (+0 -0)
  21. UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py (+0 -0)
  22. UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py (+0 -0)
  23. UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py (+0 -0)
  24. UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py (+0 -0)
  25. UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py (+0 -0)
  26. UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py (+0 -0)
  27. UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py (+0 -0)
  28. UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py (+5 -5)
  29. UNFCCC_GHG_data/UNFCCC_reader/Chile/config_CHL_BUR4.py (+1 -1)
  30. UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py (+2 -2)
  31. UNFCCC_GHG_data/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py (+0 -0)
  32. UNFCCC_GHG_data/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py (+3 -3)
  33. UNFCCC_GHG_data/UNFCCC_reader/Mexico/config_MEX_BUR3.py (+0 -0)
  34. UNFCCC_GHG_data/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py (+3 -3)
  35. UNFCCC_GHG_data/UNFCCC_reader/Montenegro/config_MNE_BUR3.py (+0 -0)
  36. UNFCCC_GHG_data/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py (+1 -1)
  37. UNFCCC_GHG_data/UNFCCC_reader/Morocco/config_MAR_BUR3.py (+0 -0)
  38. UNFCCC_GHG_data/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py (+3 -3)
  39. UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py (+0 -0)
  40. UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py (+2 -2)
  41. UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py (+2 -2)
  42. UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py (+0 -0)
  43. UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py (+2 -2)
  44. UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py (+3 -3)
  45. UNFCCC_GHG_data/UNFCCC_reader/__init__.py (+6 -0)
  46. UNFCCC_GHG_data/UNFCCC_reader/country_info.py (+1 -1)
  47. UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json (+0 -0)
  48. UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py (+0 -0)
  49. UNFCCC_GHG_data/UNFCCC_reader/get_submissions_info.py (+35 -35)
  50. UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py (+4 -4)
  51. UNFCCC_GHG_data/__init__.py (+8 -0)
  52. code/requirements.txt (+0 -12)
  53. dodo.py (+25 -23)
  54. pyproject.toml (+8 -0)
  55. requirements.txt (+1 -0)
  56. requirements_dev.txt (+1 -0)
  57. setup.cfg (+72 -0)
  58. setup.py (+5 -0)

+ 2 - 2
.gitignore

@@ -5,7 +5,7 @@ __pycache__
 /JG_test_code/
 .doit.db
 log
-code/datasets
-code/UNFCCC_DI_reader
+UNFCCC_GHG_data/datasets
+UNFCCC_GHG_data/UNFCCC_DI_reader
 datasets/UNFCCC/DI_NAI
 

+ 202 - 0
LICENSE

@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 1 - 1
Makefile

@@ -3,7 +3,7 @@ help:
 	echo Options:
 	echo make venv: create virtual environment
 
-venv: code/requirements.txt
+venv: UNFCCC_GHG_data
 	[ -d ./venv ] || python3 -m venv venv
 	./venv/bin/pip install --upgrade pip
 	./venv/bin/pip install -Ur code/requirements.txt

+ 5 - 5
code/UNFCCC_CRF_reader/CRF_raw_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/CRF_raw_for_year.py

@@ -17,16 +17,16 @@ from datetime import date
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 #log_path = root_path / "log"
-code_path = root_path / "code"
+code_path = root_path / "UNFCCC_GHG_data"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 dataset_path = root_path / "datasets" / "UNFCCC"
 
 #sys.path.append(code_path.name)
 
-from util import all_crf_countries
-from UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
-from UNFCCC_CRF_reader_prod import submission_has_been_read
+from .util import all_crf_countries
+from .UNFCCC_CRF_reader_prod import get_input_and_output_files_for_country
+from .UNFCCC_CRF_reader_prod import submission_has_been_read
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
@@ -46,7 +46,7 @@ for country in all_crf_countries:
         # check if the latest submission has been read already
 
         data_read = submission_has_been_read(
-            country_info["code"], country_info["name"],
+            country_info["UNFCCC_GHG_data"], country_info["name"],
             submission_year=submission_year,
             submission_date=country_info["date"],
             verbose=False,

+ 24 - 17
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_core.py

@@ -17,8 +17,8 @@ from operator import itemgetter
 from collections import Counter
 from typing import Dict, List, Optional, Tuple, Union
 from datetime import datetime, timedelta
-import crf_specifications as crf
-from util import downloaded_data_path, NoCRFFilesError, custom_country_mapping
+from . import crf_specifications as crf
+from .util import downloaded_data_path, NoCRFFilesError, custom_country_mapping
 
 
 ### reading functions
@@ -144,7 +144,8 @@ def convert_crf_table_to_pm2if(
         #coords_value_filling=coords_value_filling,
         filter_remove=filter_remove,
         filter_keep=filter_keep,
-        meta_data=meta_data
+        meta_data=meta_data,
+        time_format="%Y",
     )
     return df_table_if
 
@@ -170,7 +171,7 @@ def read_crf_table(
     __________
 
     country_codes: str or list[str]
-        ISO 3-letter country code or list of country codes
+        ISO 3-letter country UNFCCC_GHG_data or list of country codes
 
     table: str
         name of the table sheet in the CRF xlsx file
@@ -265,8 +266,8 @@ def read_crf_table(
                 df_all = pd.concat([df_this_file, df_all])
                 unknown_rows = unknown_rows + unknown_rows_this_file
                 last_row_info = last_row_info + last_row_info_this_file
-        except:
-            print(f"Year could not be converted to int for file {file}. Skipping file.")
+        except Exception as e:
+            print(f"Error when reading file {file}. Skipping file. Exception: {e}")
 
     return df_all, unknown_rows, last_row_info
 
@@ -359,8 +360,15 @@ def read_crf_table_from_file(
     df_header = df_header.replace(r"Unnamed: [0-9]{1,2}", np.nan, regex=True)
     header = []
     # fill nans with the last value from the left
-    for row in range(0, len(df_header)):
-        header.append(list(df_header.iloc[row].fillna(method="ffill")))
+    if "header_fill" in table_properties:
+        for row in range(0, len(df_header)):
+            if table_properties["header_fill"][row]:
+                header.append(list(df_header.iloc[row].fillna(method="ffill")))
+            else:
+                header.append(list(df_header.iloc[row]))
+    else:
+        for row in range(0, len(df_header)):
+            header.append(list(df_header.iloc[row].fillna(method="ffill")))
 
     # combine all non-unit rows into one
     entities = None
@@ -391,7 +399,6 @@ def read_crf_table_from_file(
 
     df_current.iloc[0] = units
     df_current.columns = entities
-    #### standardized header is finalized
 
     # remove all columns to ignore
     df_current = df_current.drop(columns=table_properties["cols_to_ignore"])
@@ -533,7 +540,7 @@ def get_crf_files(
     __________
 
     country_codes: str or list[str]
-        ISO 3-letter country code or list of country codes
+        ISO 3-letter country UNFCCC_GHG_data or list of country codes
 
     submission_year: int
         Year of the submission of the data
@@ -644,7 +651,7 @@ def get_info_from_crf_filename(
     Returns
     _______
     dict with fields:
-        party: the party that submitted the data (3 letter code)
+        party: the party that submitted the data (3 letter UNFCCC_GHG_data)
         submission_year: year of submission
         data_year: year in which the meissions took place
         date: date of the submission
@@ -680,8 +687,8 @@ def filter_filenames(
         List with pathlib.Path objects for the filenames to filter
 
     party: Optional[Union[str, List[str]]] (default: None)
-        List of country codes or single country code. If given only files
-        for this(these) country-code(s) will be returned.
+        List of country codes or single country UNFCCC_GHG_data. If given only files
+        for this(these) country-UNFCCC_GHG_data(s) will be returned.
 
     data_year: Optional[Union[int, List[int]]] (default: None)
         List of data years or single year. If given only files for this
@@ -878,7 +885,7 @@ def filter_category(
         mapping: List
             mapping for a single category
         country: str
-            iso 3-letter code of the country
+            iso 3-letter UNFCCC_GHG_data of the country
 
     Returns
     _______
@@ -918,7 +925,7 @@ def get_latest_date_for_country(
     Parameters
     __________
     country: str
-        3-letter country code
+        3-letter country UNFCCC_GHG_data
 
     submission_year: int
         Year of the submission to find the l;atest date for
@@ -1054,7 +1061,7 @@ def find_latest_date(
 def get_country_name(
         country_code: str,
 ) -> str:
-    """get country name from code """
+    """get country name from UNFCCC_GHG_data """
     if country_code in custom_country_mapping:
         country_name = custom_country_mapping[country_code]
     else:
@@ -1062,7 +1069,7 @@ def get_country_name(
             country = pycountry.countries.get(alpha_3=country_code)
             country_name = country.name
         except:
-            raise ValueError(f"Country code {country_code} can not be mapped to "
+            raise ValueError(f"Country UNFCCC_GHG_data {country_code} can not be mapped to "
                              f"any country")
 
     return country_name
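The header_fill option added above lets a table specification state, per header row, whether empty header cells should be forward-filled from the left (useful when one entity spans several columns but another header row must stay sparse). A minimal, self-contained sketch of that per-row logic on hypothetical header data, using pandas' ffill():

    import numpy as np
    import pandas as pd

    # hypothetical three-row header block: entity, entity, unit
    df_header = pd.DataFrame([
        ["CO2", np.nan, "CH4", np.nan],                   # entity row: forward-fill
        [np.nan, "HFCs(1)", np.nan, np.nan],              # entity row: keep gaps
        ["(kt)", "(kt CO2 equivalent)", "(kt)", np.nan],  # unit row: forward-fill
    ])
    header_fill = [True, False, True]  # one flag per header row, as in the CRF2023 summary specs

    header = []
    for row in range(len(df_header)):
        if header_fill[row]:
            # fill each empty cell with the last value to its left
            header.append(list(df_header.iloc[row].ffill()))
        else:
            header.append(list(df_header.iloc[row]))
    print(header)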

+ 24 - 13
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_devel.py

@@ -14,21 +14,24 @@ from pathlib import Path
 from datetime import date
 
 
-from util import all_crf_countries
-from util import log_path
-import crf_specifications as crf
-from UNFCCC_CRF_reader_core import get_country_name
-from UNFCCC_CRF_reader_core import get_latest_date_for_country, read_crf_table
-from UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
+from .util import all_crf_countries
+from .util import log_path
+from . import crf_specifications as crf
+from .UNFCCC_CRF_reader_core import get_country_name
+from .UNFCCC_CRF_reader_core import get_latest_date_for_country, read_crf_table
+from .UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
 
 def read_year_to_test_specs(
         submission_year: int,
         data_year: Optional[int]=None,
+        totest: Optional[bool]=False,
 ) -> xr.Dataset:
     """
     Read one xlsx file (so one data year) for each country for a submission year to
     create log files and extend the specifications
 
+    totest: if true only read tables with "totest" status
+
     """
     if data_year is None:
         data_year=2000
@@ -37,6 +40,8 @@ def read_year_to_test_specs(
     last_row_info = []
     ds_all = None
     print(f"CRF test reading for CRF{submission_year}. Using data year {data_year}")
+    if totest:
+        print("Reading only tables to test.")
     print("#"*80)
     try:
         crf_spec = getattr(crf, f"CRF{submission_year}")
@@ -44,8 +49,12 @@ def read_year_to_test_specs(
         raise ValueError(f"No terminology exists for submission years {submission_year}, "
                          f"{submission_year - 1}")
 
-    tables = [table for table in crf_spec.keys()
-              if crf_spec[table]["status"] == "tested"]
+    if totest:
+        tables = [table for table in crf_spec.keys()
+                  if crf_spec[table]["status"] == "totest"]
+    else:
+        tables = [table for table in crf_spec.keys()
+                  if crf_spec[table]["status"] == "tested"]
     print(f"The following tables are available in the " \
           f"CRF{submission_year} specification: {tables}")
     print("#" * 80)
@@ -101,9 +110,9 @@ def read_year_to_test_specs(
                         ds_all = ds_table_pm2
                     else:
                         ds_all = ds_all.combine_first(ds_table_pm2)
-                except:
-                    print(f"Error occured when converting table {table} for {country_name} to"
-                          f" PRIMAP2 IF.")
+                except Exception as e:
+                    print(f"Error occured when converting table {table} for"
+                          f" {country_name} to PRIMAP2 IF. Exception: {e}")
                     # TODO: error handling and logging
 
     # process log messages.
@@ -116,8 +125,8 @@ def read_year_to_test_specs(
 
     if len(last_row_info) > 0:
         log_location = log_path / f"CRF{submission_year}" \
-                       / f"{data_yar}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
-        print(f"Data found in the last row. Savin log to "
+                       / f"{data_year}_last_row_info_{today.strftime('%Y-%m-%d')}.csv"
+        print(f"Data found in the last row. Saving log to "
               f"{log_location}")
         save_last_row_info(last_row_info, log_location)
 
@@ -125,6 +134,8 @@ def read_year_to_test_specs(
     compression = dict(zlib=True, complevel=9)
     output_folder = log_path / f"test_read_CRF{submission_year}"
     output_filename = f"CRF{submission_year}_{today.strftime('%Y-%m-%d')}"
+    if totest:
+        output_filename = output_filename + "_totest"
 
     if not output_folder.exists():
         output_folder.mkdir()
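For completeness, a hedged example of driving the devel reader with the new flag from Python; the module path follows the package layout introduced in this commit, and the keyword arguments are the ones visible in the signature above (everything else about the call, such as the country list it iterates over, is unchanged):

    from UNFCCC_GHG_data.UNFCCC_CRF_reader.UNFCCC_CRF_reader_devel import (
        read_year_to_test_specs,
    )

    # read only the tables whose spec status is "totest" for the CRF2023 round,
    # using data year 2000 (also the fallback when data_year is None)
    ds = read_year_to_test_specs(submission_year=2023, data_year=2000, totest=True)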

+ 20 - 20
code/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/UNFCCC_CRF_reader_prod.py

@@ -13,24 +13,24 @@ from datetime import date
 #from pathlib import Path
 from typing import Optional, List, Dict, Union
 
-#from . import crf_specifications as crf
-import crf_specifications as crf
-
-from UNFCCC_CRF_reader_core import read_crf_table
-from UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
-from UNFCCC_CRF_reader_core import get_latest_date_for_country
-from UNFCCC_CRF_reader_core import get_crf_files
-from UNFCCC_CRF_reader_core import get_country_name
-from UNFCCC_CRF_reader_devel import save_unknown_categories_info
-from UNFCCC_CRF_reader_devel import save_last_row_info
-
-from util import code_path, log_path, \
+from . import crf_specifications as crf
+#import crf_specifications as crf
+
+from .UNFCCC_CRF_reader_core import read_crf_table
+from .UNFCCC_CRF_reader_core import convert_crf_table_to_pm2if
+from .UNFCCC_CRF_reader_core import get_latest_date_for_country
+from .UNFCCC_CRF_reader_core import get_crf_files
+from .UNFCCC_CRF_reader_core import get_country_name
+from .UNFCCC_CRF_reader_devel import save_unknown_categories_info
+from .UNFCCC_CRF_reader_devel import save_last_row_info
+
+from .util import code_path, log_path, \
     custom_country_mapping, extracted_data_path, root_path, \
     all_crf_countries, NoCRFFilesError
 
-import sys
-sys.path.append(code_path.name)
-from UNFCCC_reader.get_submissions_info import get_country_code
+#import sys
+#sys.path.append(code_path.name)
+from ..UNFCCC_reader import get_country_code
 
 
 # functions:
@@ -45,7 +45,7 @@ from UNFCCC_reader.get_submissions_info import get_country_code
 
 
 # general approach:
-# main code in a function that reads on table from one file.
+# main UNFCCC_GHG_data in a function that reads on table from one file.
 # return raw pandas DF for use in different functions
 # wrappers around this function to read for a whole country or for test reading where we also
 # write files with missing sectors etc.
@@ -84,7 +84,7 @@ def read_crf_for_country(
     __________
 
     country_codes: str
-        ISO 3-letter country code
+        ISO 3-letter country UNFCCC_GHG_data
 
     submission_year: int
         Year of the submission of the data
@@ -220,7 +220,7 @@ def read_crf_for_country_datalad(
     __________
 
     country_codes: str
-        ISO 3-letter country code
+        ISO 3-letter country UNFCCC_GHG_data
 
     submission_year: int
         Year of the submission of the data
@@ -382,7 +382,7 @@ def read_new_crf_for_year_datalad(
                 output_files = output_files + country_info["output"]
             else:
                 data_read = submission_has_been_read(
-                    country_info["code"], country_info["name"],
+                    country_info["UNFCCC_GHG_data"], country_info["name"],
                     submission_year=submission_year,
                     submission_date=country_info["date"],
                     verbose=False,
@@ -438,7 +438,7 @@ def get_input_and_output_files_for_country(
         country_code = get_country_code(country)
     # now get the country name
     country_name = get_country_name(country_code)
-    country_info["code"] = country_code
+    country_info["UNFCCC_GHG_data"] = country_code
     country_info["name"] = country_name
 
     # determine latest data

+ 3 - 1
code/UNFCCC_CRF_reader/__init__.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/__init__.py

@@ -3,6 +3,8 @@ CRF reader module
 """
 
 #from pathlib import Path
-#from . import crf_specifications
+from . import crf_specifications
 from .UNFCCC_CRF_reader_prod import read_crf_for_country, read_crf_for_country_datalad
 
+__all__ = ["crf_specifications", "read_crf_for_country", "read_crf_for_country_datalad"]
+
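With crf_specifications and the two production reader functions re-exported here, downstream code can use plain absolute imports; a small usage sketch (the country code and submission year are illustrative values, and the exact keyword handling of read_crf_for_country is as defined in UNFCCC_CRF_reader_prod.py):

    from UNFCCC_GHG_data.UNFCCC_CRF_reader import crf_specifications, read_crf_for_country

    spec_2023 = crf_specifications.CRF2023             # table specifications for the 2023 round
    read_crf_for_country("DEU", submission_year=2023)  # illustrative country code and year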

+ 0 - 0
code/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2021_specification.py


+ 0 - 0
code/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2022_specification.py


+ 135 - 23
code/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/CRF2023_specification.py

@@ -42,7 +42,7 @@ TODO:
 import numpy as np
 from .util import unit_info
 
-CRF2022 = {
+CRF2023 = {
     "Table1s1": {
         "status": "tested",
         "table": {
@@ -715,6 +715,7 @@ CRF2022 = {
             ['Fossil part of biodiesel', ['1.A.3.b.i', 'OLBiodieselFC'], 4],  # LTU
             ['Other', ['1.A.3.b.i', 'OLOther'], 4],  # UKR, MLT
             ['Other Liquid Fuels', ['1.A.3.b.i', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.i', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.i', 'OMotorFuels'], 4],  # RUS
             ['Lubricants in 2-stroke engines', ['1.A.3.b.i', 'Lubricants'], 4],  # HUN
             ['LNG', ['1.A.3.b.i', 'LNG'], 4],  ## USA
@@ -746,6 +747,7 @@ CRF2022 = {
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Other', ['1.A.3.b.ii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.ii', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.ii', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.ii', 'OMotorFuels'], 4],  # RUS
             ['LNG', ['1.A.3.b.ii', 'LNG'], 4],  ## USA
             ['Gaseous fuels', ['1.A.3.b.ii', 'Gaseous'], 3],
@@ -774,6 +776,7 @@ CRF2022 = {
             ['Biodiesel (5 percent fossil portion)', ['1.A.3.b.iii', 'OLBiodieselFC'], 4],  # CAN
             ['Other', ['1.A.3.b.iii', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.iii', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.iii', 'OLOther'], 4],  # SWE new in 2023
             ['Other motor fuels', ['1.A.3.b.iii', 'OMotorFuels'], 4],  # RUS
             ['LNG', ['1.A.3.b.iii', 'LNG'], 4],  # USA
             ['GTL', ['1.A.3.b.iii', 'GTL'], 4],  # MCO, new in 2022
@@ -802,6 +805,7 @@ CRF2022 = {
             ['Lubricant Oil', ['1.A.3.b.iv', 'Lubricants'], 4],  # PRT
             ['Other', ['1.A.3.b.iv', 'OLOther'], 4],  # UKR (and probably others)
             ['Other Liquid Fuels', ['1.A.3.b.iv', 'OLOther'], 4],  # CYP
+            ['Other non-specified', ['1.A.3.b.iv', 'OLOther'], 4],  # SWE new in 2023
             ['Lube', ['1.A.3.b.iv', 'Lubricants'], 4],  # MCO
             ['Lubricants in 2-stroke engines', ['1.A.3.b.iv', 'Lubricants'], 4],  # HUN
             ['Lubricants (two-stroke engines)', ['1.A.3.b.iv', 'Lubricants'], 4],  # ESP
@@ -853,7 +857,7 @@ CRF2022 = {
             ['Biomass', ['1.A.3.b.v.6', 'Biomass'], 4],
             ['Other Fossil Fuels (please specify)', ['1.A.3.b.v.6', 'OtherFF'], 4],
             # BEL
-            ['Lubricant Two-Stroke Engines', ['1.A.3.b.v.7', 'Total'], 3],
+            ['Lubricant Two-Stroke Engines', ['1.A.3.b.v.7', 'Lubricants'], 3],
             ['Other Liquid Fuels (please specify)', ['1.A.3.b.v.7', 'OtherLiquid'], 4],
             # ROU
             ['Gaseous Fuels', ['1.A.3.b.v.8', 'Total'], 3],
@@ -925,6 +929,9 @@ CRF2022 = {
             ['Fuel oil C', ['1.A.3.d', 'FuelOilC'], 3],  # JPN
             ['Diesel Oil', ['1.A.3.d', 'OLDiesel'], 3],  # FIN
             ['Other Liquid Fuels', ['1.A.3.d', 'OLOther'], 3],  # ROU, new in 2022
+            ['Heating and Other Gasoil', ['1.A.3.d', 'OLHeatingOtherGasoil'], 3],
+            # ROU, new in 2023
+            ['Liquified Petroleum Gas', ['1.A.3.d', 'OLLPG'], 3],  # ROU, new in 2023
             ['Gaseous fuels', ['1.A.3.d', 'Gaseous'], 2],
             ['Biomass(6)', ['1.A.3.d', 'Biomass'], 2],
             ['Other fossil fuels (please specify)(4)', ['1.A.3.d', 'OtherFF'], 2],
@@ -1137,6 +1144,7 @@ CRF2022 = {
             ['heavy fuel oil', ['1.A.4.c.ii', 'HeavyFuelOil'], 4],  # NOR
             ['Other motor fuels', ['1.A.4.c.ii', 'OMotorFuels'], 4],  # RUS
             ['Biodiesel (5 percent fossil portion)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
+            ['Lubricating Oil (Two-Stroke Engines)', ['1.A.4.c.ii', 'OLBiodieselFC'], 4],  # CAN
             ['Gaseous fuels', ['1.A.4.c.ii', 'Gaseous'], 3],
             ['Biomass(6)', ['1.A.4.c.ii', 'Biomass'], 3],
             ['Other fossil fuels (please specify)(4)', ['1.A.4.c.ii', 'OtherFF'], 3],
@@ -1448,6 +1456,8 @@ CRF2022 = {
             ['Flaring', ['1.B.1.c.i'], 1],  # UKR, AUS
             ['Flaring of gas', ['1.B.1.c.i'], 1],  # SWE
             ['Coal Dumps', ['1.B.1.c.ii'], 1],  # JPN
+            ['Uncontrolled combustion and burning coal dumps', ['1.B.1.c.ii'], 1],
+            # JPN since 2023
             ['SO2 scrubbing', ['1.B.1.c.iii'], 1],  # SVN
             ['Flaring of coke oven gas', ['1.B.1.c.iv'], 1],  # KAZ
             ['Emisson from Coke Oven Gas Subsystem', ['1.B.1.c.iv'], 1],  # POL
@@ -2280,6 +2290,8 @@ CRF2022 = {
             ['Mechanical-Biological Treatment MBT', ['5.E.2']],  # DEU
             ['Accidental fires', ['5.E.3']],  # DEU, DKE, DNK, DNM
             ['Decomposition of Petroleum-Derived Surfactants', ['5.E.4']],  # JPN
+            ['Decomposition of Fossil-fuel Derived Surfactants', ['5.E.4']],
+            # JPN since 2023
             ['Other non-specified', ['5.E.5']],  # USA
             ['Biogas burning without energy recovery', ['5.E.6']],  # PRT
             ['Sludge spreading', ['5.E.7']],  # ESP
@@ -2462,13 +2474,16 @@ CRF2022 = {
             ['Other (please specify)', ['5.C.2.a.ii'], 2],
             ['agricultural waste', ['5.C.2.a.ii.1'], 3],  # ITA
             ['Agricultural residues', ['5.C.2.a.ii.1'], 3],  # ESP
+            ['Agriculture residues', ['5.C.2.a.ii.1'], 3],  # PRT
             ['Natural residues', ['5.C.2.a.ii.2'], 3],  # CHE
             ['Wood waste', ['5.C.2.a.ii.3'], 3],  # GBR, GBK
             ['Bonfires etc.', ['5.C.2.a.ii.4'], 3],  # DEU
             ['Bonfires', ['5.C.2.a.ii.4'], 3],  # NLD, ISL
             ['Other', ['5.C.2.a.ii.5'], 3],  # EST
             ['Other waste', ['5.C.2.a.ii.5'], 3],  # CZE
+            ['Waste', ['5.C.2.a.ii.5'], 3],  # GBR
             ['Industrial Solid Waste', ['5.C.2.a.ii.6'], 3],  # JPN
+            ['Vine', ['5.C.2.a.ii.7'], 3], # AUT
             ['Non-biogenic', ['5.C.2.b'], 1],
             ['Municipal solid waste', ['5.C.2.b.i'], 2],
             ['Other (please specify)', ['5.C.2.b.ii'], 2],
@@ -2478,6 +2493,7 @@ CRF2022 = {
             ['Bonfires', ['5.C.2.b.ii.4'], 3],  # ISL
             ['Other', ['5.C.2.b.ii.5'], 3],  # EST
             ['Other waste', ['5.C.2.b.ii.5'], 3],  # CZE
+            ['Waste', ['5.C.2.b.ii.5'], 3],  # GBR
             ['Industrial Solid Waste', ['5.C.2.b.ii.6'], 3],  # JPN
         ],
         "entity_mapping": {
@@ -2528,41 +2544,137 @@ CRF2022 = {
         },
     },  # tested
     "Summary1.As1": {  # Summary 1, sheet 1
-        "status": "TODO",
+        "status": "tested",
          "table": {
             "firstrow": 5,
-            "lastrow": 26,
+            "lastrow": 28,
             "header": ['entity', 'unit'],
             "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
             "categories": ["category"],
             "cols_to_ignore": [],
             "stop_cats": ["", np.nan],
-            "unit_info": unit_info["default"],
+            "unit_info": unit_info["summary"],
         },
         "sector_mapping": [
-            ['Total Energy', ['1']],
-            ['A. Fuel combustion activities (sectoral approach)', ['1.A']],
+            ['Total national emissions and removals', ['0']],
+            ['1. Energy', ['1']],
+            ['A. Fuel combustion Reference approach(2)', ['1.A-ref']],
+            ['Sectoral approach(2)', ['1.A']],
             ['1. Energy industries', ['1.A.1']],
-            ['a. Public electricity and heat production', ['1.A.1.a']],
-            ['b. Petroleum refining', ['1.A.1.b']],
-            ['c. Manufacture of solid fuels and other energy industries', ['1.A.1.c']],
             ['2. Manufacturing industries and construction', ['1.A.2']],
-            ['a. Iron and steel', ['1.A.2.a']],
-            ['b. Non-ferrous metals', ['1.A.2.b']],
-            ['c. Chemicals', ['1.A.2.c']],
-            ['d. Pulp, paper and print', ['1.A.2.d']],
-            ['e. Food processing, beverages and tobacco', ['1.A.2.e']],
-            ['f. Non-metallic minerals', ['1.A.2.f']],
-            ['g. Other (please specify)', ['1.A.2.g']],
             ['3. Transport', ['1.A.3']],
-            ['a. Domestic aviation', ['1.A.3.a']],
-            ['b. Road transportation', ['1.A.3.b']],
-            ['c. Railways', ['1.A.3.c']],
-            ['d. Domestic navigation', ['1.A.3.d']],
-            ['e. Other transportation', ['1.A.3.e']],
+            ['4. Other sectors', ['1.A.4']],
+            ['5. Other', ['1.A.5']],
+            ['B. Fugitive emissions from fuels', ['1.B']],
+            ['1. Solid fuels', ['1.B.1']],
+            ['2. Oil and natural gas and other emissions from energy production',
+             ['1.B.2']],
+            ['C. CO2 Transport and storage', ['1.C']],
+            ['2. Industrial processes and product use', ['2']],
+            ['A. Mineral industry', ['2.A']],
+            ['B. Chemical industry', ['2.B']],
+            ['C. Metal industry', ['2.C']],
+            ['D. Non-energy products from fuels and solvent use', ['2.D']],
+            ['E. Electronic industry', ['2.E']],
+            ['F. Product uses as substitutes for ODS', ['2.F']],
+            ['G. Other product manufacture and use', ['2.G']],
+            ['H. Other(3)', ['2.H']],
         ],
         "entity_mapping": {
-            "NOX": "NOx",
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs(1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
+        },
+        "coords_defaults": {
+            "class": "Total",
+        },
+    },  # tested
+    "Summary1.As2": {  # Summary 1, sheet 2
+        "status": "tested",
+         "table": {
+            "firstrow": 5,
+            "lastrow": 34,
+            "header": ['entity', 'entity', 'unit'],
+            "header_fill": [True, False, True],
+            "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
+            "categories": ["category"],
+            "cols_to_ignore": [],
+            "stop_cats": ["", np.nan],
+            "unit_info": unit_info["summary"],
+        },
+        "sector_mapping": [
+            ['3. Agriculture', ['3']],
+            ['A. Enteric fermentation', ['3.A']],
+            ['B. Manure management', ['3.B']],
+            ['C. Rice cultivation', ['3.C']],
+            ['D. Agricultural soils', ['3.D']],
+            ['E. Prescribed burning of savannas', ['3.E']],
+            ['F. Field burning of agricultural residues', ['3.F']],
+            ['G. Liming', ['3.G']],
+            ['H. Urea application', ['3.H']],
+            ['I. Other carbon-contining fertilizers', ['3.I']],
+            ['J. Other', ['3.J']],
+            ['4. Land use, land-use change and forestry (4)', ['4']],
+            ['A. Forest land (4)', ['4.A']],
+            ['B. Cropland (4)', ['4.B']],
+            ['C. Grassland (4)', ['4.C']],
+            ['D. Wetlands (4)', ['4.D']],
+            ['E. Settlements (4)', ['4.E']],
+            ['F. Other land (4)', ['4.F']],
+            ['G. Harvested wood products', ['4.G']],
+            ['H. Other (4)', ['4.H']],
+            ['5. Waste', ['5']],
+            ['A. Solid waste disposal (5)', ['5.A']],
+            ['B. Biological treatment of solid waste (5)', ['5.B']],
+            ['C. Incineration and open burning of waste (5)', ['5.C']],
+            ['D. Wastewater treatment and discharge', ['5.D']],
+            ['E. Other (5)', ['5.E']],
+            ['6. Other (please specify)(6)', ['6']],
+        ],
+        "entity_mapping": {
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs (1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
+        },
+        "coords_defaults": {
+            "class": "Total",
+        },
+    },  # tested
+    "Summary1.As3": {  # Summary 1, sheet 3
+        "status": "tested",
+         "table": {
+            "firstrow": 5,
+            "lastrow": 17,
+            "header": ['entity', 'entity', 'unit'],
+            "header_fill": [True, False, True],
+            "col_for_categories": "GREENHOUSE GAS SOURCE AND SINK CATEGORIES",
+            "categories": ["category"],
+            "cols_to_ignore": [],
+            "stop_cats": ["", np.nan],
+            "unit_info": unit_info["summary"],
+        },
+        "sector_mapping": [
+            ['Memo items:(7)', ['\IGNORE']],
+            ['International bunkers', ['M.Memo.Int']],
+            ['Aviation', ['M.Memo.Int.Avi']],
+            ['Navigation', ['M.Memo.Int.Mar']],
+            ['Multilateral operations', ['M.Memo.Mult']],
+            ['CO2 emissions from biomass', ['M.Memo.Bio']],
+            ['CO2 captured', ['M.Memo.CO2Cap']],
+            ['Long-term storage of C in waste disposal sites', ['M.Memo.LTSW']],
+            ['Indirect N2O', ['M.Memo.IndN2O']],
+            ['Indirect CO2', ['M.Memo.IndCO2']],
+        ],
+        "entity_mapping": {
+            'NOX': 'NOx',
+            'Net CO2 emissions/removals': 'CO2',
+            'HFCs(1)': 'HFCS (AR4GWP100)',
+            'PFCs(1)': 'PFCS (AR4GWP100)',
+            'Unspecified mix of HFCs and PFCs(1)': 'UnspMixOfHFCsPFCs (AR4GWP100)',
         },
         "coords_defaults": {
             "class": "Total",

+ 2 - 0
code/UNFCCC_CRF_reader/crf_specifications/__init__.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/__init__.py

@@ -5,3 +5,5 @@ Define the CRF specifications here for easy access
 from .CRF2021_specification import CRF2021
 from .CRF2022_specification import CRF2022
 from .CRF2023_specification import CRF2023
+
+__all__ = ["CRF2021", "CRF2022", "CRF2023"]

+ 10 - 0
code/UNFCCC_CRF_reader/crf_specifications/util.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/crf_specifications/util.py

@@ -29,4 +29,14 @@ unit_info = {
         },
         "default_unit": "kt",
     },
+    "summary": {  # contains fgas mixtures in CO2 eq units
+        "unit_row": 0,
+        "entity_row": "header",
+        "regexp_entity": r".*",
+        "regexp_unit": r"\((.*)\)",
+        "manual_repl_unit": {
+            "(kt CO2 equivalent)": "kt CO2eq",
+        },
+        "default_unit": "kt",
+    },
 }
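Compared to "default", the new "summary" entry takes units from the header row via regexp_unit and maps the f-gas mixture columns' "(kt CO2 equivalent)" label manually. A tiny illustrative sketch of how those two fields combine on example header strings (this helper is an assumption for illustration, not the reader's actual parsing code):

    import re

    manual_repl_unit = {"(kt CO2 equivalent)": "kt CO2eq"}
    regexp_unit = r"\((.*)\)"

    def unit_from_header(cell: str, default_unit: str = "kt") -> str:
        """Illustrative: manual replacements first, then the regexp, then the default unit."""
        if cell in manual_repl_unit:
            return manual_repl_unit[cell]
        match = re.search(regexp_unit, cell)
        return match.group(1) if match else default_unit

    print(unit_from_header("(kt)"))                 # -> "kt"
    print(unit_from_header("(kt CO2 equivalent)"))  # -> "kt CO2eq"
    print(unit_from_header("CO2"))                  # -> "kt" (default)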

+ 2 - 2
code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission.py

@@ -3,11 +3,11 @@ This script is a wrapper around the read_crf_for_country
 function such that it can be called from datalad
 """
 
-from UNFCCC_CRF_reader_prod import read_crf_for_country
+from .UNFCCC_CRF_reader_prod import read_crf_for_country
 import argparse
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--re_read', help='Read data also if already read before',
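Usage note: because the wrapper now imports read_crf_for_country relatively, it will typically need to be run as a module from the repository root (or with the UNFCCC_GHG_data package installed) rather than as a plain script path; for example, with placeholder country and year values:

    python -m UNFCCC_GHG_data.UNFCCC_CRF_reader.read_UNFCCC_CRF_submission \
        --country DEU --submission_year 2023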

+ 2 - 2
code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py

@@ -4,11 +4,11 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 """
 
-from UNFCCC_CRF_reader_prod import read_crf_for_country_datalad
+from .UNFCCC_CRF_reader_prod import read_crf_for_country_datalad
 import argparse
 
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or UNFCCC_GHG_data')
 parser.add_argument('--submission_year', help='Submission round to read')
 parser.add_argument('--submission_date', help='Date of submission to read', default=None)
 parser.add_argument('--re_read', help='Read data also if already read before',

+ 1 - 1
code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year.py

@@ -3,7 +3,7 @@ This script is a wrapper around the read_crf_for_country
 function such that it can be called from datalad
 """
 
-from UNFCCC_CRF_reader_prod import read_new_crf_for_year
+from .UNFCCC_CRF_reader_prod import read_new_crf_for_year
 import argparse
 
 parser = argparse.ArgumentParser()

+ 1 - 1
code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py

@@ -4,7 +4,7 @@ from doit in the current setup where doit runs on system python and
 not in the venv.
 """
 
-from UNFCCC_CRF_reader_prod import read_new_crf_for_year_datalad
+from .UNFCCC_CRF_reader_prod import read_new_crf_for_year_datalad
 from util import NoCRFFilesError
 import argparse
 

+ 7 - 1
code/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/test_read_UNFCCC_CRF_for_year.py

@@ -3,21 +3,27 @@ This script is a wrapper around the read_year_to_test_specs
 function such that it can be called from datalad
 """
 
-from UNFCCC_CRF_reader_devel import read_year_to_test_specs
+from UNFCCC_GHG_data.UNFCCC_CRF_reader.UNFCCC_CRF_reader_devel import read_year_to_test_specs
 import argparse
 
 parser = argparse.ArgumentParser()
 parser.add_argument('--submission_year', help='Submission round to read', type=int)
 parser.add_argument('--data_year', help='Data year to read', type=int, default=2010)
+parser.add_argument('--totest', help='read tables to test', action='store_true')
 args = parser.parse_args()
 
 
 submission_year = args.submission_year
 data_year = args.data_year
+if args.totest:
+    totest = True
+else:
+    totest = False
 
 read_year_to_test_specs(
     submission_year=submission_year,
     data_year=data_year,
+    totest=totest,
 )
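One way to invoke the test wrapper with the new flag from the repository root (placeholder year values; without --totest it keeps reading only tables whose status is "tested"):

    python -m UNFCCC_GHG_data.UNFCCC_CRF_reader.test_read_UNFCCC_CRF_for_year \
        --submission_year 2023 --data_year 2010 --totest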
 
 

+ 1 - 1
code/UNFCCC_CRF_reader/util.py → UNFCCC_GHG_data/UNFCCC_CRF_reader/util.py

@@ -4,7 +4,7 @@ from pathlib import Path
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
 log_path = root_path / "log"
-code_path = root_path / "code"
+code_path = root_path / "UNFCCC_GHG_data"
 downloaded_data_path = root_path / "downloaded_data" / "UNFCCC"
 extracted_data_path = root_path / "extracted_data" / "UNFCCC"
 

+ 0 - 0
UNFCCC_GHG_data/UNFCCC_downloader/__init__.py


+ 0 - 0
code/UNFCCC_downloader/download_annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py


+ 0 - 0
code/UNFCCC_downloader/download_ndc.py → UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py


+ 0 - 0
code/UNFCCC_downloader/download_non-annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_annexI.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_bur.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py


+ 0 - 0
code/UNFCCC_downloader/fetch_submissions_nc.py → UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py


+ 0 - 0
code/UNFCCC_downloader/unfccc_submission_info.py → UNFCCC_GHG_data/UNFCCC_downloader/unfccc_submission_info.py


+ 5 - 5
code/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Argentina/read_ARG_BUR4_from_pdf.py

@@ -86,7 +86,7 @@ cat_codes_manual = {  # conversion to PRIMAP1 format
     'S/N': 'MMULTIOP',
 }
 
-cat_code_regexp = r'(?P<code>^[A-Z0-9]{1,8}).*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[A-Z0-9]{1,8}).*'
 
 time_format = "%Y"
 
@@ -219,7 +219,7 @@ for page in pages_to_read:
     if page in range(232, 235):
         df_current.iloc[
             metadata["entity"][0], metadata["entity"][1]] = "KYOTOGHG (SARGWP100)"
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
@@ -253,7 +253,7 @@ for page in pages_to_read:
 
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     # then the regex replacements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
                                                                 regex=True)
 
@@ -311,7 +311,7 @@ for page in pages_to_read_fgases:
         dict(zip(df_current.columns, list(df_current.loc[idx_header[0]]))), axis=1)
     df_current = df_current.drop(idx_header)
 
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols_fgases, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format
@@ -350,7 +350,7 @@ for page in pages_to_read_fgases:
 
     df_current["category"] = df_current["category"].replace(cat_codes_manual)
     # then the regex repalcements
-    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('code'))
+    repl = lambda m: convert_ipcc_code_primap_to_primap2('IPC' + m.group('UNFCCC_GHG_data'))
     df_current["category"] = df_current["category"].str.replace(cat_code_regexp, repl,
                                                                 regex=True)
 

+ 1 - 1
code/UNFCCC_reader/Chile/config_CHL_BUR4.py → UNFCCC_GHG_data/UNFCCC_reader/Chile/config_CHL_BUR4.py

@@ -64,7 +64,7 @@ filter_remove_IPCC2006 = {
 }
 
 
-cat_mapping = { # categories not listed here have the same code as in IPCC 2006 specifications
+cat_mapping = { # categories not listed here have the same UNFCCC_GHG_data as in IPCC 2006 specifications
     '3': 'M.AG',
     '3.A': '3.A.1',
     '3.A.1': '3.A.1.a',

+ 2 - 2
code/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Chile/read_CHL_BUR4_from_xlsx.py

@@ -52,7 +52,7 @@ unit_info = {
 }
 cols_to_drop = ['Unnamed: 14', 'Unnamed: 16', 'Código IPCC.1',
                 'Categorías de fuente y sumidero de gases de efecto invernadero.1']
-# columns for category code and original category name
+# columns for category UNFCCC_GHG_data and original category name
 index_cols = ['Código IPCC', 'Categorías de fuente y sumidero de gases de efecto invernadero']
 
 # operations on long format DF
@@ -169,7 +169,7 @@ for year in years_to_read:
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=str(year), skiprows=2, nrows=442, engine="openpyxl")
     # drop the columns which are empty and repetition of the metadata for the second block
     df_current.drop(cols_to_drop, axis=1, inplace=True)
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category UNFCCC_GHG_data and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set multi-index. necessary for the stack operation in the conversion to long format

+ 0 - 0
code/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Colombia/read_COL_BUR3_from_xlsx.py


+ 3 - 3
code/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Indonesia/read_IDN_BUR3_from_pdf.py

@@ -38,7 +38,7 @@ year = 2019
 entity_row = 0
 unit_row = 1
 index_cols = "Categories"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 
@@ -51,7 +51,7 @@ cat_codes_manual = {
     #'3A2b Direct N2O Emissions from Manure Management': '3.A.2',
 }
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9]{1,4})\s.*'
+cat_code_regexp = r'(?P<UNFCCC_GHG_data>^[a-zA-Z0-9]{1,4})\s.*'
 
 coords_cols = {
     "category": "category",
@@ -202,7 +202,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('UNFCCC_GHG_data')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 

+ 0 - 0
code/UNFCCC_reader/Mexico/config_MEX_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Mexico/config_MEX_BUR3.py


+ 3 - 3
code/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Mexico/read_MEX_BUR3_from_pdf.py

@@ -32,7 +32,7 @@ entity_row = 0
 unit_row = 1
 
 index_cols = "Categorías de fuentes y sumideros de GEI"
-# special header as category code and name in one column
+# special header as category UNFCCC_GHG_data and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 units = {
@@ -53,7 +53,7 @@ cat_codes_manual = {
     '2F6 Otras aplicaciones': '2F6',
 }
 
-cat_code_regexp = r'^\[(?P<code>[a-zA-Z0-9]{1,3})\].*'
+cat_code_regexp = r'^\[(?P<UNFCCC_GHG_data>[a-zA-Z0-9]{1,3})\].*'
 
 coords_cols = {
     "category": "category",
@@ -168,7 +168,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('code')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 

+ 0 - 0
code/UNFCCC_reader/Montenegro/config_MNE_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Montenegro/config_MNE_BUR3.py


+ 1 - 1
code/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Montenegro/read_MNE_BUR3_from_pdf.py

@@ -129,7 +129,7 @@ for i, table in enumerate(tables):
         unit_parts = unit.split(" ")
         unit = f"{unit_parts[0]} CO2eq"
 
-    # remove "/n" from category code and name columns
+    # remove "\n" from category code and name columns
     df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace("\n", "")
     df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace("\n", "")
 

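Tables extracted from PDFs (e.g. via camelot, which is in the dependencies) often carry line breaks inside cells; the Montenegro reader strips them from the category code and name columns before further processing. A small sketch of that cleanup step on a toy table:

```python
import pandas as pd

# toy extract with embedded line breaks, as PDF table extraction often produces
df_current_table = pd.DataFrame({
    0: ['1.A.1', '1.A.\n2'],
    1: ['Energy\nIndustries', 'Manufacturing'],
})

# remove newlines from the category code and name columns
df_current_table.iloc[:, 0] = df_current_table.iloc[:, 0].str.replace('\n', '')
df_current_table.iloc[:, 1] = df_current_table.iloc[:, 1].str.replace('\n', '')
print(df_current_table)
```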
+ 0 - 0
code/UNFCCC_reader/Morocco/config_MAR_BUR3.py → UNFCCC_GHG_data/UNFCCC_reader/Morocco/config_MAR_BUR3.py


+ 3 - 3
code/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Morocco/read_MAR_BUR3_from_pdf.py

@@ -32,7 +32,7 @@ pages_to_read = range(104, 138)
 
 compression = dict(zlib=True, complevel=9)
 
-# special header as category code and name in one column
+# special header as category code and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 index_cols = ['Catégories']
@@ -58,7 +58,7 @@ cat_codes_manual = {
     '1.B.1.a.i.1 -Exploitation minière': '1.A.1.a.i.1',
 }
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
+cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,14})\s-\s.*'
 
 coords_terminologies = {
     "area": "ISO3",
@@ -171,7 +171,7 @@ df_all["category"] = df_all["orig_cat_name"]
 # first the manual replacements
 df_all["category"] = df_all["category"].replace(cat_codes_manual)
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('code')
 df_all["category"] = df_all["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_all = df_all.reset_index(drop=True)
 

+ 0 - 0
code/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/config_KOR_BUR4.py


+ 2 - 2
code/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_2021-Inventory_from_xlsx.py

@@ -37,7 +37,7 @@ years_to_read = range(1990, 2019 + 1)
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 cols_to_read = range(1, 2019 - 1990 + 3)
 
-# columns for category code and original category name
+# columns for category code and original category name
 index_cols = ['분야·부문/연도']
 
 sheet_metadata = {
@@ -136,7 +136,7 @@ for sheet in sheets_to_read:
     # read current sheet (one sheet per gas)
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=146, usecols=cols_to_read,
                                engine="openpyxl")
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category code and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format

+ 2 - 2
code/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py → UNFCCC_GHG_data/UNFCCC_reader/Republic_of_Korea/read_KOR_BUR4_from_xlsx.py

@@ -32,7 +32,7 @@ years_to_read = range(1990, 2018 + 1)
 sheets_to_read = ['온실가스', 'CO2', 'CH4', 'N2O', 'HFCs', 'PFCs', 'SF6']
 cols_to_read = range(1, 2018 - 1990 + 3)
 
-# columns for category code and original category name
+# columns for category code and original category name
 index_cols = ['분야·부문/연도']
 
 sheet_metadata = {
@@ -131,7 +131,7 @@ for sheet in sheets_to_read:
     # read current sheet (one sheet per gas)
     df_current = pd.read_excel(input_folder / inventory_file, sheet_name=sheet, skiprows=3, nrows=144, usecols=cols_to_read,
                                engine="openpyxl")
-    # drop all rows where the index cols (category code and name) are both NaN
+    # drop all rows where the index cols (category code and name) are both NaN
     # as without one of them there is no category information
     df_current.dropna(axis=0, how='all', subset=index_cols, inplace=True)
     # set index. necessary for the stack operation in the conversion to long format

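The Korean inventory files keep one sheet per gas; each sheet is read, reduced to rows with category information, stacked into long format, and the gas/unit from the sheet metadata is attached before the per-gas frames are concatenated. A rough sketch of that loop, assuming sheet_metadata maps sheet names to an entity and unit (that structure is an assumption — the contents of sheet_metadata are not shown in this diff — and read_sheet stands in for pd.read_excel):

```python
import pandas as pd

# assumed structure: sheet name -> coordinates to attach (not taken from the reader)
sheet_metadata = {
    'CO2': {'entity': 'CO2', 'unit': 'Gg CO2'},
    'CH4': {'entity': 'CH4', 'unit': 'Gg CH4'},
}

def read_sheet(sheet):
    # stand-in for pd.read_excel(input_folder / inventory_file, sheet_name=sheet, ...)
    return pd.DataFrame({
        '분야·부문/연도': ['1. 에너지', '2. 산업공정'],
        1990: [100.0, 20.0],
        1991: [110.0, 21.0],
    })

df_all = None
for sheet, meta in sheet_metadata.items():
    df_current = read_sheet(sheet)
    # drop rows without category information
    df_current = df_current.dropna(axis=0, how='all', subset=['분야·부문/연도'])
    # stack the year columns into long format
    df_long = df_current.set_index('분야·부문/연도').stack().reset_index()
    df_long.columns = ['category', 'time', 'data']
    # attach gas and unit taken from the sheet metadata
    for coord, value in meta.items():
        df_long[coord] = value
    df_all = df_long if df_all is None else pd.concat([df_all, df_long])

print(df_all.head())
```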
+ 0 - 0
code/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py → UNFCCC_GHG_data/UNFCCC_reader/Taiwan/config_TWN_NIR2022.py


+ 2 - 2
code/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Taiwan/read_TWN_2022-Inventory_from_pdf.py

@@ -32,7 +32,7 @@ if not output_folder.exists():
 output_filename = 'TWN_inventory_2022_'
 inventory_file = '00_abstract_en.pdf'
 
-cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*'
+cat_code_regexp = r'(?P<code>^[a-zA-Z0-9\.]{1,7})\s.*'
 
 time_format = "%Y"
 
@@ -227,7 +227,7 @@ for table_name in table_defs.keys():
     df_this_table["category"] = df_this_table["category"].replace(
         table_def["cat_codes_manual"])
     # then the regex replacements
-    repl = lambda m: m.group('code')
+    repl = lambda m: m.group('code')
     df_this_table["category"] = df_this_table["category"].str.replace(cat_code_regexp,
                                                                       repl, regex=True)
 

+ 3 - 3
code/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py → UNFCCC_GHG_data/UNFCCC_reader/Thailand/read_THA_BUR3_from_pdf.py

@@ -44,7 +44,7 @@ unit_row = 1
 gwp_to_use = "AR4GWP100"
 
 index_cols = "Greenhouse gas source and sink categories"
-# special header as category code and name in one column
+# special header as category code and name in one column
 header_long = ["orig_cat_name", "entity", "unit", "time", "data"]
 
 # manual category codes
@@ -54,7 +54,7 @@ cat_codes_manual = {
     'CO2 from Biomass': 'MBIO',
 }
 
-cat_code_regexp = r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*'
+cat_code_regexp = r'^(?P<code>[a-zA-Z0-9]{1,4})[\s\.].*'
 
 coords_cols = {
     "category": "category",
@@ -184,7 +184,7 @@ df_inventory_long["category"] = df_inventory_long["orig_cat_name"]
 # first the manual replacements
 df_inventory_long["category"] = df_inventory_long["category"].replace(cat_codes_manual)
 # then the regex replacements
-repl = lambda m: m.group('code')
+repl = lambda m: m.group('code')
 df_inventory_long["category"] = df_inventory_long["category"].str.replace(cat_code_regexp, repl, regex=True)
 df_inventory_long = df_inventory_long.reset_index(drop=True)
 

+ 6 - 0
UNFCCC_GHG_data/UNFCCC_reader/__init__.py

@@ -0,0 +1,6 @@
+# expose some of the functions to the outside as they are used in other readers as well
+# TODO: create a unified util module for all readers
+
+from .get_submissions_info import get_country_code
+
+__all__ = ["get_country_code"]

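With the new __init__.py in place, the helper can be imported from the installed package instead of via script-relative imports. A short usage example, assuming the package has been installed (e.g. via the editable install that the venv task in dodo.py performs); the printed results reflect the expected behaviour of get_country_code, whose full body is not shown in this diff:

```python
from UNFCCC_GHG_data.UNFCCC_reader import get_country_code

print(get_country_code("Kenya"))  # should resolve the name to 'KEN'
print(get_country_code("KEN"))    # a valid ISO3 code should be returned unchanged
```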
+ 1 - 1
code/UNFCCC_reader/country_info.py → UNFCCC_GHG_data/UNFCCC_reader/country_info.py

@@ -8,7 +8,7 @@ from get_submissions_info import get_country_datasets
 # Find the right function and possible input and output files and
 # read the data using datalad run.
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or code')
 args = parser.parse_args()
 country = args.country
 

+ 0 - 0
code/UNFCCC_reader/folder_mapping.json → UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.json


+ 0 - 0
code/UNFCCC_reader/folder_mapping.py → UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py


+ 35 - 35
code/UNFCCC_reader/get_submissions_info.py → UNFCCC_GHG_data/UNFCCC_reader/get_submissions_info.py

@@ -9,7 +9,7 @@ import pycountry
 
 root_path = Path(__file__).parents[2].absolute()
 root_path = root_path.resolve()
-code_path = root_path / "code" / "UNFCCC_reader"
+code_path = root_path / "UNFCCC_GHG_data" / "UNFCCC_reader"
 # beware, folders below are different than for CRF reader
 downloaded_data_path = root_path / "downloaded_data"
 extracted_data_path = root_path / "extracted_data"
@@ -46,14 +46,14 @@ def get_country_submissions(
         print_sub: bool = True,
 ) -> Dict[str, List[str]]:
     """
-    Input is a three letter ISO code for a country, or the countries name.
-    The function tries to map the country name to an ISO code and then
+    Input is a three letter ISO code for a country, or the country's name.
+    The function tries to map the country name to an ISO code and then
     queries the folder mapping files for folders.
 
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
         print_sub: bool
             If True information on submissions will be written to stdout
@@ -70,7 +70,7 @@ def get_country_submissions(
     country_code = get_country_code(country_name)
 
     if print_sub:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     country_submissions = {}
     if print_sub:
@@ -115,14 +115,14 @@ def get_country_datasets(
         print_ds: bool = True,
 ) -> Dict[str, List[str]]:
     """
-    Input is a three letter ISO code for a country, or the country's name.
-    The function tries to map the country name to an ISO code and then
-    checks the code and data folders for content on the country.
+    Input is a three letter ISO code for a country, or the country's name.
+    The function tries to map the country name to an ISO code and then
+    checks the UNFCCC_GHG_data and data folders for content on the country.
 
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter UNFCCC_GHG_data
 
         print_ds: bool
             If True information on submissions will be written to stdout
@@ -138,11 +138,11 @@ def get_country_datasets(
     data_folder_legacy = legacy_data_path
 
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
 
     if print_ds:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     rep_data = {}
     # data
@@ -181,7 +181,7 @@ def get_country_datasets(
                     # process filename to get submission
                     parts = dataset.split('_')
                     if parts[0] != country_code:
-                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
+                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
                     else:
                         terminology = "_".join(parts[3 : ])
                         key = f"{parts[1]} ({parts[2]}, {terminology})"
@@ -197,9 +197,9 @@ def get_country_datasets(
 
                         code_file = get_code_file(country_code, parts[1])
                         if code_file:
-                            data_info = data_info + f"code: {code_file.name}"
+                            data_info = data_info + f"UNFCCC_GHG_data: {code_file.name}"
                         else:
-                            data_info = data_info + f"code: not found"
+                            data_info = data_info + f"UNFCCC_GHG_data: not found"
 
                         cleaned_datasets_current_folder[key] = data_info
 
@@ -250,7 +250,7 @@ def get_country_datasets(
                     # process filename to get submission
                     parts = dataset.split('_')
                     if parts[0] != country_code:
-                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
+                        cleaned_datasets_current_folder[f'Wrong code: {parts[0]}'] = dataset
                     else:
                         terminology = "_".join(parts[3 : ])
                         key = f"{parts[1]} ({parts[2]}, {terminology}, legacy)"
@@ -288,13 +288,13 @@ def get_country_code(
         country_name: str,
 )->str:
     """
-    obtain country code. If the input is a code it will be returned, if the input
-    is not a three letter code a search will be performed
+    obtain country code. If the input is a code it will be returned, if the input
+    is not a three letter code a search will be performed
 
     Parameters
     __________
     country_name: str
-        Country code or name to get the three-letter code for.
+        Country code or name to get the three-letter code for.
 
     """
     # First check if it's in the list of custom codes
@@ -302,7 +302,7 @@ def get_country_code(
         country_code = country_name
     else:
         try:
-            # check if it's a 3 letter code
+            # check if it's a 3 letter code
             country = pycountry.countries.get(alpha_3=country_name)
             country_code = country.alpha_3
         except:
@@ -310,7 +310,7 @@ def get_country_code(
                 country = pycountry.countries.search_fuzzy(country_name.replace("_", " "))
             except:
                 raise ValueError(f"Country name {country_name} can not be mapped to "
-                                 f"any country code. Try using the ISO3 code directly.")
+                                 f"any country UNFCCC_GHG_data. Try using the ISO3 UNFCCC_GHG_data directly.")
             if len(country) > 1:
                 country_code = None
                 for current_country in country:
@@ -337,13 +337,13 @@ def get_possible_inputs(
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
         submission: str
             String of the submission
 
         print_info: bool = False
-            If True print information on code found
+            If True print information on code found
 
     Returns
     -------
@@ -352,11 +352,11 @@ def get_possible_inputs(
 
     data_folder = downloaded_data_path
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
 
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     input_files = []
     for item in data_folder.iterdir():
@@ -399,7 +399,7 @@ def get_possible_outputs(
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
         submission: str
             String of the submission
@@ -414,10 +414,10 @@ def get_possible_outputs(
 
     data_folder = extracted_data_path
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     output_files = []
     for item in data_folder.iterdir():
@@ -457,17 +457,17 @@ def get_code_file(
     Parameters
     ----------
         country_name: str
-            String containing the country name or ISO 3 letter code
+            String containing the country name or ISO 3 letter code
 
         submission: str
             String of the submission
 
         print_info: bool = False
-            If True print information on code found
+            If True print information on code found
 
     Returns
     -------
-        returns a pathlib Path object for the code file
+        returns a pathlib Path object for the code file
     """
 
     code_file_path = None
@@ -477,18 +477,18 @@ def get_code_file(
     if submission[0:3] == "CRF":
         return root_path / "UNFCCC_CRF_reader"
 
-    # obtain country code
+    # obtain country code
     country_code = get_country_code(country_name)
 
     if print_info:
-        print(f"Country name {country_name} maps to ISO code {country_code}")
+        print(f"Country name {country_name} maps to ISO UNFCCC_GHG_data {country_code}")
 
     with open(code_path / "folder_mapping.json", "r") as mapping_file:
         folder_mapping = json.load(mapping_file)
 
     if country_code not in folder_mapping:
         if print_info:
-            print("No code available")
+            print("No UNFCCC_GHG_data available")
             print("")
     else:
         country_folder = code_path / folder_mapping[country_code]
@@ -497,13 +497,13 @@ def get_code_file(
         for file in country_folder.iterdir():
             if file.match(code_file_name_candidate):
                 if code_file_path is not None:
-                    raise ValueError(f"Found multiple code candidates: "
+                    raise ValueError(f"Found multiple UNFCCC_GHG_data candidates: "
                                      f"{code_file_path} and file.name. "
                                      f"Please use only one file with name "
                                      f"'read_ISO3_submission_XXX.YYY'.")
                 else:
                     if print_info:
-                        print(f"Found code file {file.relative_to(root_path)}")
+                        print(f"Found UNFCCC_GHG_data file {file.relative_to(root_path)}")
                 code_file_path = file
 
     if code_file_path is not None:

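get_country_code first checks a list of custom codes, then tries the input as an ISO3 code and finally falls back to a fuzzy name search via pycountry. A stripped-down, self-contained sketch of that resolution order — custom_country_mapping and the exact exception handling are assumptions for this sketch, not taken from the module:

```python
import pycountry

custom_country_mapping = {'EUA': 'European Union'}  # assumed; the real module defines its own list

def resolve_country_code(country_name: str) -> str:
    # custom codes are returned as-is
    if country_name in custom_country_mapping:
        return country_name
    try:
        # accept a 3-letter ISO code directly
        return pycountry.countries.get(alpha_3=country_name).alpha_3
    except (AttributeError, LookupError):
        pass
    try:
        # fall back to a fuzzy name search
        candidates = pycountry.countries.search_fuzzy(country_name.replace('_', ' '))
    except LookupError:
        raise ValueError(f"Country name {country_name} can not be mapped to "
                         f"any country code. Try using the ISO3 code directly.")
    return candidates[0].alpha_3

print(resolve_country_code('Republic of Korea'))  # expected: 'KOR'
```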
+ 4 - 4
code/UNFCCC_reader/read_UNFCCC_submission.py → UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py

@@ -14,7 +14,7 @@ from get_submissions_info import get_possible_outputs
 # Find the right function and possible input and output files and
 # read the data using datalad run.
 parser = argparse.ArgumentParser()
-parser.add_argument('--country', help='Country name or code')
+parser.add_argument('--country', help='Country name or code')
 parser.add_argument('--submission', help='Submission to read')
 
 args = parser.parse_args()
@@ -34,7 +34,7 @@ print("")
 script_name = get_code_file(country, submission)
 
 if script_name is not None:
-    print(f"Found code file {script_name}")
+    print(f"Found UNFCCC_GHG_data file {script_name}")
     print("")
 
     # get possible input files
@@ -77,8 +77,8 @@ if script_name is not None:
         explicit=True,
     )
 else:
-    # no code found.
-    print(f"No code found to read {submission} from {country}")
+    # no code found.
+    print(f"No code found to read {submission} from {country}")
     print(f"Use 'doit country_info --country={country} to get "
           f"a list of available submissions and datasets.")
 

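read_UNFCCC_submission.py glues the pieces together: it parses --country and --submission, looks up the matching reading script with get_code_file plus the candidate input and output files, and then runs the script through datalad run so inputs and outputs are recorded. A much simplified sketch of that dispatch, without the datalad wrapping (subprocess is used here purely for illustration; the real script records the run with datalad):

```python
import argparse
import subprocess
import sys

from UNFCCC_GHG_data.UNFCCC_reader.get_submissions_info import get_code_file

parser = argparse.ArgumentParser()
parser.add_argument('--country', help='Country name or code')
parser.add_argument('--submission', help='Submission to read')
args = parser.parse_args()

script_name = get_code_file(args.country, args.submission)
if script_name is not None:
    print(f"Found code file {script_name}")
    # the real script wraps this call in `datalad run` with explicit inputs/outputs
    subprocess.run([sys.executable, str(script_name)], check=True)
else:
    print(f"No code found to read {args.submission} from {args.country}")
```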
+ 8 - 0
UNFCCC_GHG_data/__init__.py

@@ -0,0 +1,8 @@
+####
+
+from . import UNFCCC_reader
+from . import UNFCCC_CRF_reader
+# import UNFCCC_DI_reader
+# import UNFCCC_downloader
+
+__all__ = ["UNFCCC_reader", "UNFCCC_CRF_reader"]

+ 0 - 12
code/requirements.txt

@@ -1,12 +0,0 @@
-bs4
-requests
-pandas
-selenium
-primap2
-countrynames
-pycountry
-datalad
-treelib
-camelot-py
-opencv-python
-ghostscript

+ 25 - 23
dodo.py

@@ -7,10 +7,12 @@ from doit import get_var
 def task_setup_venv():
     """Create virtual environment"""
     return {
-        'file_dep': ['code/requirements.txt'],
+        'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
         'actions': ['python3 -m venv venv',
-                    './venv/bin/pip install --upgrade pip',
-                    './venv/bin/pip install -Ur code/requirements.txt',
+                    './venv/bin/pip install --upgrade pip wheel',
+                    #'./venv/bin/pip install -Ur UNFCCC_GHG_data/requirements.txt',
+                    './venv/bin/pip install --upgrade --upgrade-strategy '
+                    'eager -e .[dev]',
                     'touch venv',],
         'targets': ['venv'],
         'verbosity': 2,
@@ -27,7 +29,7 @@ def task_map_folders():
     Create or update the folder mapping in the given folder
     """
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder={read_config_folder['folder']}"],
         'verbosity': 2,
         'setup': ['setup_venv'],
@@ -41,7 +43,7 @@ def task_update_bur():
         'targets': ['downloaded_data/UNFCCC/submissions-bur.csv'],
         'actions': ['datalad run -m "Fetch BUR submissions" '
                     '-o downloaded_data/UNFCCC/submissions-bur.csv '
-                    './venv/bin/python code/UNFCCC_downloader/fetch_submissions_bur.py'],
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_bur.py'],
         'verbosity': 2,
         'setup': ['setup_venv'],
     }
@@ -55,8 +57,8 @@ def task_download_bur():
         # before download
         'actions': ['datalad run -m "Download BUR submissions" '
                     '-i downloaded_data/UNFCCC/submissions-bur.csv '
-                    './venv/bin/python code/UNFCCC_downloader/download_non-annexI.py --category=BUR',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=BUR',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -70,7 +72,7 @@ def task_update_nc():
         'targets': ['downloaded_data/UNFCCC/submissions-nc.csv'],
         'actions': ['datalad run -m "Fetch NC submissions" '
                     '-o downloaded_data/UNFCCC/submissions-nc.csv '
-                    './venv/bin/python code/UNFCCC_downloader/fetch_submissions_nc.py'],
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_nc.py'],
         'verbosity': 2,
         'setup': ['setup_venv'],
     }
@@ -84,8 +86,8 @@ def task_download_nc():
         # before download
         'actions': ['datalad run -m "Download NC submissions" '
                     '-i downloaded_data/UNFCCC/submissions-nc.csv '
-                    './venv/bin/python code/UNFCCC_downloader/download_non-annexI.py --category=NC',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_non-annexI.py --category=NC',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -108,7 +110,7 @@ def task_update_annexi():
         'actions': [f"datalad run -m 'Fetch AnnexI submissions for {update_aI_config['year']}' "
                     "--explicit "
                     f"-o downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
-                    f"./venv/bin/python code/UNFCCC_downloader/fetch_submissions_annexI.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/fetch_submissions_annexI.py "
                     f"--year={update_aI_config['year']}"],
         'verbosity': 2,
         'setup': ['setup_venv'],
@@ -124,9 +126,9 @@ def task_download_annexi():
         'actions': [f"datalad run -m 'Download AnnexI submissions for "
                     f"{update_aI_config['category']}{update_aI_config['year']}' "
                     f"-i downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv "
-                    f"./venv/bin/python code/UNFCCC_downloader/download_annexI.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_annexI.py "
                     f"--category={update_aI_config['category']} --year={update_aI_config['year']}",
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -138,8 +140,8 @@ def task_download_ndc():
     """ Download NDC submissions """
     return {
         'actions': ['datalad run -m "Download NDC submissions" '
-                    './venv/bin/python code/UNFCCC_downloader/download_ndc.py',
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    './venv/bin/python UNFCCC_GHG_data/UNFCCC_downloader/download_ndc.py',
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=downloaded_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -157,11 +159,11 @@ read_config = {
 
 # TODO: make individual task for non-UNFCCC submissions
 def task_read_unfccc_submission():
-    """ Read submission for a country (if code exists) (not for CRF)"""
+    """ Read submission for a country (if UNFCCC_GHG_data exists) (not for CRF)"""
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/read_UNFCCC_submission.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/read_UNFCCC_submission.py "
                     f"--country={read_config['country']} --submission={read_config['submission']}",
-                    f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+                    f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                     f"--folder=extracted_data/UNFCCC"
                     ],
         'verbosity': 2,
@@ -182,11 +184,11 @@ read_config_crf = {
 def task_read_unfccc_crf_submission():
     """ Read CRF submission for a country """
     actions = [
-        f"./venv/bin/python code/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_UNFCCC_CRF_submission_datalad.py "
         f"--country={read_config_crf['country']} "
         f"--submission_year={read_config_crf['submission_year']} "
         f"--submission_date={read_config_crf['submission_date']} ",
-        f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+        f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
         f"--folder=extracted_data/UNFCCC"
         ]
     if read_config_crf["re_read"] == "True":
@@ -201,9 +203,9 @@ def task_read_unfccc_crf_submission():
 def task_read_new_unfccc_crf_for_year():
     """ Read CRF submission for all countries for given submission year. by default only reads
     data not present yet. Only reads the latest updated submission for each country."""
-    actions = [f"./venv/bin/python code/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
+    actions = [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_CRF_reader/read_new_UNFCCC_CRF_for_year_datalad.py "
                f"--submission_year={read_config_crf['submission_year']} ",
-               f"./venv/bin/python code/UNFCCC_reader/folder_mapping.py "
+               f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                f"--folder=extracted_data/UNFCCC"
                ]
     # specifying countries is currently disabled due to problems with command line
@@ -224,7 +226,7 @@ def task_country_info():
     """ Print information on submissions and datasets
     available for given country"""
     return {
-        'actions': [f"./venv/bin/python code/UNFCCC_reader/country_info.py "
+        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/country_info.py "
                     f"--country={read_config['country']}"],
         'verbosity': 2,
         'setup': ['setup_venv'],

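All tasks in dodo.py follow the same shape: a task_* function returning a dict with actions (now pointing into the UNFCCC_GHG_data package), optional file_dep/targets, and a setup dependency on the venv task; configurable tasks read their parameters with get_var. A minimal sketch of a task in that style (the task name and default folder are illustrative, not part of the repository):

```python
from doit import get_var

# command-line configurable parameter, e.g. `doit example_map_folders folder=downloaded_data/UNFCCC`
example_config = {
    'folder': get_var('folder', 'downloaded_data/UNFCCC'),
}

def task_example_map_folders():
    """Illustrative task: refresh the folder mapping for one folder"""
    return {
        'actions': [f"./venv/bin/python UNFCCC_GHG_data/UNFCCC_reader/folder_mapping.py "
                    f"--folder={example_config['folder']}"],
        'verbosity': 2,
        'setup': ['setup_venv'],
    }
```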
+ 8 - 0
pyproject.toml

@@ -0,0 +1,8 @@
+[build-system]
+requires = [
+    "setuptools>=42",
+    "wheel",
+    "setuptools_scm[toml]>=3.4"
+]
+build-backend = "setuptools.build_meta"
+

+ 1 - 0
requirements.txt

@@ -0,0 +1 @@
+.

+ 1 - 0
requirements_dev.txt

@@ -0,0 +1 @@
+.[dev]

+ 72 - 0
setup.cfg

@@ -0,0 +1,72 @@
+[metadata]
+name = UNFCCC_GHG_data
+version = 0.2
+author = Johannes Gütschow
+author_email = mail@johannes-guetschow.de
+description = Tools to read GHG data submitted to the UNFCCC using various methods
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/JGuetschow/UNFCCC_non-AnnexI_data
+#project_urls =
+classifiers =
+    Development Status :: 3 - Alpha
+    Intended Audience :: Science/Research
+    Topic :: Scientific/Engineering :: Atmospheric Science
+    License :: OSI Approved :: Apache Software License
+    Natural Language :: English
+    Programming Language :: Python :: 3
+    Programming Language :: Python :: 3.8
+    Programming Language :: Python :: 3.9
+    Programming Language :: Python :: 3.10
+license = Apache Software License 2.0
+license_file = LICENSE
+
+[options]
+packages =
+    UNFCCC_GHG_data
+    UNFCCC_GHG_data.UNFCCC_CRF_reader
+    UNFCCC_GHG_data.UNFCCC_reader
+    UNFCCC_GHG_data.UNFCCC_downloader
+    #UNFCCC_GHG_data.UNFCCC_DI_reader
+    #UNFCCC_GHG_data.datasets
+python_requires = >=3.8
+setup_requires =
+    setuptools_scm
+install_requires =
+    bs4
+    requests
+    pandas
+    selenium
+    primap2
+    countrynames
+    pycountry
+    datalad
+    treelib
+    camelot-py
+    opencv-python
+    ghostscript
+
+[options.extras_require]
+dev =
+    pip
+    wheel
+    bs4
+    requests
+    pandas
+    selenium
+    primap2
+    countrynames
+    pycountry
+    datalad
+    treelib
+    camelot-py
+    opencv-python
+    ghostscript
+    ipykernel
+    jupyter
+
+
+[options.package_data]
+* =
+    *.csv
+    *.nc

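After the editable install that the venv task now performs (pip install -e .[dev]), the metadata declared in setup.cfg can be queried at runtime. A quick check, assuming the package is installed in the active environment:

```python
from importlib.metadata import metadata, version

print(version("UNFCCC_GHG_data"))             # expected: 0.2, as declared in setup.cfg
print(metadata("UNFCCC_GHG_data")["Summary"])  # the description field from setup.cfg
```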
+ 5 - 0
setup.py

@@ -0,0 +1,5 @@
+#!/usr/bin/env python
+
+import setuptools
+
+setuptools.setup()