|
@@ -16,6 +16,8 @@ from config_mng_bur2 import ( # noqa: E402
|
|
|
coords_defaults,
|
|
|
coords_terminologies,
|
|
|
coords_value_mapping,
|
|
|
+ inv_conf_harvested_wood_products,
|
|
|
+ inv_conf_per_sector,
|
|
|
meta_data,
|
|
|
)
|
|
|
|
|
@@ -25,7 +27,7 @@ from unfccc_ghg_data.helper import ( # noqa: E402
|
|
|
fix_rows,
|
|
|
)
|
|
|
|
|
|
-if __name__ == "__main__":
|
|
|
+if __name__ == "__main__" :
|
|
|
# ###
|
|
|
# configuration
|
|
|
# ###
|
|
@@ -33,7 +35,7 @@ if __name__ == "__main__":
|
|
|
input_folder = downloaded_data_path / "UNFCCC" / "Mongolia" / "BUR2"
|
|
|
output_folder = extracted_data_path / "UNFCCC" / "Mongolia"
|
|
|
|
|
|
- if not output_folder.exists():
|
|
|
+ if not output_folder.exists() :
|
|
|
output_folder.mkdir()
|
|
|
|
|
|
pdf_file = "20231112_NIR_MGL.pdf"
|
|
@@ -41,9 +43,11 @@ if __name__ == "__main__":
|
|
|
category_column = f"category ({coords_terminologies['category']})"
|
|
|
compression = dict(zlib=True, complevel=9)
|
|
|
|
|
|
- def repl(m): # noqa: D103
|
|
|
+
|
|
|
+ def repl(m) : # noqa: D103
|
|
|
return m.group("code")
|
|
|
|
|
|
+
|
|
|
# ###
|
|
|
# 1. Read in main tables
|
|
|
# ###
|
|
@@ -284,56 +288,9 @@ if __name__ == "__main__":
|
|
|
# The table for harvested wood products is in a different format
|
|
|
# and needs to be read in separately.
|
|
|
|
|
|
- inv_conf_harvested_wood_products = {
|
|
|
- 'page' : '151',
|
|
|
- "category_column" : 'Categories',
|
|
|
- "cat_codes_manual" : {
|
|
|
- 'GHG emission' : '3.D.1',
|
|
|
- },
|
|
|
- 'unit' : 'Gg',
|
|
|
- 'entity' : 'CO2',
|
|
|
- 'parts' : {
|
|
|
- "part_1" : {
|
|
|
- "page_defs" :
|
|
|
- {
|
|
|
- "area" : ["52,690,555,647"],
|
|
|
- "cols" : ["101,149,196,231,268,310,351,398,433,476,514"],
|
|
|
- },
|
|
|
- "rows_to_fix" : {
|
|
|
- 3 : [
|
|
|
- "GHG",
|
|
|
- ],
|
|
|
- },
|
|
|
- },
|
|
|
- "part_2" : {
|
|
|
- "page_defs" :
|
|
|
- {
|
|
|
- "area" : ["52,637,555,596"],
|
|
|
- "cols" : ["99,150,197,239,281,326,372,425,469,516"],
|
|
|
- },
|
|
|
- "rows_to_fix" : {
|
|
|
- 3 : [
|
|
|
- "GHG",
|
|
|
- ],
|
|
|
- },
|
|
|
- },
|
|
|
- "part_3" : {
|
|
|
- "page_defs" :
|
|
|
- {
|
|
|
- "area" : ["52,591,550,547"],
|
|
|
- "cols" : ["106,156,197,239,281,326,372,420,465,509"],
|
|
|
- },
|
|
|
- "rows_to_fix" : {
|
|
|
- 3 : [
|
|
|
- "GHG",
|
|
|
- ],
|
|
|
- },
|
|
|
- }},
|
|
|
- }
|
|
|
-
|
|
|
print("-" * 60)
|
|
|
print(
|
|
|
- f"Reading sector harvested wood products table."
|
|
|
+ "Reading sector harvested wood products table."
|
|
|
)
|
|
|
|
|
|
df_hwp = None
|
|
@@ -349,8 +306,9 @@ if __name__ == "__main__":
|
|
|
|
|
|
df_hwp_part = tables_inventory_original[0].df
|
|
|
|
|
|
- if "rows_to_fix" in inv_conf_harvested_wood_products['parts'][part]:
|
|
|
- for n_rows in inv_conf_harvested_wood_products['parts'][part]["rows_to_fix"].keys():
|
|
|
+ if "rows_to_fix" in inv_conf_harvested_wood_products['parts'][part] :
|
|
|
+ for n_rows in inv_conf_harvested_wood_products[
|
|
|
+ 'parts'][part]["rows_to_fix"].keys():
|
|
|
df_hwp_part = fix_rows(
|
|
|
df_hwp_part,
|
|
|
rows_to_fix=inv_conf_harvested_wood_products['parts'][part]["rows_to_fix"][n_rows],
|
|
@@ -379,7 +337,6 @@ if __name__ == "__main__":
|
|
|
inv_conf_harvested_wood_products["cat_codes_manual"]
|
|
|
)
|
|
|
|
|
|
-
|
|
|
# unit is always the same
|
|
|
df_hwp.loc[:, "unit"] = inv_conf_harvested_wood_products["unit"]
|
|
|
|
|
@@ -390,364 +347,9 @@ if __name__ == "__main__":
|
|
|
# 3. Read in aggregated tables from 1990 - 2020
|
|
|
# ###
|
|
|
|
|
|
- inv_conf_per_sector = {
|
|
|
- "total": {
|
|
|
- "page_defs": {
|
|
|
- "32": {
|
|
|
- "area": ["64,649,547,106"],
|
|
|
- "cols": ["106,182,237,294,345,403,480"],
|
|
|
- },
|
|
|
- },
|
|
|
- "entity": "KYOTOGHG (SARGWP100)",
|
|
|
- # "category_column": "Categories",
|
|
|
- # "columns_to_drop": ["Categories"],
|
|
|
- # "years": ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
|
|
|
- "unit": "Gg CO2e",
|
|
|
- "last_year": "2020",
|
|
|
- "rows_to_fix": {
|
|
|
- -3: [
|
|
|
- "Year",
|
|
|
- ],
|
|
|
- },
|
|
|
- "year_column": " Year ",
|
|
|
- # TODO some categories are not recognized!
|
|
|
- "cat_codes_manual": {
|
|
|
- " Energy ": "1",
|
|
|
- " IPPU ": "2",
|
|
|
- " Agriculture ": "3",
|
|
|
- " Waste ": "4",
|
|
|
- " LULUCF ": "M.LULUCF",
|
|
|
- "Total (excl. LULUCF)": "M.0.EL",
|
|
|
- "Total (incl. LULUCF)": "M.0",
|
|
|
- },
|
|
|
- },
|
|
|
- "energy": {
|
|
|
- "page_defs": {
|
|
|
- "43": {
|
|
|
- "area": ["59,478,544,79"],
|
|
|
- "cols": ["97,160,220,262,338,388,452,502"],
|
|
|
- },
|
|
|
- "44": {
|
|
|
- "area": ["60,773,546,582"],
|
|
|
- "cols": ["103,165,226,274,329,384,444,494"],
|
|
|
- },
|
|
|
- },
|
|
|
- "entity": "KYOTOGHG (SARGWP100)",
|
|
|
- # "category_column" : "Categories",
|
|
|
- # "columns_to_drop" : ["Categories"],
|
|
|
- # "years" : ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
|
|
|
- "unit": "Gg CO2e",
|
|
|
- "last_year": "2020",
|
|
|
- "rows_to_fix": {
|
|
|
- 11: [
|
|
|
- "Years",
|
|
|
- ],
|
|
|
- },
|
|
|
- "rows_to_drop": [0, 2],
|
|
|
- "year_column": "Years ",
|
|
|
- "cat_codes_manual": {
|
|
|
- r" 1.A.1.a.i Electricity generation ": "1.A.1.a.i",
|
|
|
- r" 1.A.1.a.ii Combined heat and ipower peneration (CHP)": "1.A.1.a.ii", # noqa: E501
|
|
|
- r" 1.A.1.c.ii Other energy ndustries ": "1.A.1.c.ii",
|
|
|
- r"Manufacturing industries and construction ": "1.A.2",
|
|
|
- r" 1.A.3.a 1 Civil aviation t ": "1.A.3.a",
|
|
|
- r" .A.3.b Road ransportation ": "1.A.3.b",
|
|
|
- r" 1.A.3.c Railways ": "1.A.3.c",
|
|
|
- r" 1.A.3.e.ii Off-road ": "1.A.3.e.ii",
|
|
|
- },
|
|
|
- },
|
|
|
- "energy cont": {
|
|
|
- "page_defs": {
|
|
|
- "44": {
|
|
|
- "area": ["59,552,553,84"],
|
|
|
- "cols": ["103,173,219,274,330,382,443,494"],
|
|
|
- },
|
|
|
- },
|
|
|
- "entity": "KYOTOGHG (SARGWP100)",
|
|
|
- # "category_column" : "Categories",
|
|
|
- # "columns_to_drop" : ["Categories"],
|
|
|
- # "years" : ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
|
|
|
- "unit": "Gg CO2e",
|
|
|
- "last_year": "2020",
|
|
|
- "rows_to_fix": {
|
|
|
- 8: [
|
|
|
- "Years",
|
|
|
- ],
|
|
|
- },
|
|
|
- "rows_to_drop": [0, 2],
|
|
|
- "year_column": "Years ",
|
|
|
- "cat_codes_manual": {
|
|
|
- "Other sectors 1.A.4.a Commercial/ Institutional ": "1.A.4.a",
|
|
|
- " 1.A.4.b Residen-tial ": "1.A.4.b",
|
|
|
- " 1.A.4.c.i Agriculture -Stationary ": "1.A.4.c.i",
|
|
|
- " 1.A.4.c.ii Agriculture -Off-road vehicles and other machinery": "1.A.4.c.ii", # noqa: E501
|
|
|
- "Non-specified 1.A.5.a Stationary ": "1.A.5.a",
|
|
|
- "Fugitive emis 1.B.1.a Coal mining & handling (surface mining) ": "1.B.1.a", # noqa: E501
|
|
|
- "sions from fu 1.B.2.a.ii Oil -Flaring ": "1.B.2.a.ii",
|
|
|
- "els 1.B.2.a.iii.2 Oil production and upgrading ": "1.B.2.a.iii",
|
|
|
- },
|
|
|
- },
|
|
|
- "ippu": {
|
|
|
- "page_defs": {
|
|
|
- "74": {
|
|
|
- "area": ["68,701,544,313"],
|
|
|
- "cols": ["97,188,261,358,462"],
|
|
|
- },
|
|
|
- },
|
|
|
- "entity": "KYOTOGHG (SARGWP100)",
|
|
|
- # "category_column" : "Categories",
|
|
|
- # "columns_to_drop" : ["Categories"],
|
|
|
- # "years" : ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
|
|
|
- "unit": "Gg CO2e",
|
|
|
- "last_year": "2020",
|
|
|
- "rows_to_fix": {
|
|
|
- 3: [
|
|
|
- "Year",
|
|
|
- ],
|
|
|
- },
|
|
|
- "year_column": "Year ",
|
|
|
- "cat_codes_manual": {
|
|
|
- "2.A-Mineral industry ": "2.A",
|
|
|
- "2.C-Metal industry ": "2.C",
|
|
|
- "2.D-Non-energy products from fuels and solvent use": "2.D",
|
|
|
- "2.F-Product uses as substitutes for ozone depleting substances": "2.F",
|
|
|
- "2. IPPU Total ": "2",
|
|
|
- },
|
|
|
- "remove_duplicates": ["2"],
|
|
|
- },
|
|
|
- "livestock": {
|
|
|
- "page_defs": {
|
|
|
- "103": {
|
|
|
- "area": ["62,480,544,82"],
|
|
|
- "cols": ["97,182,259,326,403,474"],
|
|
|
- },
|
|
|
- },
|
|
|
- # "entity": "KYOTOGHG (SARGWP100)",
|
|
|
- # "category_column" : "Categories",
|
|
|
- # "columns_to_drop" : ["Categories"],
|
|
|
- # "years" : ["1990", "1995", "2000", "2005", "2010", "2015", "2020"],
|
|
|
- "unit": "Gg CO2e",
|
|
|
- "last_year": "2020",
|
|
|
- "rows_to_fix": {
|
|
|
- 3: [
|
|
|
- "Year",
|
|
|
- ],
|
|
|
- },
|
|
|
- "rows_to_drop": [0, 1],
|
|
|
- "year_column": "Year ",
|
|
|
- # TODO: This is far from than the actual categories but works for now
|
|
|
- "cat_codes_manual": {
|
|
|
- "Fermentation Gg": "3.A.1",
|
|
|
- "Management CH4": "3.A.2",
|
|
|
- " (Total CH4) ": "3.A",
|
|
|
- "Fermentation Gg C": "3.A.1",
|
|
|
- "Management O2e": "3.A.2",
|
|
|
- " (Gg CO2e) ": "3.A",
|
|
|
- },
|
|
|
- "multi_entity": {
|
|
|
- "unit": ["Gg", "Gg", "Gg", "Gg CO2e", "Gg CO2e", "Gg CO2e"],
|
|
|
- "entity": [
|
|
|
- "CH4",
|
|
|
- "CH4",
|
|
|
- "CH4",
|
|
|
- "KYOTOGHG (SARGWP100)",
|
|
|
- "KYOTOGHG (SARGWP100)",
|
|
|
- "KYOTOGHG (SARGWP100)",
|
|
|
- ],
|
|
|
- },
|
|
|
- },
|
|
|
- "biomass_burning": {
|
|
|
- "page_defs": {
|
|
|
- "114": {
|
|
|
- "area": ["70,214,544,78"],
|
|
|
- "cols": ["116,185,239,304,365,426,491"],
|
|
|
- },
|
|
|
- "115": {
|
|
|
- "area": ["72,777,545,505"],
|
|
|
- "cols": ["123,190,250,313,374,438,495"],
|
|
|
- },
|
|
|
- },
|
|
|
- "last_year": "2020",
|
|
|
- "col_to_use": 5,
|
|
|
- "rows_to_fix": {
|
|
|
- 7: [
|
|
|
- "3.C.1 - Emiss",
|
|
|
- ],
|
|
|
- },
|
|
|
- "year_column": " Year ",
|
|
|
- # TODO: These categories are technically duplicate, just with a different unit
|
|
|
- "categories_to_drop": [
|
|
|
- "3.C.1 -Emiss CH4 (Gg CO2e)",
|
|
|
- "ions from bioma (CO2e) N2O (Gg CO2e)",
|
|
|
- "ss burning Total (Gg CO2e)",
|
|
|
- ],
|
|
|
- # TODO: This is far from than the actual categories but works for now
|
|
|
- "cat_codes_manual": {
|
|
|
- " 3.C.1 CH4 (Gg) ": "3.C.1",
|
|
|
- " -Emissions fr N2O (Gg) ": "3.C.1",
|
|
|
- " om biomass bur NOx (Gg) ": "3.C.1",
|
|
|
- " ning CO(Gg) ": "3.C.1",
|
|
|
- },
|
|
|
- "multi_entity": {
|
|
|
- "unit": ["Gg", "Gg", "Gg", "Gg"],
|
|
|
- "entity": [
|
|
|
- "CH4",
|
|
|
- "N2O",
|
|
|
- "NOx",
|
|
|
- "CO",
|
|
|
- ],
|
|
|
- },
|
|
|
- },
|
|
|
- "managed_soils_direct": {
|
|
|
- "page_defs": {
|
|
|
- "119": {
|
|
|
- "area": ["70,600,541,173"],
|
|
|
- "cols": ["114,191,245,328,400,476"],
|
|
|
- },
|
|
|
- },
|
|
|
- "last_year": "2020",
|
|
|
- "col_to_use": 3,
|
|
|
- "rows_to_fix": {
|
|
|
- 10: [
|
|
|
- "Urine and dung",
|
|
|
- ],
|
|
|
- },
|
|
|
- "year_column": " Year ",
|
|
|
- # # TODO: technically duplicate, just with a different unit
|
|
|
- "categories_to_drop": [
|
|
|
- " 3.C.4 -Direct N2O Emissions from managed soils (CO2e) Gg CO2e",
|
|
|
- ],
|
|
|
- # TODO: This is far from than the actual categories but works for now
|
|
|
- "cat_codes_manual": {
|
|
|
- # TODO the next 4 categories are made up placeholders
|
|
|
- " Inorganic N fertilizer application N2O (Gg)": "3.C.4.i",
|
|
|
- " Organic N applied as fertilizer (manure) N2O (Gg)": "3.C.4.ii",
|
|
|
- "Urine and dung N deposited on pasture, range and paddock by grazing animals N2O (Gg)": "3.C.4.iii",
|
|
|
- " N in crop residues N2O (Gg)": "3.C.4.iiii",
|
|
|
- " 3.C.4 -Direct N2O Emissions from managed soils N2O (Gg)": "3.C.4",
|
|
|
- },
|
|
|
- "multi_entity": {
|
|
|
- "unit": ["Gg", "Gg", "Gg", "Gg", "Gg"],
|
|
|
- "entity": [
|
|
|
- "N2O",
|
|
|
- "N2O",
|
|
|
- "N2O",
|
|
|
- "N2O",
|
|
|
- "N2O",
|
|
|
- ],
|
|
|
- },
|
|
|
- },
|
|
|
- "managed_soils_indirect": {
|
|
|
- "page_defs": {
|
|
|
- "125": {
|
|
|
- "area": ["74,214,539,83"],
|
|
|
- "cols": ["125,222,309,423"],
|
|
|
- },
|
|
|
- "126": {
|
|
|
- "area": ["72,775,539,369"],
|
|
|
- "cols": ["148,248,351,459"],
|
|
|
- },
|
|
|
- },
|
|
|
- "last_year": "2020",
|
|
|
- "col_to_use": 3,
|
|
|
- "rows_to_fix": {
|
|
|
- 7: [
|
|
|
- "3.C.5 - Indirect N2O",
|
|
|
- ],
|
|
|
- },
|
|
|
- "year_column": " Year ",
|
|
|
- # # TODO: technically duplicate, just with a different unit
|
|
|
- "categories_to_drop": [
|
|
|
- "3.C.5 -Indirect N2O emissions from managed soils Gg CO2e"
|
|
|
- ],
|
|
|
- # TODO: This is far from than the actual categories but works for now
|
|
|
- "cat_codes_manual": {
|
|
|
- # TODO the next 2 categories are made up placeholders
|
|
|
- " Volatilization pathway Gg N2O": "3.C.5.i",
|
|
|
- " Leaching/runoff pathway Gg N2O": "3.C.5.ii",
|
|
|
- "3.C.5 -Indirect N2O emissions from managed soils Gg N2O": "3.C.5",
|
|
|
- },
|
|
|
- "entity": "N2O",
|
|
|
- "unit": "Gg",
|
|
|
- },
|
|
|
- "bio_waste" : {
|
|
|
- "page_defs" : {
|
|
|
- "157" : {
|
|
|
- "area" : ["68,748,541,228"],
|
|
|
- "cols" : ["108,176,222,283,332,387,429"],
|
|
|
- },
|
|
|
- },
|
|
|
- "last_year" : "2020",
|
|
|
- "rows_to_fix" : {
|
|
|
- 2 : [
|
|
|
- "Year",
|
|
|
- ],
|
|
|
- },
|
|
|
- "year_column" : 'Year ',
|
|
|
- # # TODO: technically duplicate, just with a different unit
|
|
|
- "categories_to_drop" : [
|
|
|
- 'Total emissions from SWDS Gg CO2e'
|
|
|
- ],
|
|
|
- # TODO: This is far from than the actual categories but works for now
|
|
|
- "cat_codes_manual" : {
|
|
|
- # TODO the categories are made up placeholders
|
|
|
- 'Food ' : "4.A.1.food",
|
|
|
- 'Garden ' : "4.A.1.garden",
|
|
|
- 'Paper Gg CH4' : "4.A.1.paper",
|
|
|
- 'Wood ' : "4.A.1.wood",
|
|
|
- 'Textile ' : "4.A.1.textile",
|
|
|
- 'Total ' : "4.A.1.",
|
|
|
- },
|
|
|
- "entity" : "CH4 ",
|
|
|
- "unit" : "Gg",
|
|
|
- },
|
|
|
- "wastewater" : {
|
|
|
- "page_defs" : {
|
|
|
- "161" : {
|
|
|
- "area" : ["60,480,541,85"],
|
|
|
- "cols" : ["98,165,226,281,340,408,465"],
|
|
|
- },
|
|
|
- "162" : {
|
|
|
- "area" : ["62,775,541,613"],
|
|
|
- "cols" : ["110,176,229,288,349,414,486"],
|
|
|
- },
|
|
|
- },
|
|
|
- "last_year" : "2020",
|
|
|
- "col_to_use" : 7,
|
|
|
- "rows_to_fix" : {
|
|
|
- 10 : [
|
|
|
- "Wastewater",
|
|
|
- ],
|
|
|
- },
|
|
|
- "year_column" : ' Year ',
|
|
|
- # # TODO: technically duplicate, just with a different unit
|
|
|
- "categories_to_drop" : [
|
|
|
- ' Domestic wastewater CH4 emissions ',
|
|
|
- ' Domestic wastewater N2O emissions (Gg C',
|
|
|
- ' Industrial wastewater CH4 emissions O2 e)',
|
|
|
- 'Wastewater treatment and discharge Total emissions ',
|
|
|
- ],
|
|
|
- # TODO: This is far from than the actual categories but works for now
|
|
|
- "cat_codes_manual" : {
|
|
|
- # TODO the categories are made up placeholders
|
|
|
- ' Domestic wastewater CH4 emissions (Gg CH4)' : '4.D.1',
|
|
|
- ' Domestic wastewater N2O emissions (Gg N2O)' : '4.D.1',
|
|
|
- ' Industrial wastewater CH4 emissions (Gg CH4)' : '4.D.2',
|
|
|
- },
|
|
|
- "multi_entity" : {
|
|
|
- "unit" : ["Gg", "Gg", "Gg"],
|
|
|
- "entity" : [
|
|
|
- "CH4",
|
|
|
- "N2O",
|
|
|
- "CH4",
|
|
|
- ],
|
|
|
- },
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
df_agg = None
|
|
|
# TODO remove `reversed` (only for development)
|
|
|
- for sector in list(reversed(list(inv_conf_per_sector.keys()))):
|
|
|
+ for sector in list(reversed(list(inv_conf_per_sector.keys()))) :
|
|
|
print("-" * 60)
|
|
|
print(
|
|
|
f"Reading sector {sector} on page(s) {[*inv_conf_per_sector[sector]['page_defs']]}."
|
|
@@ -755,7 +357,7 @@ if __name__ == "__main__":
|
|
|
|
|
|
df_sector = None
|
|
|
|
|
|
- for page in [*inv_conf_per_sector[sector]["page_defs"]]:
|
|
|
+ for page in [*inv_conf_per_sector[sector]["page_defs"]] :
|
|
|
tables_inventory_original = camelot.read_pdf(
|
|
|
str(input_folder / pdf_file),
|
|
|
pages=page,
|
|
@@ -767,9 +369,9 @@ if __name__ == "__main__":
|
|
|
|
|
|
df_sector_page = tables_inventory_original[0].df
|
|
|
|
|
|
- if df_sector is None:
|
|
|
+ if df_sector is None :
|
|
|
df_sector = df_sector_page
|
|
|
- else:
|
|
|
+ else :
|
|
|
df_sector = pd.concat(
|
|
|
[df_sector, df_sector_page],
|
|
|
axis=0,
|
|
@@ -782,13 +384,13 @@ if __name__ == "__main__":
|
|
|
|
|
|
df_sector = df_sector[0 : last_row + 1]
|
|
|
|
|
|
- if "rows_to_fix" in inv_conf_per_sector[sector]:
|
|
|
- for n_rows in inv_conf_per_sector[sector]["rows_to_fix"].keys():
|
|
|
+ if "rows_to_fix" in inv_conf_per_sector[sector] :
|
|
|
+ for n_rows in inv_conf_per_sector[sector]["rows_to_fix"].keys() :
|
|
|
print(f"Merge content for {n_rows=}")
|
|
|
# set the row
|
|
|
- if "col_to_use" in inv_conf_per_sector[sector].keys():
|
|
|
+ if "col_to_use" in inv_conf_per_sector[sector].keys() :
|
|
|
col_to_use = inv_conf_per_sector[sector]["col_to_use"]
|
|
|
- else:
|
|
|
+ else :
|
|
|
col_to_use = 0
|
|
|
df_sector = fix_rows(
|
|
|
df_sector,
|
|
@@ -799,8 +401,8 @@ if __name__ == "__main__":
|
|
|
|
|
|
df_sector = df_sector.reset_index(drop=True)
|
|
|
|
|
|
- if "rows_to_drop" in inv_conf_per_sector[sector]:
|
|
|
- for row in inv_conf_per_sector[sector]["rows_to_drop"]:
|
|
|
+ if "rows_to_drop" in inv_conf_per_sector[sector] :
|
|
|
+ for row in inv_conf_per_sector[sector]["rows_to_drop"] :
|
|
|
df_sector = df_sector.drop(index=row)
|
|
|
|
|
|
# TODO: Is it necessary to set the index here?
|
|
@@ -810,14 +412,14 @@ if __name__ == "__main__":
|
|
|
df_sector = df_sector.T
|
|
|
|
|
|
df_sector = df_sector.rename(
|
|
|
- columns={inv_conf_per_sector[sector]["year_column"]: "category"}
|
|
|
+ columns={inv_conf_per_sector[sector]["year_column"] : "category"}
|
|
|
)
|
|
|
|
|
|
df_sector["category"] = df_sector["category"].str.replace("\n", "")
|
|
|
|
|
|
# TODO Is it not the same as remove categories further down?
|
|
|
- if "categories_to_drop" in inv_conf_per_sector[sector]:
|
|
|
- for row in inv_conf_per_sector[sector]["categories_to_drop"]:
|
|
|
+ if "categories_to_drop" in inv_conf_per_sector[sector] :
|
|
|
+ for row in inv_conf_per_sector[sector]["categories_to_drop"] :
|
|
|
row_to_delete = df_sector.index[df_sector["category"] == row][0]
|
|
|
df_sector = df_sector.drop(index=row_to_delete)
|
|
|
|
|
@@ -825,12 +427,12 @@ if __name__ == "__main__":
|
|
|
inv_conf_per_sector[sector]["cat_codes_manual"]
|
|
|
)
|
|
|
|
|
|
- if "multi_entity" in inv_conf_per_sector[sector]:
|
|
|
+ if "multi_entity" in inv_conf_per_sector[sector] :
|
|
|
df_sector["entity"] = inv_conf_per_sector[sector]["multi_entity"]["entity"]
|
|
|
df_sector["unit"] = inv_conf_per_sector[sector]["multi_entity"]["unit"]
|
|
|
# df_sector = df_sector.set_index(["entity", "unit", "category"])
|
|
|
|
|
|
- else:
|
|
|
+ else :
|
|
|
# unit is always the same
|
|
|
df_sector.loc[:, "unit"] = inv_conf_per_sector[sector]["unit"]
|
|
|
|
|
@@ -839,25 +441,31 @@ if __name__ == "__main__":
|
|
|
|
|
|
# Some categories are in two tables (summary and sector)
|
|
|
# Duplicates need to be removed
|
|
|
- if "remove_duplicates" in inv_conf_per_sector[sector]:
|
|
|
- for row in inv_conf_per_sector[sector]["remove_duplicates"]:
|
|
|
+ if "remove_duplicates" in inv_conf_per_sector[sector] :
|
|
|
+ for row in inv_conf_per_sector[sector]["remove_duplicates"] :
|
|
|
row_to_delete = df_sector.index[df_sector["category"] == row][0]
|
|
|
df_sector = df_sector.drop(index=row_to_delete)
|
|
|
|
|
|
- if df_agg is None:
|
|
|
+ if df_agg is None :
|
|
|
df_agg = df_sector
|
|
|
- else:
|
|
|
+ else :
|
|
|
df_agg = pd.concat(
|
|
|
[df_agg, df_sector],
|
|
|
axis=0,
|
|
|
join="outer",
|
|
|
).reset_index(drop=True)
|
|
|
|
|
|
- for year in [str(y) for y in range(1990, 2021)]:
|
|
|
+ for year in [str(y) for y in range(1990, 2021)] :
|
|
|
df_agg.loc[:, year] = df_agg[year].str.replace(",", "")
|
|
|
|
|
|
# print(df_agg)
|
|
|
- pass
|
|
|
+
|
|
|
+ # append the harvested wood products table (df_hwp) to the aggregated sector tables (df_agg)
|
|
|
+ df_agg = pd.concat(
|
|
|
+ [df_agg, df_hwp],
|
|
|
+ axis=0,
|
|
|
+ join="outer",
|
|
|
+ ).reset_index(drop=True)
|
|
|
|
|
|
### convert to interchange format ###
|
|
|
df_agg_IF = pm2.pm2io.convert_wide_dataframe_if(
|