config_TWN_NIR2022.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. # config and functions for Taiwan NIR 2022
  2. from typing import Union, List
  3. import pandas as pd
  4. gwp_to_use = "AR4GWP100"
  5. def fix_rows(data: pd.DataFrame, rows_to_fix: list, col_to_use: str, n_rows: int)->pd.DataFrame:
  6. for row in rows_to_fix:
  7. #print(row)
  8. # find the row number and collect the row and the next two rows
  9. index = data.loc[data[col_to_use] == row].index
  10. if not list(index):
  11. print(f"Can't merge split row {row}")
  12. print(data[col_to_use])
  13. print(f"Merging split row {row}")
  14. indices_to_drop = []
  15. ####print(index)
  16. for item in index:
  17. loc = data.index.get_loc(item)
  18. ####print(data[col_to_use].loc[loc + 1])
  19. if n_rows == -2:
  20. locs_to_merge = list(range(loc - 1, loc + 1))
  21. loc_to_check = loc - 1
  22. #if n_rows == -3:
  23. # locs_to_merge = list(range(loc - 1, loc + 2))
  24. #elif n_rows == -5:
  25. # locs_to_merge = list(range(loc - 1, loc + 4))
  26. else:
  27. locs_to_merge = list(range(loc, loc + n_rows))
  28. loc_to_check = loc + 1
  29. if data[col_to_use].loc[loc_to_check] == '':
  30. rows_to_merge = data.iloc[locs_to_merge]
  31. indices_to_merge = rows_to_merge.index
  32. # replace numerical NaN values
  33. ####print(rows_to_merge)
  34. rows_to_merge = rows_to_merge.fillna('')
  35. ####print("fillna")
  36. ####print(rows_to_merge)
  37. # join the three rows
  38. new_row = rows_to_merge.agg(' '.join)
  39. # replace the double spaces that are created
  40. # must be done here and not at the end as splits are not always
  41. # the same and join would produce different col values
  42. new_row = new_row.str.replace(" ", " ")
  43. new_row = new_row.str.strip()
  44. #new_row = new_row.str.replace("N O", "NO")
  45. #new_row = new_row.str.replace(", N", ",N")
  46. #new_row = new_row.str.replace("- ", "-")
  47. data.loc[indices_to_merge[0]] = new_row
  48. indices_to_drop = indices_to_drop + list(indices_to_merge[1:])
  49. data = data.drop(indices_to_drop)
  50. data = data.reset_index(drop=True)
  51. return data
  52. def make_wide_table(data: pd.DataFrame, keyword: str, col: Union[int, str], index_cols: List[Union[int, str]])->pd.DataFrame:
  53. index = data.loc[data[col] == keyword].index
  54. if not list(index):
  55. print("Keyword for table transformation not found")
  56. return data
  57. elif len(index)==1:
  58. print("Keyword for table transformation found only once")
  59. return data
  60. else:
  61. df_all = None
  62. for i, item in enumerate(index):
  63. loc = data.index.get_loc(item)
  64. if i < len(index) - 1:
  65. next_loc = data.index.get_loc(index[i + 1])
  66. else:
  67. next_loc = data.index[-1] + 1
  68. df_to_add = data.loc[list(range(loc, next_loc))]
  69. # select only cols which don't have NaN, Null, or '' as header
  70. filter_nan = ((~df_to_add.iloc[0].isnull()) & (df_to_add.iloc[0] != 'NaN')& (df_to_add.iloc[0] != ''))
  71. df_to_add = df_to_add.loc[: , filter_nan]
  72. df_to_add.columns = df_to_add.iloc[0]
  73. #print(df_to_add.columns)
  74. df_to_add = df_to_add.drop(loc)
  75. df_to_add = df_to_add.set_index(index_cols)
  76. if df_all is None:
  77. df_all = df_to_add
  78. else:
  79. df_all = pd.concat([df_all, df_to_add], axis=1, join='outer')
  80. return df_all
  81. # page defs tp hold information on reading the table
  82. page_defs = {
  83. '5': {
  84. "table_areas": ['36,523,563,68'],
  85. "split_text": False,
  86. "flavor": "stream",
  87. },
  88. '6': {
  89. "table_areas": ['34,562,563,53'],
  90. #"columns": ['195,228,263,295,328,363,395,428,462,495,529'], # works without
  91. "split_text": True,
  92. "flavor": "stream",
  93. },
  94. '7': {
  95. "table_areas": ['36,740,499,482', '36,430,564,53'],
  96. "split_text": True,
  97. "flavor": "stream",
  98. },
  99. '8': {
  100. "table_areas": ['35,748,503,567'],
  101. "split_text": True,
  102. "flavor": "stream",
  103. },
  104. '9': {
  105. "table_areas": ['35,747,565,315', '36,273,565,50'],
  106. "split_text": False,
  107. "flavor": "stream",
  108. },
  109. '11': {
  110. "table_areas": ['35,744,563,434'],
  111. "split_text": True,
  112. "flavor": "stream",
  113. },
  114. '12': {
  115. "table_areas": ['33,747,562,86'],
  116. "split_text": True,
  117. "flavor": "stream",
  118. },
  119. '13': {
  120. "table_areas": ['34,303,564,54'],
  121. "split_text": True,
  122. "flavor": "stream",
  123. },
  124. '14': {
  125. "table_areas": ['34,754,564,256'],
  126. "columns": ['220,251,283,314,344,371,406,438,470,500,530'],
  127. "split_text": True,
  128. "flavor": "stream",
  129. },
  130. '15': {
  131. "table_areas": ['34,487,564,42'],
  132. "split_text": True,
  133. "flavor": "stream",
  134. },
  135. '16': {
  136. "table_areas": ['34,418,564,125'],
  137. #"columns": ['107,209,241,273,306,338,369,402,433,466,498,533'],
  138. "split_text": True,
  139. "flavor": "lattice",
  140. }, # with stream the row index is messed up with lattice the column index ... red with lattice and fix col header manualy
  141. '17': {
  142. "table_areas": ['34,534,564,49'],
  143. "columns": ['188,232,263,298,331,362,398,432,464,497,530'],
  144. "split_text": True,
  145. "flavor": "stream",
  146. },
  147. }
  148. # table defs to hold information on how to process the tables
  149. table_defs = {
  150. 'ES2.2': { # 1990-2020 Carbon Dioxide Emissions and Sequestration in Taiwan
  151. "tables": [1, 2],
  152. "rows_to_fix": {
  153. 0: {
  154. 3: ['1.A.4.c Agriculture, Forestry, Fishery, and',
  155. '2.D Non-Energy Products from Fuels and',
  156. '4. Land Use, Land Use Change and Forestry'],
  157. },
  158. },
  159. "index_cols": ['GHG Emission Source and Sinks'],
  160. "wide_keyword": 'GHG Emission Source and Sinks',
  161. "col_wide_kwd": 0,
  162. "entity": "CO2",
  163. "unit": "kt",
  164. "cat_codes_manual": {
  165. 'Net GHG Emission (including LULUCF)': '0',
  166. 'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
  167. },
  168. },
  169. 'ES2.3': { # 1990-2020 Methane Emissions in Taiwan
  170. "tables": [3, 4],
  171. "rows_to_fix": {},
  172. "index_cols": ['GHG Emission Sources and Sinks'],
  173. "wide_keyword": 'GHG Emission Sources and Sinks',
  174. "col_wide_kwd": 0,
  175. "entity": f"CH4 ({gwp_to_use})",
  176. "unit": "ktCO2eq",
  177. "cat_codes_manual": {
  178. 'Total Methane Emissions': '0',
  179. },
  180. },
  181. 'ES2.4': { # 1990-2020 Nitrous Oxide Emissions in Taiwan
  182. "tables": [5],
  183. "fix_cats": {
  184. 0: {
  185. "Total Nitrous Oxide Emissionsl": "Total Nitrous Oxide Emissions",
  186. },
  187. },
  188. "rows_to_fix": {},
  189. "index_cols": ['GHG Emission Sources and Sinks'],
  190. "wide_keyword": 'GHG Emission Sources and Sinks',
  191. "col_wide_kwd": 0,
  192. "entity": f"N2O ({gwp_to_use})",
  193. "unit": "ktCO2eq",
  194. "cat_codes_manual": {
  195. 'Total Nitrous Oxide Emissions': '0',
  196. },
  197. },
  198. 'ES3.1': { # 1990-2020 Greenhouse Gas Emission in Taiwan by Sector
  199. "tables": [7],
  200. "rows_to_fix": {},
  201. "index_cols": ['GHG Emission Sources and Sinks'],
  202. "wide_keyword": 'GHG Emission Sources and Sinks',
  203. "col_wide_kwd": 0,
  204. "entity": f"KYOTOGHG ({gwp_to_use})",
  205. "unit": "ktCO2eq",
  206. "cat_codes_manual": {
  207. 'Net GHG Emission (including LULUCF)': '0',
  208. 'Total GHG Emission (excluding LULUCF)': 'M.0.EL',
  209. },
  210. },
  211. 'ES3.2': { # 1990-2020 Greenhouse Gas Emissions Produced by Energy Sector in Taiwan
  212. "tables": [8],
  213. "rows_to_fix": {},
  214. "index_cols": ['GHG Emission Sources and Sinks'],
  215. "wide_keyword": 'GHG Emission Sources and Sinks',
  216. "col_wide_kwd": 0,
  217. "gas_splitting": {
  218. "Total CO2 Emission": "CO2",
  219. "Total CH4 Emission": f"CH4 ({gwp_to_use})",
  220. "Total N2O Emission": f"N2O ({gwp_to_use})",
  221. "Total Emission from Energy Sector": f"KYOTOGHG ({gwp_to_use})",
  222. "GHG Emission Sources and Sinks": "entity",
  223. },
  224. "unit": "ktCO2eq",
  225. "cat_codes_manual": {
  226. 'Total CO2 Emission': '1',
  227. 'Total CH4 Emission': '1',
  228. 'Total N2O Emission': '1',
  229. 'Total Emission from Energy Sector': '1',
  230. },
  231. },
  232. 'ES3.3': { # 1990-2020 Greenhouse Gas Emissions Produced by Industrial Process and Product Use Sector (IPPU) in Taiwan
  233. "tables": [9,10],
  234. "rows_to_fix": {},
  235. "index_cols": ['GHG Emission Sources and Sinks'],
  236. "wide_keyword": 'GHG Emission Sources and Sinks',
  237. "col_wide_kwd": 0,
  238. "gas_splitting": {
  239. "Total CO2 Emission": "CO2",
  240. "Total CH4 Emission": f"CH4 ({gwp_to_use})",
  241. "Total N2O Emission": f"N2O ({gwp_to_use})",
  242. "Total HFCs Emission": f"HFCS ({gwp_to_use})",
  243. "Total PFCs Emission (2.E Electronics Industry)": f"PFCS ({gwp_to_use})",
  244. "Total SF6 Emission": f"SF6 ({gwp_to_use})",
  245. "Total NF3 Emission (2.E Electronics Industry)": f"NF3 ({gwp_to_use})",
  246. "Total Emission from IPPU Sector": f"KYOTOGHG ({gwp_to_use})",
  247. "GHG Emission Sources and Sinks": "entity",
  248. },
  249. "unit": "ktCO2eq",
  250. "cat_codes_manual": {
  251. 'Total CO2 Emission': '2',
  252. 'Total CH4 Emission': '2',
  253. 'Total N2O Emission': '2',
  254. 'Total HFCs Emission': '2',
  255. 'Total PFCs Emission (2.E Electronics Industry)': '2.E',
  256. 'Total SF6 Emission': '2',
  257. 'Total NF3 Emission (2.E Electronics Industry)': '2.E',
  258. 'Total Emission from IPPU Sector': '2',
  259. },
  260. "drop_rows": [
  261. ("2.D Non-Energy Products from Fuels and Solvent Use", "CO2"), # has lower significant digits than in table ES2.2
  262. ]
  263. },
  264. 'ES3.4': { # 1990-2020 Greenhouse Gas Emissions Produced by Agriculture Sector in Taiwan
  265. "tables": [11],
  266. "rows_to_fix": {},
  267. "index_cols": ['GHG Emission Sources and Sinks'],
  268. "wide_keyword": 'GHG Emission Sources and Sinks',
  269. "col_wide_kwd": 0,
  270. "gas_splitting": {
  271. "Total CO2 Emission (3.H Urea applied)": "CO2",
  272. "Total CH4 Emission": f"CH4 ({gwp_to_use})",
  273. "Total N2O Emission": f"N2O ({gwp_to_use})",
  274. "Total Emission From Agriculture Sector": f"KYOTOGHG ({gwp_to_use})",
  275. "GHG Emission Sources and Sinks": "entity",
  276. },
  277. "unit": "ktCO2eq",
  278. "cat_codes_manual": {
  279. 'Total CO2 Emission (3.H Urea applied)': '3.H',
  280. 'Total CH4 Emission': '3',
  281. 'Total N2O Emission': '3',
  282. 'Total Emission From Agriculture Sector': '3',
  283. },
  284. },
  285. 'ES3.6': { # 1990-2020 Greenhouse Gas Emissions in Taiwan by Waste Sector
  286. "tables": [13],
  287. "rows_to_fix": {
  288. 0: {
  289. 3: ["Total CO2 Emission"],
  290. },
  291. },
  292. "index_cols": ['GHG Emission Sources and Sinks'],
  293. "wide_keyword": 'GHG Emission Sources and Sinks',
  294. "col_wide_kwd": 0, # two column header
  295. "gas_splitting": {
  296. "Total CO2 Emission (5.C Incineration and Open Burning of Waste)": "CO2",
  297. "Total CH4 Emission": f"CH4 ({gwp_to_use})",
  298. "Total N2O Emission": f"N2O ({gwp_to_use})",
  299. "Total Emission from Waste Sector": f"KYOTOGHG ({gwp_to_use})",
  300. "GHG Emission Sources and Sinks": "entity",
  301. },
  302. "unit": "ktCO2eq",
  303. "cat_codes_manual": {
  304. 'Total CO2 Emission (5.C Incineration and Open Burning of Waste)': '5.C',
  305. 'Total CH4 Emission': '5',
  306. 'Total N2O Emission': '5',
  307. 'Total Emission from Waste Sector': '5',
  308. },
  309. },
  310. }
  311. table_defs_skip = {
  312. 'ES2.1': { # 1990-2020 Greenhouse Gas Emissions and Sequestration in Taiwan by Type
  313. "tables": [0],
  314. "rows_to_fix": {
  315. 0: {
  316. 3: ['CO2'],
  317. },
  318. 1: { # wherte col 0 is empty
  319. 3: ['Net GHG Emission', 'Total GHG Emission'],
  320. },
  321. },
  322. "index_cols": ['GHG', 'GWP'],
  323. "wide_keyword": 'GHG',
  324. "col_wide_kwd": 0,
  325. "unit": "ktCO2eq",
  326. },
  327. 'ES2.5': { # 1990-2020 Fluoride-Containing Gas Emissions in Taiwan
  328. "tables": [6],
  329. "rows_to_fix": {
  330. 0: {
  331. -2: ['Total SF6 Emissions',
  332. 'Total NF3 Emissions'],
  333. },
  334. },
  335. "index_cols": ['GHG Emission Sources and Sinks'],
  336. "wide_keyword": 'GHG Emission Sources and Sinks',
  337. "col_wide_kwd": 0,
  338. #"entity": "CO2",
  339. "unit": "ktCO2eq",
  340. },
  341. 'ES3.5': { # skip for now: 1990-2020 Changes in Carbon Sequestration by LULUCF Sector in Taiwan2],
  342. "tables": [12],
  343. "rows_to_fix": {},
  344. "index_cols": ['GHG Emission Sources and Sinks'], #header is merged col :-(
  345. "wide_keyword": 'GHG Emission Sources and Sinks',
  346. "col_wide_kwd": 0, # two column header
  347. "unit": "kt",
  348. "entity": "CO2",
  349. }, # need to consider the two columns specially (merge?)
  350. }