dodo.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625
  1. """
  2. Define the tasks for UNFCCC data repository
  3. """
  4. import os
  5. import sys
  6. import datalad.api
  7. from doit import get_var
  8. root_path = "."
  9. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  10. from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_devel import ( # noqa: E402
  11. read_year_to_test_specs,
  12. )
  13. from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import ( # noqa: E402
  14. read_crf_for_country_datalad,
  15. read_new_crf_for_year_datalad,
  16. )
  17. def set_root_path():
  18. """Set the root folder for the repository"""
  19. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  20. def map_folders(parent_folder):
  21. """
  22. Create or update the folder mapping in the given folder
  23. Internal function
  24. """
  25. datalad.api.run(
  26. cmd="python3 src/unfccc_ghg_data/helper/folder_mapping.py "
  27. f"--folder={parent_folder}",
  28. dataset=root_path,
  29. message=f"Update folder mapping for {parent_folder}",
  30. outputs=f"{parent_folder}/folder_mapping.json",
  31. dry_run=None,
  32. explicit=True,
  33. )
  34. def task_in_venv():
  35. """
  36. Check if code run from virtual environment and throw an error is not.
  37. Returns
  38. -------
  39. Nothing
  40. """
  41. def in_venv():
  42. if sys.prefix == sys.base_prefix:
  43. raise ValueError( # noqa: TRY003
  44. "You need to run the code from the virtual environment."
  45. )
  46. return {
  47. "actions": [in_venv],
  48. }
  49. # set UNFCCC_GHG_ROOT_PATH environment variable
  50. def task_set_env():
  51. """
  52. Set the environment variable for the module so data is stored in the correct folders
  53. """
  54. return {
  55. "actions": [set_root_path],
  56. }
  57. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  58. read_config_folder = {
  59. "folder": get_var("folder", None),
  60. }
  61. def task_map_folders():
  62. """
  63. Create or update the folder mapping in the given folder
  64. """
  65. return {
  66. "actions": [(map_folders, [read_config_folder["folder"]])],
  67. "verbosity": 2,
  68. "setup": ["in_venv"],
  69. }
  70. # Tasks for getting submissions and downloading them
  71. def task_update_bur():
  72. """Update list of BUR submissions"""
  73. def fetch_bur():
  74. datalad.api.run(
  75. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  76. "fetch_submissions_bur.py",
  77. dataset=root_path,
  78. message="Fetch BUR submissions",
  79. outputs="downloaded_data/UNFCCC/submissions-bur.csv",
  80. dry_run=None,
  81. explicit=True,
  82. )
  83. return {
  84. "targets": ["downloaded_data/UNFCCC/submissions-bur.csv"],
  85. "actions": [
  86. (fetch_bur,),
  87. ],
  88. "verbosity": 2,
  89. "setup": ["in_venv"],
  90. }
  91. def task_download_bur():
  92. """Download BUR submissions"""
  93. def download_bur():
  94. (
  95. datalad.api.run(
  96. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  97. "download_nonannexI.py --category=BUR",
  98. dataset=root_path,
  99. message="Download BUR submissions",
  100. inputs="downloaded_data/UNFCCC/submissions-bur.csv",
  101. dry_run=None,
  102. explicit=False,
  103. ),
  104. )
  105. return {
  106. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  107. # deactivate file_dep fow now as it will always run fetch submissions
  108. # before download
  109. "actions": [
  110. (download_bur,),
  111. (map_folders, ["downloaded_data/UNFCCC"]),
  112. ],
  113. "verbosity": 2,
  114. "setup": ["in_venv"],
  115. }
  116. def task_update_nc():
  117. """Update list of NC submissions"""
  118. def fetch_nc():
  119. datalad.api.run(
  120. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  121. "fetch_submissions_nc.py",
  122. dataset=root_path,
  123. message="Fetch NC submissions",
  124. outputs="downloaded_data/UNFCCC/submissions-nc.csv",
  125. dry_run=None,
  126. explicit=True,
  127. )
  128. return {
  129. "targets": ["downloaded_data/UNFCCC/submissions-nc.csv"],
  130. "actions": [
  131. (fetch_nc,),
  132. ],
  133. "verbosity": 2,
  134. "setup": ["in_venv"],
  135. }
  136. def task_download_nc():
  137. """Download BUR submissions"""
  138. def download_nc():
  139. (
  140. datalad.api.run(
  141. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  142. "download_nonannexI.py --category=NC",
  143. dataset=root_path,
  144. message="Download NC submissions",
  145. inputs="downloaded_data/UNFCCC/submissions-nc.csv",
  146. dry_run=None,
  147. explicit=False,
  148. ),
  149. )
  150. return {
  151. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  152. # deactivate file_dep fow now as it will always run fetch submissions
  153. # before download
  154. "actions": [
  155. (download_nc,),
  156. (map_folders, ["downloaded_data/UNFCCC"]),
  157. ],
  158. "verbosity": 2,
  159. "setup": ["in_venv"],
  160. }
  161. # annexI data: one update call for all data types (as they are on one page)
  162. # but for each year separately.
  163. # downloading is per year and
  164. update_aI_config = {
  165. "year": get_var("year", None),
  166. "category": get_var("category", None),
  167. }
  168. def task_update_annexi():
  169. """Update list of AnnexI submissions"""
  170. def fetch_annexi():
  171. (
  172. datalad.api.run(
  173. cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  174. "fetch_submissions_annexI.py "
  175. f"--year={update_aI_config['year']}",
  176. dataset=root_path,
  177. message=f"Fetch AnnexI submissions for {update_aI_config['year']}",
  178. outputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  179. f"{update_aI_config['year']}.csv",
  180. dry_run=None,
  181. explicit=True,
  182. ),
  183. )
  184. return {
  185. "targets": [
  186. f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"
  187. ],
  188. "actions": [
  189. (fetch_annexi,),
  190. ],
  191. "verbosity": 2,
  192. "setup": ["in_venv"],
  193. }
  194. def task_download_annexi():
  195. """Download AnnexI submissions"""
  196. def download_annexi():
  197. (
  198. datalad.api.run(
  199. cmd="python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
  200. f"--category={update_aI_config['category']} "
  201. f"--year={update_aI_config['year']}",
  202. dataset=root_path,
  203. message=f"Download AnnexI submissions for "
  204. f"{update_aI_config['category']}"
  205. f"{update_aI_config['year']}",
  206. inputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  207. f"{update_aI_config['year']}.csv",
  208. dry_run=None,
  209. explicit=False,
  210. ),
  211. )
  212. return {
  213. # 'file_dep': [f"downloaded_data/UNFCCC/submissions-annex1_"
  214. # f"{update_aI_config['year']}.csv"],
  215. # deactivate file_dep fow now as it will always run fetch submissions
  216. # before download
  217. "actions": [
  218. (download_annexi,),
  219. (map_folders, ["downloaded_data/UNFCCC"]),
  220. ],
  221. "verbosity": 2,
  222. "setup": ["in_venv"],
  223. }
  224. # BTR data: one update call for all data types (as they are on one page)
  225. # but for each submission round separately.
  226. # downloading is per submission round
  227. update_btr_config = {
  228. "round": get_var("round", None),
  229. }
  230. def task_update_btr():
  231. """Update list of BTR submissions"""
  232. def fetch_btr():
  233. (
  234. datalad.api.run(
  235. cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  236. "fetch_submissions_btr.py "
  237. f"--round={update_btr_config['round']}",
  238. dataset=root_path,
  239. message=f"Fetch Biannial Transparency Report submissions for "
  240. f"BTR{update_btr_config['round']}",
  241. outputs=f"downloaded_data/UNFCCC/submissions-BTR"
  242. f"{update_btr_config['round']}.csv",
  243. dry_run=None,
  244. explicit=True,
  245. ),
  246. )
  247. return {
  248. "targets": [
  249. f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"
  250. ],
  251. "actions": [
  252. (fetch_btr,),
  253. ],
  254. "verbosity": 2,
  255. "setup": ["in_venv"],
  256. }
  257. def task_download_btr():
  258. """Download BTR submissions"""
  259. def download_btr():
  260. (
  261. datalad.api.run(
  262. cmd="python src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
  263. f"--round={update_btr_config['round']}",
  264. dataset=root_path,
  265. message="Download BTR submissions for "
  266. f"BTR{update_btr_config['round']}",
  267. inputs=f"downloaded_data/UNFCCC/submissions-BTR"
  268. f"{update_btr_config['round']}.csv",
  269. dry_run=None,
  270. explicit=False,
  271. ),
  272. )
  273. return {
  274. # 'file_dep': [f"downloaded_data/UNFCCC/submissions-btr.csv "
  275. # f"{update_btr_config['round']}.csv"],
  276. # deactivate file_dep fow now as it will always run fetch submissions
  277. # before download
  278. "actions": [
  279. (download_btr,),
  280. (map_folders, ["downloaded_data/UNFCCC"]),
  281. ],
  282. "verbosity": 2,
  283. "setup": ["in_venv"],
  284. }
  285. def task_download_ndc():
  286. """Download NDC submissions"""
  287. def download_ndc():
  288. (
  289. datalad.api.run(
  290. cmd="src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
  291. dataset=root_path,
  292. message="Download NDC submissions",
  293. inputs=None,
  294. dry_run=None,
  295. explicit=False,
  296. ),
  297. )
  298. return {
  299. "actions": [
  300. (download_ndc,),
  301. (map_folders, ["downloaded_data/UNFCCC"]),
  302. ],
  303. "verbosity": 2,
  304. "setup": ["in_venv"],
  305. }
  306. # read UNFCCC submissions.
  307. # datalad run is called from within the read_UNFCCC_submission.py script
  308. read_config = {
  309. "country": get_var("country", None),
  310. "submission": get_var("submission", None),
  311. }
  312. # TODO: make individual task for non-UNFCCC submissions
  313. def task_read_unfccc_submission():
  314. """Read submission for a country (if code exists) (not for CRF)
  315. Datalad is called from `read_UNFCCC_submission`, so we can just call this script
  316. here.
  317. TODO: check if it makes sense to convert script to function
  318. """
  319. return {
  320. "actions": [
  321. f"python src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py "
  322. f"--country={read_config['country']} "
  323. f"--submission={read_config['submission']}",
  324. (map_folders, ["extracted_data/UNFCCC"]),
  325. ],
  326. "verbosity": 2,
  327. "setup": ["in_venv"],
  328. }
  329. # read UNFCCC CRF submissions.
  330. # datalad run is called from within the read_UNFCCC_submission.py script
  331. read_config_crf = {
  332. "country": get_var("country", None),
  333. "submission_year": get_var("submission_year", None),
  334. "submission_date": get_var("submission_date", None),
  335. "re_read": get_var("re_read", False),
  336. "countries": get_var("countries", None),
  337. "data_year": get_var("data_year", None),
  338. "totest": get_var("totest", None),
  339. "type": get_var("type", "CRF"),
  340. }
  341. def task_read_unfccc_crf_submission():
  342. """Read CRF submission for a country"""
  343. def read_CRF():
  344. if read_config_crf["re_read"] == "True":
  345. re_read = True
  346. else:
  347. re_read = False
  348. read_crf_for_country_datalad(
  349. read_config_crf["country"],
  350. submission_year=int(read_config_crf["submission_year"]),
  351. submission_date=read_config_crf["submission_date"],
  352. re_read=re_read,
  353. type=read_config_crf["type"],
  354. )
  355. return {
  356. "actions": [
  357. (read_CRF,),
  358. (map_folders, ["extracted_data/UNFCCC"]),
  359. ],
  360. "task_dep": ["set_env"],
  361. "verbosity": 2,
  362. "setup": ["in_venv"],
  363. }
  364. def task_read_new_unfccc_crf_for_year():
  365. """
  366. Read CRF/CRT submission for all countries for given submission year.
  367. By default only reads data not present yet. Only reads the latest updated
  368. submission for each country.
  369. """
  370. def read_new_CRF():
  371. if read_config_crf["re_read"] == "True":
  372. re_read = True
  373. else:
  374. re_read = False
  375. read_new_crf_for_year_datalad(
  376. submission_year=int(read_config_crf["submission_year"]),
  377. countries=read_config_crf["countries"],
  378. re_read=re_read,
  379. type=read_config_crf["type"],
  380. )
  381. return {
  382. "actions": [
  383. (read_new_CRF,),
  384. (map_folders, ["extracted_data/UNFCCC"]),
  385. ],
  386. "task_dep": ["set_env"],
  387. "verbosity": 2,
  388. "setup": ["in_venv"],
  389. }
  390. def task_test_read_unfccc_crf_for_year():
  391. """
  392. Test CRF/CRT reading.
  393. Test CRF/CRT with a single year only for speed and logging to extend specifications
  394. if necessary.
  395. """
  396. def read_CRF():
  397. if read_config_crf["totest"] == "True":
  398. totest = True
  399. else:
  400. totest = False
  401. if read_config_crf["data_year"] is not None:
  402. data_year = int(read_config_crf["data_year"])
  403. else:
  404. data_year = None
  405. read_year_to_test_specs(
  406. submission_year=int(read_config_crf["submission_year"]),
  407. data_year=data_year,
  408. totest=totest,
  409. country_code=read_config_crf["country"],
  410. type=read_config_crf["type"],
  411. )
  412. return {
  413. "actions": [(read_CRF,)],
  414. "verbosity": 2,
  415. "setup": ["in_venv"],
  416. }
  417. def task_compile_raw_unfccc_crf_for_year():
  418. """
  419. Collect all latest CRF/CRT submissions for a given year / submission round
  420. Reads the latest data from the extracted data folder for each country.
  421. Notifies the user if new data are available in the downloaded_data folder
  422. which have not yet been read.
  423. Data are saved in the datasets/UNFCCC/[CRFYYYY|CRTX] folder.
  424. """
  425. actions = [
  426. f"python "
  427. f"src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py "
  428. f"--submission_year={read_config_crf['submission_year']} "
  429. f"--type={read_config_crf['type']} "
  430. ]
  431. return {
  432. "actions": actions,
  433. "task_dep": ["set_env"],
  434. "verbosity": 2,
  435. "setup": ["in_venv"],
  436. }
  437. #
  438. # # tasks for DI reader
  439. # # datalad run is called from within the read_unfccc_di_for_country.py script
  440. # read_config_di = {
  441. # "country": get_var("country", None),
  442. # "date": get_var("date", None),
  443. # "annexI": get_var("annexI", False),
  444. # # "countries": get_var('countries', None),
  445. # }
  446. #
  447. #
  448. # def task_read_unfccc_di_for_country():
  449. # """Read DI data for a country"""
  450. # actions = [
  451. # f"python "
  452. # f"src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_datalad.py "
  453. # f"--country={read_config_di['country']}",
  454. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  455. # "--folder=extracted_data/UNFCCC",
  456. # ]
  457. # return {
  458. # "actions": actions,
  459. # "task_dep": ["set_env"],
  460. # "verbosity": 2,
  461. # "setup": ["in_venv"],
  462. # }
  463. #
  464. #
  465. # def task_process_unfccc_di_for_country():
  466. # """Process DI data for a country"""
  467. # actions = [
  468. # f"python "
  469. # f"src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_datalad"
  470. # f".py "
  471. # f"--country={read_config_di['country']} --date={read_config_di['date']}",
  472. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  473. # "--folder=extracted_data/UNFCCC",
  474. # ]
  475. # return {
  476. # "actions": actions,
  477. # "task_dep": ["set_env"],
  478. # "verbosity": 2,
  479. # "setup": ["in_venv"],
  480. # }
  481. #
  482. #
  483. # def task_read_unfccc_di_for_country_group():
  484. # """Read DI data for a country group"""
  485. # actions = [
  486. # "python "
  487. # "src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_group_datalad"
  488. # ".py",
  489. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  490. # "--folder=extracted_data/UNFCCC",
  491. # ]
  492. # if read_config_di["annexI"] == "True":
  493. # actions[0] = actions[0] + " --annexI"
  494. #
  495. # return {
  496. # "actions": actions,
  497. # "task_dep": ["set_env"],
  498. # "verbosity": 2,
  499. # "setup": ["in_venv"],
  500. # }
  501. #
  502. #
  503. # def task_process_unfccc_di_for_country_group():
  504. # """Process DI data for a country group"""
  505. # actions = [
  506. # "python "
  507. # "src/unfccc_ghg_data/unfccc_di_reader"
  508. # "/process_unfccc_di_for_country_group_datalad"
  509. # ".py",
  510. # ]
  511. # if read_config_di["annexI"] == "True":
  512. # actions[0] = actions[0] + " --annexI"
  513. # if read_config_di["date"] is not None:
  514. # actions[0] = actions[0] + f" --date={read_config_di['date']}"
  515. #
  516. # return {
  517. # "actions": actions,
  518. # "task_dep": ["set_env"],
  519. # "verbosity": 2,
  520. # "setup": ["in_venv"],
  521. # }
  522. #
  523. #
  524. # # general tasks
  525. # def task_country_info():
  526. # """
  527. # Print information on submissions and datasets available for given country
  528. # """
  529. # return {
  530. # "actions": [
  531. # f"python src/unfccc_ghg_data/helper/country_info.py "
  532. # f"--country={read_config['country']}"
  533. # ],
  534. # "task_dep": ["set_env"],
  535. # "verbosity": 2,
  536. # "setup": ["in_venv"],
  537. # }