dodo.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612
  1. """
  2. Define the tasks for UNFCCC data repository
  3. """
  4. import os
  5. import sys
  6. import datalad.api
  7. from doit import get_var
  8. root_path = "."
  9. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  10. from unfccc_ghg_data.unfccc_crf_reader.unfccc_crf_reader_prod import ( # noqa: E402
  11. read_crf_for_country_datalad,
  12. )
  13. def set_root_path():
  14. """Set the root folder for the repository"""
  15. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  16. def map_folders(parent_folder):
  17. """
  18. Create or update the folder mapping in the given folder
  19. Internal function
  20. """
  21. datalad.api.run(
  22. cmd="python3 src/unfccc_ghg_data/helper/folder_mapping.py "
  23. f"--folder={parent_folder}",
  24. dataset=root_path,
  25. message=f"Update folder mapping for {parent_folder}",
  26. outputs=f"{parent_folder}/folder_mapping.json",
  27. dry_run=None,
  28. explicit=True,
  29. )
  30. def task_in_venv():
  31. """
  32. Check if code run from virtual environment and throw an error is not.
  33. Returns
  34. -------
  35. Nothing
  36. """
  37. def in_venv():
  38. if sys.prefix == sys.base_prefix:
  39. raise ValueError( # noqa: TRY003
  40. "You need to run the code from the virtual environment."
  41. )
  42. return {
  43. "actions": [in_venv],
  44. }
  45. # set UNFCCC_GHG_ROOT_PATH environment variable
  46. def task_set_env():
  47. """
  48. Set the environment variable for the module so data is stored in the correct folders
  49. """
  50. return {
  51. "actions": [set_root_path],
  52. }
  53. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  54. read_config_folder = {
  55. "folder": get_var("folder", None),
  56. }
  57. def task_map_folders():
  58. """
  59. Create or update the folder mapping in the given folder
  60. """
  61. return {
  62. "actions": [(map_folders, [read_config_folder["folder"]])],
  63. "verbosity": 2,
  64. "setup": ["in_venv"],
  65. }
  66. # Tasks for getting submissions and downloading them
  67. def task_update_bur():
  68. """Update list of BUR submissions"""
  69. def fetch_bur():
  70. datalad.api.run(
  71. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  72. "fetch_submissions_bur.py",
  73. dataset=root_path,
  74. message="Fetch BUR submissions",
  75. outputs="downloaded_data/UNFCCC/submissions-bur.csv",
  76. dry_run=None,
  77. explicit=True,
  78. )
  79. return {
  80. "targets": ["downloaded_data/UNFCCC/submissions-bur.csv"],
  81. "actions": [
  82. (fetch_bur,),
  83. ],
  84. "verbosity": 2,
  85. "setup": ["in_venv"],
  86. }
  87. def task_download_bur():
  88. """Download BUR submissions"""
  89. def download_bur():
  90. (
  91. datalad.api.run(
  92. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  93. "download_nonannexI.py --category=BUR",
  94. dataset=root_path,
  95. message="Download BUR submissions",
  96. inputs="downloaded_data/UNFCCC/submissions-bur.csv",
  97. dry_run=None,
  98. explicit=False,
  99. ),
  100. )
  101. return {
  102. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  103. # deactivate file_dep fow now as it will always run fetch submissions
  104. # before download
  105. "actions": [
  106. (download_bur,),
  107. (map_folders, ["downloaded_data/UNFCCC"]),
  108. ],
  109. "verbosity": 2,
  110. "setup": ["in_venv"],
  111. }
  112. def task_update_nc():
  113. """Update list of NC submissions"""
  114. def fetch_nc():
  115. datalad.api.run(
  116. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  117. "fetch_submissions_nc.py",
  118. dataset=root_path,
  119. message="Fetch NC submissions",
  120. outputs="downloaded_data/UNFCCC/submissions-nc.csv",
  121. dry_run=None,
  122. explicit=True,
  123. )
  124. return {
  125. "targets": ["downloaded_data/UNFCCC/submissions-nc.csv"],
  126. "actions": [
  127. (fetch_nc,),
  128. ],
  129. "verbosity": 2,
  130. "setup": ["in_venv"],
  131. }
  132. def task_download_nc():
  133. """Download BUR submissions"""
  134. def download_nc():
  135. (
  136. datalad.api.run(
  137. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  138. "download_nonannexI.py --category=NC",
  139. dataset=root_path,
  140. message="Download NC submissions",
  141. inputs="downloaded_data/UNFCCC/submissions-nc.csv",
  142. dry_run=None,
  143. explicit=False,
  144. ),
  145. )
  146. return {
  147. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  148. # deactivate file_dep fow now as it will always run fetch submissions
  149. # before download
  150. "actions": [
  151. (download_nc,),
  152. (map_folders, ["downloaded_data/UNFCCC"]),
  153. ],
  154. "verbosity": 2,
  155. "setup": ["in_venv"],
  156. }
  157. # annexI data: one update call for all data types (as they are on one page)
  158. # but for each year separately.
  159. # downloading is per year and
  160. update_aI_config = {
  161. "year": get_var("year", None),
  162. "category": get_var("category", None),
  163. }
  164. def task_update_annexi():
  165. """Update list of AnnexI submissions"""
  166. def fetch_annexi():
  167. (
  168. datalad.api.run(
  169. cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  170. "fetch_submissions_annexI.py "
  171. f"--year={update_aI_config['year']}",
  172. dataset=root_path,
  173. message=f"Fetch AnnexI submissions for {update_aI_config['year']}",
  174. outputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  175. f"{update_aI_config['year']}.csv",
  176. dry_run=None,
  177. explicit=True,
  178. ),
  179. )
  180. return {
  181. "targets": [
  182. f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"
  183. ],
  184. "actions": [
  185. (fetch_annexi,),
  186. ],
  187. "verbosity": 2,
  188. "setup": ["in_venv"],
  189. }
  190. def task_download_annexi():
  191. """Download AnnexI submissions"""
  192. def download_annexi():
  193. (
  194. datalad.api.run(
  195. cmd="python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
  196. f"--category={update_aI_config['category']} "
  197. f"--year={update_aI_config['year']}",
  198. dataset=root_path,
  199. message=f"Download AnnexI submissions for "
  200. f"{update_aI_config['category']}"
  201. f"{update_aI_config['year']}",
  202. inputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  203. f"{update_aI_config['year']}.csv",
  204. dry_run=None,
  205. explicit=False,
  206. ),
  207. )
  208. return {
  209. # 'file_dep': [f"downloaded_data/UNFCCC/submissions-annex1_"
  210. # f"{update_aI_config['year']}.csv"],
  211. # deactivate file_dep fow now as it will always run fetch submissions
  212. # before download
  213. "actions": [
  214. (download_annexi,),
  215. (map_folders, ["downloaded_data/UNFCCC"]),
  216. ],
  217. "verbosity": 2,
  218. "setup": ["in_venv"],
  219. }
  220. # BTR data: one update call for all data types (as they are on one page)
  221. # but for each submission round separately.
  222. # downloading is per submission round
  223. update_btr_config = {
  224. "round": get_var("round", None),
  225. }
  226. def task_update_btr():
  227. """Update list of BTR submissions"""
  228. def fetch_btr():
  229. (
  230. datalad.api.run(
  231. cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  232. "fetch_submissions_btr.py "
  233. f"--round={update_btr_config['round']}",
  234. dataset=root_path,
  235. message=f"Fetch Biannial Transparency Report submissions for "
  236. f"BTR{update_btr_config['round']}",
  237. outputs=f"downloaded_data/UNFCCC/submissions-BTR"
  238. f"{update_btr_config['round']}.csv",
  239. dry_run=None,
  240. explicit=True,
  241. ),
  242. )
  243. return {
  244. "targets": [
  245. f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"
  246. ],
  247. "actions": [
  248. (fetch_btr,),
  249. ],
  250. "verbosity": 2,
  251. "setup": ["in_venv"],
  252. }
  253. def task_download_btr():
  254. """Download BTR submissions"""
  255. def download_btr():
  256. (
  257. datalad.api.run(
  258. cmd="python src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
  259. f"--round={update_btr_config['round']}",
  260. dataset=root_path,
  261. message="Download BTR submissions for "
  262. f"BTR{update_btr_config['round']}",
  263. inputs=f"downloaded_data/UNFCCC/submissions-BTR"
  264. f"{update_btr_config['round']}.csv",
  265. dry_run=None,
  266. explicit=False,
  267. ),
  268. )
  269. return {
  270. # 'file_dep': [f"downloaded_data/UNFCCC/submissions-btr.csv "
  271. # f"{update_btr_config['round']}.csv"],
  272. # deactivate file_dep fow now as it will always run fetch submissions
  273. # before download
  274. "actions": [
  275. (download_btr,),
  276. (map_folders, ["downloaded_data/UNFCCC"]),
  277. ],
  278. "verbosity": 2,
  279. "setup": ["in_venv"],
  280. }
  281. def task_download_ndc():
  282. """Download NDC submissions"""
  283. def download_ndc():
  284. (
  285. datalad.api.run(
  286. cmd="src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
  287. dataset=root_path,
  288. message="Download NDC submissions",
  289. inputs=None,
  290. dry_run=None,
  291. explicit=False,
  292. ),
  293. )
  294. return {
  295. "actions": [
  296. (download_ndc,),
  297. (map_folders, ["downloaded_data/UNFCCC"]),
  298. ],
  299. "verbosity": 2,
  300. "setup": ["in_venv"],
  301. }
  302. # read UNFCCC submissions.
  303. # datalad run is called from within the read_UNFCCC_submission.py script
  304. read_config = {
  305. "country": get_var("country", None),
  306. "submission": get_var("submission", None),
  307. }
  308. # TODO: make individual task for non-UNFCCC submissions
  309. def task_read_unfccc_submission():
  310. """Read submission for a country (if code exists) (not for CRF)
  311. Datalad is called from `read_UNFCCC_submission`, so we can just call this script
  312. here.
  313. TODO: check if it makes sense to convert script to function
  314. """
  315. return {
  316. "actions": [
  317. f"python src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py "
  318. f"--country={read_config['country']} "
  319. f"--submission={read_config['submission']}",
  320. (map_folders, ["extracted_data/UNFCCC"]),
  321. ],
  322. "verbosity": 2,
  323. "setup": ["in_venv"],
  324. }
  325. # read UNFCCC CRF submissions.
  326. # datalad run is called from within the read_UNFCCC_submission.py script
  327. read_config_crf = {
  328. "country": get_var("country", None),
  329. "submission_year": get_var("submission_year", None),
  330. "submission_date": get_var("submission_date", None),
  331. "re_read": get_var("re_read", False),
  332. "countries": get_var("countries", None),
  333. "data_year": get_var("data_year", None),
  334. "totest": get_var("totest", None),
  335. }
  336. def task_read_unfccc_crf_submission():
  337. """Read CRF submission for a country"""
  338. def read_CRF():
  339. if read_config_crf["re_read"] == "True":
  340. re_read = True
  341. else:
  342. re_read = False
  343. read_crf_for_country_datalad(
  344. read_config_crf["country"],
  345. submission_year=int(read_config_crf["submission_year"]),
  346. submission_date=read_config_crf["submission_date"],
  347. re_read=re_read,
  348. )
  349. return {
  350. "actions": [
  351. (read_CRF,),
  352. (map_folders, ["extracted_data/UNFCCC"]),
  353. ],
  354. "task_dep": ["set_env"],
  355. "verbosity": 2,
  356. "setup": ["in_venv"],
  357. }
  358. #
  359. # def task_read_new_unfccc_crf_for_year():
  360. # """
  361. # Read CRF submission for all countries for given submission year.
  362. #
  363. # By default only reads data not present yet. Only reads the latest updated
  364. # submission for each country.
  365. # """
  366. # actions = [
  367. # f"python src/unfccc_ghg_data/unfccc_crf_reader"
  368. # f"/read_new_unfccc_crf_for_year_datalad.py "
  369. # f"--submission_year={read_config_crf['submission_year']} ",
  370. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  371. # "--folder=extracted_data/UNFCCC",
  372. # ]
#     # specifying countries is currently disabled due to problems with command line
  374. # # list arguments
  375. # # if read_config_crf["countries"] is not None:
  376. # # actions[0] = actions[0] + f"--countries={read_config_crf['countries']} "
  377. # if read_config_crf["re_read"] == "True":
  378. # actions[0] = actions[0] + " --re_read"
  379. # return {
  380. # #'basename': "Read_CRF_year",
  381. # "actions": actions,
  382. # "task_dep": ["set_env"],
  383. # "verbosity": 2,
  384. # "setup": ["in_venv"],
  385. # }
  386. #
  387. #
  388. # def task_test_read_unfccc_crf_for_year():
  389. # """
  390. # Test CRF reading.
  391. #
  392. # Test CRF with a single year only for speed and logging to extend specifications
  393. # if necessary.
  394. # """
  395. # actions = [
  396. # f"python "
  397. # f"src/unfccc_ghg_data/unfccc_crf_reader"
  398. # f"/test_read_unfccc_crf_for_year.py "
  399. # f"--submission_year={read_config_crf['submission_year']} "
  400. # f"--country={read_config_crf['country']} "
  401. # ]
  402. # if read_config_crf["totest"] == "True":
  403. # actions[0] = actions[0] + " --totest"
  404. #
  405. # if read_config_crf["data_year"] is not None:
  406. # actions[0] = actions[0] + f"--data_year={read_config_crf['data_year']} "
  407. # return {
  408. # #'basename': "Read_CRF_year",
  409. # "actions": actions,
  410. # "task_dep": ["set_env"],
  411. # "verbosity": 2,
  412. # "setup": ["in_venv"],
  413. # }
  414. #
  415. #
  416. # def task_compile_raw_unfccc_crf_for_year():
  417. # """
  418. # Collect all latest CRF submissions for a given year
  419. #
#     Reads the latest data from the extracted data folder for each country.
  421. # Notifies the user if new data are available in the downloaded_data folder
  422. # which have not yet been read.
  423. #
  424. # Data are saved in the datasets/UNFCCC/CRFYYYY folder.
  425. # """
  426. # actions = [
  427. # f"python "
  428. # f"src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py "
  429. # f"--submission_year={read_config_crf['submission_year']} "
  430. # ]
  431. # return {
  432. # "actions": actions,
  433. # "task_dep": ["set_env"],
  434. # "verbosity": 2,
  435. # "setup": ["in_venv"],
  436. # }
  437. #
  438. #
  439. # # tasks for DI reader
  440. # # datalad run is called from within the read_unfccc_di_for_country.py script
  441. # read_config_di = {
  442. # "country": get_var("country", None),
  443. # "date": get_var("date", None),
  444. # "annexI": get_var("annexI", False),
  445. # # "countries": get_var('countries', None),
  446. # }
  447. #
  448. #
  449. # def task_read_unfccc_di_for_country():
  450. # """Read DI data for a country"""
  451. # actions = [
  452. # f"python "
  453. # f"src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_datalad.py "
  454. # f"--country={read_config_di['country']}",
  455. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  456. # "--folder=extracted_data/UNFCCC",
  457. # ]
  458. # return {
  459. # "actions": actions,
  460. # "task_dep": ["set_env"],
  461. # "verbosity": 2,
  462. # "setup": ["in_venv"],
  463. # }
  464. #
  465. #
  466. # def task_process_unfccc_di_for_country():
  467. # """Process DI data for a country"""
  468. # actions = [
  469. # f"python "
  470. # f"src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_datalad"
  471. # f".py "
  472. # f"--country={read_config_di['country']} --date={read_config_di['date']}",
  473. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  474. # "--folder=extracted_data/UNFCCC",
  475. # ]
  476. # return {
  477. # "actions": actions,
  478. # "task_dep": ["set_env"],
  479. # "verbosity": 2,
  480. # "setup": ["in_venv"],
  481. # }
  482. #
  483. #
  484. # def task_read_unfccc_di_for_country_group():
  485. # """Read DI data for a country group"""
  486. # actions = [
  487. # "python "
  488. # "src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_group_datalad"
  489. # ".py",
  490. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  491. # "--folder=extracted_data/UNFCCC",
  492. # ]
  493. # if read_config_di["annexI"] == "True":
  494. # actions[0] = actions[0] + " --annexI"
  495. #
  496. # return {
  497. # "actions": actions,
  498. # "task_dep": ["set_env"],
  499. # "verbosity": 2,
  500. # "setup": ["in_venv"],
  501. # }
  502. #
  503. #
  504. # def task_process_unfccc_di_for_country_group():
  505. # """Process DI data for a country group"""
  506. # actions = [
  507. # "python "
  508. # "src/unfccc_ghg_data/unfccc_di_reader"
  509. # "/process_unfccc_di_for_country_group_datalad"
  510. # ".py",
  511. # ]
  512. # if read_config_di["annexI"] == "True":
  513. # actions[0] = actions[0] + " --annexI"
  514. # if read_config_di["date"] is not None:
  515. # actions[0] = actions[0] + f" --date={read_config_di['date']}"
  516. #
  517. # return {
  518. # "actions": actions,
  519. # "task_dep": ["set_env"],
  520. # "verbosity": 2,
  521. # "setup": ["in_venv"],
  522. # }
  523. #
  524. #
  525. # # general tasks
  526. # def task_country_info():
  527. # """
  528. # Print information on submissions and datasets available for given country
  529. # """
  530. # return {
  531. # "actions": [
  532. # f"python src/unfccc_ghg_data/helper/country_info.py "
  533. # f"--country={read_config['country']}"
  534. # ],
  535. # "task_dep": ["set_env"],
  536. # "verbosity": 2,
  537. # "setup": ["in_venv"],
  538. # }