dodo.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621
  1. """
  2. Define the tasks for UNFCCC data repository
  3. """
  4. import os
  5. import sys
  6. import datalad.api
  7. from doit import get_var
  8. root_path = "."
  9. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  10. # TODO: task for folder mapping
  11. # create virtual environment
  12. # def task_setup_venv():
  13. # """Create virtual environment"""
  14. # return {
  15. # 'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
  16. # 'actions': ['python3 -m venv venv',
  17. # './venv/bin/pip install --upgrade pip wheel',
  18. # #'./venv/bin/pip install -Ur unfccc_ghg_data/requirements.txt',
  19. # './venv/bin/pip install --upgrade --upgrade-strategy '
  20. # 'eager -e .[dev]',
  21. # 'touch venv',],
  22. # 'targets': ['venv'],
  23. # 'verbosity': 2,
  24. # }
  25. def set_root_path():
  26. """Set the root folder for the repository"""
  27. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  28. def map_folders(parent_folder):
  29. """
  30. Create or update the folder mapping in the given folder
  31. Internal function
  32. """
  33. datalad.api.run(
  34. cmd="python3 src/unfccc_ghg_data/helper/folder_mapping.py "
  35. f"--folder={parent_folder}",
  36. dataset=root_path,
  37. message=f"Update folder mapping for {parent_folder}",
  38. outputs=f"{parent_folder}/folder_mapping.json",
  39. dry_run=None,
  40. explicit=True,
  41. )
  42. def task_in_venv():
  43. """
  44. Check if code run from virtual environment and throw an error is not.
  45. Returns
  46. -------
  47. Nothing
  48. """
  49. def in_venv():
  50. if sys.prefix == sys.base_prefix:
  51. raise ValueError( # noqa: TRY003
  52. "You need to run the code from the virtual environment."
  53. )
  54. return {
  55. "actions": [in_venv],
  56. }
  57. # set UNFCCC_GHG_ROOT_PATH environment variable
  58. def task_set_env():
  59. """
  60. Set the environment variable for the module so data is stored in the correct folders
  61. """
  62. return {
  63. "actions": [set_root_path],
  64. }
  65. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  66. read_config_folder = {
  67. "folder": get_var("folder", None),
  68. }
  69. def task_map_folders():
  70. """
  71. Create or update the folder mapping in the given folder
  72. """
  73. return {
  74. "actions": [(map_folders, [read_config_folder["folder"]])],
  75. "verbosity": 2,
  76. "setup": ["in_venv"],
  77. }
  78. # Tasks for getting submissions and downloading them
  79. def task_update_bur():
  80. """Update list of BUR submissions"""
  81. def fetch_bur():
  82. datalad.api.run(
  83. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  84. "fetch_submissions_bur.py",
  85. dataset=root_path,
  86. message="Fetch BUR submissions",
  87. outputs="downloaded_data/UNFCCC/submissions-bur.csv",
  88. dry_run=None,
  89. explicit=True,
  90. )
  91. return {
  92. "targets": ["downloaded_data/UNFCCC/submissions-bur.csv"],
  93. "actions": [
  94. (fetch_bur,),
  95. ],
  96. "verbosity": 2,
  97. "setup": ["in_venv"],
  98. }
  99. def task_download_bur():
  100. """Download BUR submissions"""
  101. def download_bur():
  102. (
  103. datalad.api.run(
  104. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  105. "download_nonannexI.py --category=BUR",
  106. dataset=root_path,
  107. message="Download BUR submissions",
  108. inputs="downloaded_data/UNFCCC/submissions-bur.csv",
  109. dry_run=None,
  110. explicit=False,
  111. ),
  112. )
  113. return {
  114. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  115. # deactivate file_dep fow now as it will always run fetch submissions
  116. # before download
  117. "actions": [
  118. (download_bur,),
  119. (map_folders, ["downloaded_data/UNFCCC"]),
  120. ],
  121. "verbosity": 2,
  122. "setup": ["in_venv"],
  123. }
  124. def task_update_nc():
  125. """Update list of NC submissions"""
  126. def fetch_nc():
  127. datalad.api.run(
  128. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  129. "fetch_submissions_nc.py",
  130. dataset=root_path,
  131. message="Fetch NC submissions",
  132. outputs="downloaded_data/UNFCCC/submissions-nc.csv",
  133. dry_run=None,
  134. explicit=True,
  135. )
  136. return {
  137. "targets": ["downloaded_data/UNFCCC/submissions-nc.csv"],
  138. "actions": [
  139. (fetch_nc,),
  140. ],
  141. "verbosity": 2,
  142. "setup": ["in_venv"],
  143. }
  144. def task_download_nc():
  145. """Download BUR submissions"""
  146. def download_nc():
  147. (
  148. datalad.api.run(
  149. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  150. "download_nonannexI.py --category=NC",
  151. dataset=root_path,
  152. message="Download NC submissions",
  153. inputs="downloaded_data/UNFCCC/submissions-nc.csv",
  154. dry_run=None,
  155. explicit=False,
  156. ),
  157. )
  158. return {
  159. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  160. # deactivate file_dep fow now as it will always run fetch submissions
  161. # before download
  162. "actions": [
  163. (download_nc,),
  164. (map_folders, ["downloaded_data/UNFCCC"]),
  165. ],
  166. "verbosity": 2,
  167. "setup": ["in_venv"],
  168. }
  169. # annexI data: one update call for all data types (as they are on one page)
  170. # but for each year separately.
  171. # downloading is per year and category
  172. update_aI_config = {
  173. "year": get_var("year", None),
  174. "category": get_var("category", None),
  175. }
  176. def task_update_annexi():
  177. """Update list of AnnexI submissions"""
  178. def fetch_annexi():
  179. (
  180. datalad.api.run(
  181. cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  182. "fetch_submissions_annexI.py "
  183. f"--year={update_aI_config['year']}",
  184. dataset=root_path,
  185. message=f"Fetch AnnexI submissions for {update_aI_config['year']}",
  186. outputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  187. f"{update_aI_config['year']}.csv",
  188. dry_run=None,
  189. explicit=True,
  190. ),
  191. )
  192. return {
  193. "targets": [
  194. f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"
  195. ],
  196. "actions": [
  197. (fetch_annexi,),
  198. ],
  199. "verbosity": 2,
  200. "setup": ["in_venv"],
  201. }
  202. def task_download_annexi():
  203. """Download AnnexI submissions"""
  204. def download_annexi():
  205. (
  206. datalad.api.run(
  207. cmd="python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
  208. f"--category={update_aI_config['category']} "
  209. f"--year={update_aI_config['year']}",
  210. dataset=root_path,
  211. message=f"Download AnnexI submissions for "
  212. f"{update_aI_config['category']}"
  213. f"{update_aI_config['year']}",
  214. inputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  215. f"{update_aI_config['year']}.csv",
  216. dry_run=None,
  217. explicit=False,
  218. ),
  219. )
  220. return {
  221. # 'file_dep': [f"downloaded_data/UNFCCC/submissions-annex1_"
  222. # f"{update_aI_config['year']}.csv"],
  223. # deactivate file_dep fow now as it will always run fetch submissions
  224. # before download
  225. "actions": [
  226. (download_annexi,),
  227. (map_folders, ["downloaded_data/UNFCCC"]),
  228. ],
  229. "verbosity": 2,
  230. "setup": ["in_venv"],
  231. }
  232. # BTR data: one update call for all data types (as they are on one page)
  233. # but for each submission round separately.
  234. # downloading is per submission round
  235. update_btr_config = {
  236. "round": get_var("round", None),
  237. }
  238. def task_update_btr():
  239. """Update list of BTR submissions"""
  240. def fetch_btr():
  241. (
  242. datalad.api.run(
  243. cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  244. "fetch_submissions_btr.py "
  245. f"--round={update_btr_config['round']}",
  246. dataset=root_path,
  247. message=f"Fetch Biannial Transparency Report submissions for "
  248. f"BTR{update_btr_config['round']}",
  249. outputs=f"downloaded_data/UNFCCC/submissions-BTR"
  250. f"{update_btr_config['round']}.csv",
  251. dry_run=None,
  252. explicit=True,
  253. ),
  254. )
  255. return {
  256. "targets": [
  257. f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"
  258. ],
  259. "actions": [
  260. (fetch_btr,),
  261. ],
  262. "verbosity": 2,
  263. "setup": ["in_venv"],
  264. }
  265. def task_download_btr():
  266. """Download BTR submissions"""
  267. def download_btr():
  268. (
  269. datalad.api.run(
  270. cmd="src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
  271. f"--round={update_btr_config['round']}",
  272. dataset=root_path,
  273. message="Download BTR submissions for "
  274. f"BTR{update_btr_config['round']}",
  275. inputs=f"downloaded_data/UNFCCC/submissions-BTR"
  276. f"{update_btr_config['round']}.csv",
  277. dry_run=None,
  278. explicit=False,
  279. ),
  280. )
  281. return {
  282. # 'file_dep': [f"downloaded_data/UNFCCC/submissions-btr.csv "
  283. # f"{update_btr_config['round']}.csv"],
  284. # deactivate file_dep fow now as it will always run fetch submissions
  285. # before download
  286. "actions": [
  287. (download_btr,),
  288. (map_folders, ["downloaded_data/UNFCCC"]),
  289. ],
  290. "verbosity": 2,
  291. "setup": ["in_venv"],
  292. }
  293. def task_download_ndc():
  294. """Download NDC submissions"""
  295. def download_ndc():
  296. (
  297. datalad.api.run(
  298. cmd="src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
  299. dataset=root_path,
  300. message="Download NDC submissions",
  301. inputs=None,
  302. dry_run=None,
  303. explicit=False,
  304. ),
  305. )
  306. return {
  307. "actions": [
  308. (download_ndc,),
  309. (map_folders, ["downloaded_data/UNFCCC"]),
  310. ],
  311. "verbosity": 2,
  312. "setup": ["in_venv"],
  313. }
  314. # read UNFCCC submissions.
  315. # datalad run is called from within the read_UNFCCC_submission.py script
  316. read_config = {
  317. "country": get_var("country", None),
  318. "submission": get_var("submission", None),
  319. }
  320. # TODO: make individual task for non-UNFCCC submissions
  321. def task_read_unfccc_submission():
  322. """Read submission for a country (if code exists) (not for CRF)
  323. Datalad is called from `read_UNFCCC_submission`, so we can just call this script
  324. here.
  325. TODO: check if it makes sense to convert script to function
  326. """
  327. return {
  328. "actions": [
  329. f"python src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py "
  330. f"--country={read_config['country']} "
  331. f"--submission={read_config['submission']}",
  332. (map_folders, ["extracted_data/UNFCCC"]),
  333. ],
  334. "verbosity": 2,
  335. "setup": ["in_venv"],
  336. }
  337. #
  338. # # read UNFCCC submissions.
  339. # # datalad run is called from within the read_UNFCCC_submission.py script
  340. # read_config_crf = {
  341. # "country": get_var("country", None),
  342. # "submission_year": get_var("submission_year", None),
  343. # "submission_date": get_var("submission_date", None),
  344. # "re_read": get_var("re_read", False),
  345. # "countries": get_var("countries", None),
  346. # "data_year": get_var("data_year", None),
  347. # "totest": get_var("totest", None),
  348. # }
  349. #
  350. #
  351. # def task_read_unfccc_crf_submission():
  352. # """Read CRF submission for a country"""
  353. # actions = [
  354. # f"python src/unfccc_ghg_data/unfccc_crf_reader"
  355. # f"/read_unfccc_crf_submission_datalad.py "
  356. # f"--country={read_config_crf['country']} "
  357. # f"--submission_year={read_config_crf['submission_year']} "
  358. # f"--submission_date={read_config_crf['submission_date']} ",
  359. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  360. # "--folder=extracted_data/UNFCCC",
  361. # ]
  362. # if read_config_crf["re_read"] == "True":
  363. # actions[0] = actions[0] + " --re_read"
  364. # return {
  365. # "actions": actions,
  366. # "task_dep": ["set_env"],
  367. # "verbosity": 2,
  368. # "setup": ["in_venv"],
  369. # }
  370. #
  371. #
  372. # def task_read_new_unfccc_crf_for_year():
  373. # """
  374. # Read CRF submission for all countries for given submission year.
  375. #
  376. # By default only reads data not present yet. Only reads the latest updated
  377. # submission for each country.
  378. # """
  379. # actions = [
  380. # f"python src/unfccc_ghg_data/unfccc_crf_reader"
  381. # f"/read_new_unfccc_crf_for_year_datalad.py "
  382. # f"--submission_year={read_config_crf['submission_year']} ",
  383. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  384. # "--folder=extracted_data/UNFCCC",
  385. # ]
  386. # # specifying countries is currently disabled due to problems with command line
  387. # # list arguments
  388. # # if read_config_crf["countries"] is not None:
  389. # # actions[0] = actions[0] + f"--countries={read_config_crf['countries']} "
  390. # if read_config_crf["re_read"] == "True":
  391. # actions[0] = actions[0] + " --re_read"
  392. # return {
  393. # #'basename': "Read_CRF_year",
  394. # "actions": actions,
  395. # "task_dep": ["set_env"],
  396. # "verbosity": 2,
  397. # "setup": ["in_venv"],
  398. # }
  399. #
  400. #
  401. # def task_test_read_unfccc_crf_for_year():
  402. # """
  403. # Test CRF reading.
  404. #
  405. # Test CRF with a single year only for speed and logging to extend specifications
  406. # if necessary.
  407. # """
  408. # actions = [
  409. # f"python "
  410. # f"src/unfccc_ghg_data/unfccc_crf_reader"
  411. # f"/test_read_unfccc_crf_for_year.py "
  412. # f"--submission_year={read_config_crf['submission_year']} "
  413. # f"--country={read_config_crf['country']} "
  414. # ]
  415. # if read_config_crf["totest"] == "True":
  416. # actions[0] = actions[0] + " --totest"
  417. #
  418. # if read_config_crf["data_year"] is not None:
  419. # actions[0] = actions[0] + f"--data_year={read_config_crf['data_year']} "
  420. # return {
  421. # #'basename': "Read_CRF_year",
  422. # "actions": actions,
  423. # "task_dep": ["set_env"],
  424. # "verbosity": 2,
  425. # "setup": ["in_venv"],
  426. # }
  427. #
  428. #
  429. # def task_compile_raw_unfccc_crf_for_year():
  430. # """
  431. # Collect all latest CRF submissions for a given year
  432. #
  433. # Reads the latest data from the extracted data folder for each country.
  434. # Notifies the user if new data are available in the downloaded_data folder
  435. # which have not yet been read.
  436. #
  437. # Data are saved in the datasets/UNFCCC/CRFYYYY folder.
  438. # """
  439. # actions = [
  440. # f"python "
  441. # f"src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py "
  442. # f"--submission_year={read_config_crf['submission_year']} "
  443. # ]
  444. # return {
  445. # "actions": actions,
  446. # "task_dep": ["set_env"],
  447. # "verbosity": 2,
  448. # "setup": ["in_venv"],
  449. # }
  450. #
  451. #
  452. # # tasks for DI reader
  453. # # datalad run is called from within the read_unfccc_di_for_country.py script
  454. # read_config_di = {
  455. # "country": get_var("country", None),
  456. # "date": get_var("date", None),
  457. # "annexI": get_var("annexI", False),
  458. # # "countries": get_var('countries', None),
  459. # }
  460. #
  461. #
  462. # def task_read_unfccc_di_for_country():
  463. # """Read DI data for a country"""
  464. # actions = [
  465. # f"python "
  466. # f"src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_datalad.py "
  467. # f"--country={read_config_di['country']}",
  468. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  469. # "--folder=extracted_data/UNFCCC",
  470. # ]
  471. # return {
  472. # "actions": actions,
  473. # "task_dep": ["set_env"],
  474. # "verbosity": 2,
  475. # "setup": ["in_venv"],
  476. # }
  477. #
  478. #
  479. # def task_process_unfccc_di_for_country():
  480. # """Process DI data for a country"""
  481. # actions = [
  482. # f"python "
  483. # f"src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_datalad"
  484. # f".py "
  485. # f"--country={read_config_di['country']} --date={read_config_di['date']}",
  486. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  487. # "--folder=extracted_data/UNFCCC",
  488. # ]
  489. # return {
  490. # "actions": actions,
  491. # "task_dep": ["set_env"],
  492. # "verbosity": 2,
  493. # "setup": ["in_venv"],
  494. # }
  495. #
  496. #
  497. # def task_read_unfccc_di_for_country_group():
  498. # """Read DI data for a country group"""
  499. # actions = [
  500. # "python "
  501. # "src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_group_datalad"
  502. # ".py",
  503. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  504. # "--folder=extracted_data/UNFCCC",
  505. # ]
  506. # if read_config_di["annexI"] == "True":
  507. # actions[0] = actions[0] + " --annexI"
  508. #
  509. # return {
  510. # "actions": actions,
  511. # "task_dep": ["set_env"],
  512. # "verbosity": 2,
  513. # "setup": ["in_venv"],
  514. # }
  515. #
  516. #
  517. # def task_process_unfccc_di_for_country_group():
  518. # """Process DI data for a country group"""
  519. # actions = [
  520. # "python "
  521. # "src/unfccc_ghg_data/unfccc_di_reader"
  522. # "/process_unfccc_di_for_country_group_datalad"
  523. # ".py",
  524. # ]
  525. # if read_config_di["annexI"] == "True":
  526. # actions[0] = actions[0] + " --annexI"
  527. # if read_config_di["date"] is not None:
  528. # actions[0] = actions[0] + f" --date={read_config_di['date']}"
  529. #
  530. # return {
  531. # "actions": actions,
  532. # "task_dep": ["set_env"],
  533. # "verbosity": 2,
  534. # "setup": ["in_venv"],
  535. # }
  536. #
  537. #
  538. # # general tasks
  539. # def task_country_info():
  540. # """
  541. # Print information on submissions and datasets available for given country
  542. # """
  543. # return {
  544. # "actions": [
  545. # f"python src/unfccc_ghg_data/helper/country_info.py "
  546. # f"--country={read_config['country']}"
  547. # ],
  548. # "task_dep": ["set_env"],
  549. # "verbosity": 2,
  550. # "setup": ["in_venv"],
  551. # }