dodo.py 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599
  1. """
  2. Define the tasks for UNFCCC data repository
  3. """
  4. import os
  5. import sys
  6. import datalad.api
  7. from doit import get_var
  8. root_path = "."
  9. # TODO: task for folder mapping
  10. # create virtual environment
  11. # def task_setup_venv():
  12. # """Create virtual environment"""
  13. # return {
  14. # 'file_dep': ['requirements_dev.txt', 'setup.cfg', 'pyproject.toml'],
  15. # 'actions': ['python3 -m venv venv',
  16. # './venv/bin/pip install --upgrade pip wheel',
  17. # #'./venv/bin/pip install -Ur unfccc_ghg_data/requirements.txt',
  18. # './venv/bin/pip install --upgrade --upgrade-strategy '
  19. # 'eager -e .[dev]',
  20. # 'touch venv',],
  21. # 'targets': ['venv'],
  22. # 'verbosity': 2,
  23. # }
  24. def set_root_path():
  25. """Set the root folder for the repository"""
  26. os.environ["UNFCCC_GHG_ROOT_PATH"] = root_path
  27. def map_folders(parent_folder):
  28. """
  29. Create or update the folder mapping in the given folder
  30. Internal function
  31. """
  32. datalad.api.run(
  33. cmd="python3 src/unfccc_ghg_data/helper/folder_mapping.py "
  34. f"--folder={parent_folder}",
  35. dataset=root_path,
  36. message=f"Update folder mapping for {parent_folder}",
  37. outputs=f"{parent_folder}/folder_mapping.json",
  38. dry_run=None,
  39. explicit=True,
  40. )
  41. def task_in_venv():
  42. """
  43. Check if code run from virtual environment and throw an error is not.
  44. Returns
  45. -------
  46. Nothing
  47. """
  48. def in_venv():
  49. if sys.prefix == sys.base_prefix:
  50. raise ValueError( # noqa: TRY003
  51. "You need to run the code from the virtual environment."
  52. )
  53. return {
  54. "actions": [in_venv],
  55. }
  56. # set UNFCCC_GHG_ROOT_PATH environment variable
  57. def task_set_env():
  58. """
  59. Set the environment variable for the module so data is stored in the correct folders
  60. """
  61. return {
  62. "actions": [set_root_path],
  63. }
  64. # Task to create the mapping files which map folder names to ISO 3-letter country codes
  65. read_config_folder = {
  66. "folder": get_var("folder", None),
  67. }
  68. def task_map_folders():
  69. """
  70. Create or update the folder mapping in the given folder
  71. """
  72. return {
  73. "actions": [
  74. set_root_path(),
  75. datalad.api.run(
  76. cmd="python3 src/unfccc_ghg_data/helper/folder_mapping.py "
  77. f"--folder={read_config_folder['folder']}",
  78. dataset=root_path,
  79. message=f"Update folder mapping for {read_config_folder['folder']}",
  80. outputs=f"{read_config_folder['folder']}/folder_mapping.json",
  81. dry_run=None,
  82. explicit=True,
  83. ),
  84. ],
  85. "verbosity": 2,
  86. "setup": ["in_venv"],
  87. }
  88. # Tasks for getting submissions and downloading them
  89. def task_update_bur():
  90. """Update list of BUR submissions"""
  91. return {
  92. "targets": ["downloaded_data/UNFCCC/submissions-bur.csv"],
  93. # "actions": [
  94. # 'datalad run -m "Fetch BUR submissions" '
  95. # "-o downloaded_data/UNFCCC/submissions-bur.csv "
  96. # "python src/unfccc_ghg_data/unfccc_downloader/fetch_submissions_bur.py"
  97. # ],
  98. "actions": [
  99. set_root_path(),
  100. # datalad.api.run(
  101. # cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  102. # "fetch_submissions_bur.py",
  103. # dataset=root_path,
  104. # message="Fetch BUR submissions",
  105. # outputs="downloaded_data/UNFCCC/submissions-bur.csv",
  106. # dry_run=None,
  107. # explicit=True,
  108. # )
  109. ],
  110. "verbosity": 2,
  111. "setup": ["in_venv"],
  112. }
  113. def task_download_bur():
  114. """Download BUR submissions"""
  115. return {
  116. #'file_dep': ['downloaded_data/UNFCCC/submissions-bur.csv'],
  117. # deactivate file_dep fow now as it will always run fetch submissions
  118. # before download
  119. "actions": [
  120. set_root_path(),
  121. datalad.api.run(
  122. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  123. "download_nonannexI.py --category=BUR",
  124. dataset=root_path,
  125. message="Download BUR submissions",
  126. inputs="downloaded_data/UNFCCC/submissions-bur.csv",
  127. dry_run=None,
  128. explicit=False,
  129. ),
  130. map_folders("downloaded_data/UNFCCC"),
  131. ],
  132. "verbosity": 2,
  133. "setup": ["in_venv"],
  134. }
  135. def task_update_nc():
  136. """Update list of NC submissions"""
  137. return {
  138. "targets": ["downloaded_data/UNFCCC/submissions-nc.csv"],
  139. "actions": [
  140. set_root_path(),
  141. datalad.api.run(
  142. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  143. "fetch_submissions_nc.py",
  144. dataset=root_path,
  145. message="Fetch NC submissions",
  146. outputs="downloaded_data/UNFCCC/submissions-nc.csv",
  147. dry_run=None,
  148. explicit=True,
  149. ),
  150. ],
  151. "verbosity": 2,
  152. "setup": ["in_venv"],
  153. }
  154. def task_download_nc():
  155. """Download NC submissions"""
  156. return {
  157. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  158. # deactivate file_dep fow now as it will always run fetch submissions
  159. # before download
  160. "actions": [
  161. set_root_path(),
  162. datalad.api.run(
  163. cmd="python3 src/unfccc_ghg_data/unfccc_downloader/"
  164. "download_nonannexI.py --category=NC",
  165. dataset=root_path,
  166. message="Download NC submissions",
  167. inputs="downloaded_data/UNFCCC/submissions-nc.csv",
  168. dry_run=None,
  169. explicit=False,
  170. ),
  171. map_folders("downloaded_data/UNFCCC"),
  172. ],
  173. "verbosity": 2,
  174. "setup": ["in_venv"],
  175. }
# annexI data: one update call for all data types (as they are on one page)
# but for each year separately.
# downloading is per year and category.
  179. update_aI_config = {
  180. "year": get_var("year", None),
  181. "category": get_var("category", None),
  182. }
  183. def task_update_annexi():
  184. """Update list of AnnexI submissions"""
  185. return {
  186. "targets": [
  187. f"downloaded_data/UNFCCC/submissions-annexI_{update_aI_config['year']}.csv"
  188. ],
  189. "actions": [
  190. set_root_path(),
  191. datalad.api.run(
  192. cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  193. "fetch_submissions_annexI.py "
  194. f"--year={update_aI_config['year']}",
  195. dataset=root_path,
  196. message=f"Fetch AnnexI submissions for {update_aI_config['year']}",
  197. outputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  198. f"{update_aI_config['year']}.csv",
  199. dry_run=None,
  200. explicit=True,
  201. ),
  202. ],
  203. "verbosity": 2,
  204. "setup": ["in_venv"],
  205. }
  206. def task_download_annexi():
  207. """Download AnnexI submissions"""
  208. return {
  209. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  210. # deactivate file_dep fow now as it will always run fetch submissions
  211. # before download
  212. "actions": [
  213. set_root_path(),
  214. datalad.api.run(
  215. cmd="python src/unfccc_ghg_data/unfccc_downloader/download_annexI.py "
  216. f"--category={update_aI_config['category']} "
  217. f"--year={update_aI_config['year']}",
  218. dataset=root_path,
  219. message=f"Download AnnexI submissions for "
  220. f"{update_aI_config['category']}"
  221. f"{update_aI_config['year']}",
  222. inputs=f"downloaded_data/UNFCCC/submissions-annexI_"
  223. f"{update_aI_config['year']}.csv",
  224. dry_run=None,
  225. explicit=False,
  226. ),
  227. map_folders("downloaded_data/UNFCCC"),
  228. ],
  229. "verbosity": 2,
  230. "setup": ["in_venv"],
  231. }
# BTR data: one update call per reporting round.
# downloading is per round as well.
  235. update_btr_config = {
  236. "round": get_var("round", None),
  237. }
  238. def task_update_btr():
  239. """Update list of BTR submissions"""
  240. return {
  241. "targets": [
  242. f"downloaded_data/UNFCCC/submissions-BTR{update_btr_config['round']}.csv"
  243. ],
  244. "actions": [
  245. set_root_path(),
  246. datalad.api.run(
  247. cmd="python src/unfccc_ghg_data/unfccc_downloader/"
  248. "fetch_submissions_btr.py "
  249. f"--round={update_btr_config['round']}",
  250. dataset=root_path,
  251. message=f"Fetch Biannial Transparency Report submissions for "
  252. f"BTR{update_btr_config['round']}",
  253. outputs=f"downloaded_data/UNFCCC/submissions-BTR"
  254. f"{update_btr_config['round']}.csv",
  255. dry_run=None,
  256. explicit=True,
  257. ),
  258. ],
  259. "verbosity": 2,
  260. "setup": ["in_venv"],
  261. }
  262. def task_download_btr():
  263. """Download BTR submissions"""
  264. return {
  265. #'file_dep': ['downloaded_data/UNFCCC/submissions-nc.csv'],
  266. # deactivate file_dep fow now as it will always run fetch submissions
  267. # before download
  268. "actions": [
  269. set_root_path(),
  270. datalad.api.run(
  271. cmd="src/unfccc_ghg_data/unfccc_downloader/download_btr.py "
  272. f"--round={update_btr_config['round']}",
  273. dataset=root_path,
  274. message="Download BTR submissions for "
  275. f"BTR{update_btr_config['round']}",
  276. inputs=f"downloaded_data/UNFCCC/submissions-BTR"
  277. f"{update_btr_config['round']}.csv",
  278. dry_run=None,
  279. explicit=False,
  280. ),
  281. map_folders("downloaded_data/UNFCCC"),
  282. ],
  283. "verbosity": 2,
  284. "setup": ["in_venv"],
  285. }
  286. def task_download_ndc():
  287. """Download NDC submissions"""
  288. return {
  289. "actions": [
  290. set_root_path(),
  291. datalad.api.run(
  292. cmd="src/unfccc_ghg_data/unfccc_downloader/download_ndc.py",
  293. dataset=root_path,
  294. message="Download NDC submissions",
  295. inputs=None,
  296. dry_run=None,
  297. explicit=False,
  298. ),
  299. map_folders("downloaded_data/UNFCCC"),
  300. ],
  301. "verbosity": 2,
  302. "setup": ["in_venv"],
  303. }
# read UNFCCC submissions.
# the reader script is wrapped in a datalad run call by the task itself
  306. read_config = {
  307. "country": get_var("country", None),
  308. "submission": get_var("submission", None),
  309. }
  310. # TODO: make individual task for non-UNFCCC submissions
  311. def task_read_unfccc_submission():
  312. """Read submission for a country (if code exists) (not for CRF)"""
  313. return {
  314. # "actions": [
  315. # f"python src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py "
  316. # f"--country={read_config['country']} --submission={read_config['submission']}",
  317. # "python src/unfccc_ghg_data/helper/folder_mapping.py "
  318. # "--folder=extracted_data/UNFCCC",
  319. # ],
  320. "actions": [
  321. set_root_path(),
  322. datalad.api.run(
  323. cmd="src/unfccc_ghg_data/unfccc_reader/read_UNFCCC_submission.py"
  324. f"--country={read_config['country']} "
  325. f"--submission={read_config['submission']}",
  326. dataset=root_path,
  327. message=f"Read {read_config['submission']} for country "
  328. f"{read_config['country']}",
  329. dry_run=None,
  330. explicit=False,
  331. ),
  332. map_folders("extracted_data/UNFCCC"),
  333. ],
  334. "verbosity": 2,
  335. "setup": ["in_venv"],
  336. }
# read UNFCCC CRF submissions.
# datalad run is presumably called from within the *_datalad.py reader scripts
  339. read_config_crf = {
  340. "country": get_var("country", None),
  341. "submission_year": get_var("submission_year", None),
  342. "submission_date": get_var("submission_date", None),
  343. "re_read": get_var("re_read", False),
  344. "countries": get_var("countries", None),
  345. "data_year": get_var("data_year", None),
  346. "totest": get_var("totest", None),
  347. }
  348. def task_read_unfccc_crf_submission():
  349. """Read CRF submission for a country"""
  350. actions = [
  351. f"python src/unfccc_ghg_data/unfccc_crf_reader"
  352. f"/read_unfccc_crf_submission_datalad.py "
  353. f"--country={read_config_crf['country']} "
  354. f"--submission_year={read_config_crf['submission_year']} "
  355. f"--submission_date={read_config_crf['submission_date']} ",
  356. "python src/unfccc_ghg_data/helper/folder_mapping.py "
  357. "--folder=extracted_data/UNFCCC",
  358. ]
  359. if read_config_crf["re_read"] == "True":
  360. actions[0] = actions[0] + " --re_read"
  361. return {
  362. "actions": actions,
  363. "task_dep": ["set_env"],
  364. "verbosity": 2,
  365. "setup": ["in_venv"],
  366. }
  367. def task_read_new_unfccc_crf_for_year():
  368. """
  369. Read CRF submission for all countries for given submission year.
  370. By default only reads data not present yet. Only reads the latest updated
  371. submission for each country.
  372. """
  373. actions = [
  374. f"python src/unfccc_ghg_data/unfccc_crf_reader"
  375. f"/read_new_unfccc_crf_for_year_datalad.py "
  376. f"--submission_year={read_config_crf['submission_year']} ",
  377. "python src/unfccc_ghg_data/helper/folder_mapping.py "
  378. "--folder=extracted_data/UNFCCC",
  379. ]
  380. # specifying countries is currently disabled duo to problems with command line
  381. # list arguments
  382. # if read_config_crf["countries"] is not None:
  383. # actions[0] = actions[0] + f"--countries={read_config_crf['countries']} "
  384. if read_config_crf["re_read"] == "True":
  385. actions[0] = actions[0] + " --re_read"
  386. return {
  387. #'basename': "Read_CRF_year",
  388. "actions": actions,
  389. "task_dep": ["set_env"],
  390. "verbosity": 2,
  391. "setup": ["in_venv"],
  392. }
  393. def task_test_read_unfccc_crf_for_year():
  394. """
  395. Test CRF reading.
  396. Test CRF with a single year only for speed and logging to extend specifications
  397. if necessary.
  398. """
  399. actions = [
  400. f"python "
  401. f"src/unfccc_ghg_data/unfccc_crf_reader"
  402. f"/test_read_unfccc_crf_for_year.py "
  403. f"--submission_year={read_config_crf['submission_year']} "
  404. f"--country={read_config_crf['country']} "
  405. ]
  406. if read_config_crf["totest"] == "True":
  407. actions[0] = actions[0] + " --totest"
  408. if read_config_crf["data_year"] is not None:
  409. actions[0] = actions[0] + f"--data_year={read_config_crf['data_year']} "
  410. return {
  411. #'basename': "Read_CRF_year",
  412. "actions": actions,
  413. "task_dep": ["set_env"],
  414. "verbosity": 2,
  415. "setup": ["in_venv"],
  416. }
  417. def task_compile_raw_unfccc_crf_for_year():
  418. """
  419. Collect all latest CRF submissions for a given year
  420. Reads the latest data fromt he extracted data folder for each country.
  421. Notifies the user if new data are available in the downloaded_data folder
  422. which have not yet been read.
  423. Data are saved in the datasets/UNFCCC/CRFYYYY folder.
  424. """
  425. actions = [
  426. f"python "
  427. f"src/unfccc_ghg_data/unfccc_crf_reader/crf_raw_for_year.py "
  428. f"--submission_year={read_config_crf['submission_year']} "
  429. ]
  430. return {
  431. "actions": actions,
  432. "task_dep": ["set_env"],
  433. "verbosity": 2,
  434. "setup": ["in_venv"],
  435. }
  436. # tasks for DI reader
  437. # datalad run is called from within the read_unfccc_di_for_country.py script
  438. read_config_di = {
  439. "country": get_var("country", None),
  440. "date": get_var("date", None),
  441. "annexI": get_var("annexI", False),
  442. # "countries": get_var('countries', None),
  443. }
  444. def task_read_unfccc_di_for_country():
  445. """Read DI data for a country"""
  446. actions = [
  447. f"python "
  448. f"src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_datalad.py "
  449. f"--country={read_config_di['country']}",
  450. "python src/unfccc_ghg_data/helper/folder_mapping.py "
  451. "--folder=extracted_data/UNFCCC",
  452. ]
  453. return {
  454. "actions": actions,
  455. "task_dep": ["set_env"],
  456. "verbosity": 2,
  457. "setup": ["in_venv"],
  458. }
  459. def task_process_unfccc_di_for_country():
  460. """Process DI data for a country"""
  461. actions = [
  462. f"python "
  463. f"src/unfccc_ghg_data/unfccc_di_reader/process_unfccc_di_for_country_datalad"
  464. f".py "
  465. f"--country={read_config_di['country']} --date={read_config_di['date']}",
  466. "python src/unfccc_ghg_data/helper/folder_mapping.py "
  467. "--folder=extracted_data/UNFCCC",
  468. ]
  469. return {
  470. "actions": actions,
  471. "task_dep": ["set_env"],
  472. "verbosity": 2,
  473. "setup": ["in_venv"],
  474. }
  475. def task_read_unfccc_di_for_country_group():
  476. """Read DI data for a country group"""
  477. actions = [
  478. "python "
  479. "src/unfccc_ghg_data/unfccc_di_reader/read_unfccc_di_for_country_group_datalad"
  480. ".py",
  481. "python src/unfccc_ghg_data/helper/folder_mapping.py "
  482. "--folder=extracted_data/UNFCCC",
  483. ]
  484. if read_config_di["annexI"] == "True":
  485. actions[0] = actions[0] + " --annexI"
  486. return {
  487. "actions": actions,
  488. "task_dep": ["set_env"],
  489. "verbosity": 2,
  490. "setup": ["in_venv"],
  491. }
  492. def task_process_unfccc_di_for_country_group():
  493. """Process DI data for a country group"""
  494. actions = [
  495. "python "
  496. "src/unfccc_ghg_data/unfccc_di_reader"
  497. "/process_unfccc_di_for_country_group_datalad"
  498. ".py",
  499. ]
  500. if read_config_di["annexI"] == "True":
  501. actions[0] = actions[0] + " --annexI"
  502. if read_config_di["date"] is not None:
  503. actions[0] = actions[0] + f" --date={read_config_di['date']}"
  504. return {
  505. "actions": actions,
  506. "task_dep": ["set_env"],
  507. "verbosity": 2,
  508. "setup": ["in_venv"],
  509. }
  510. # general tasks
  511. def task_country_info():
  512. """
  513. Print information on submissions and datasets available for given country
  514. """
  515. return {
  516. "actions": [
  517. f"python src/unfccc_ghg_data/helper/country_info.py "
  518. f"--country={read_config['country']}"
  519. ],
  520. "task_dep": ["set_env"],
  521. "verbosity": 2,
  522. "setup": ["in_venv"],
  523. }