# Workflow: clone the unfccc_di_data datalad dataset, run download.py through
# `datalad run`, and push the result to the `ginhemio` sibling on gin.hemio.de.
name: download

on:
  schedule:
    # monthly on the seventh day of the month at 04:37 (GitHub evaluates cron in UTC)
    - cron: "37 4 7 * *"
  workflow_dispatch: # if run manually

jobs:
  download:
    runs-on: ubuntu-22.04
    steps:
      # Exported via GITHUB_ENV so that later steps see DOWNLOAD_DATE in their
      # environment. No step in this file reads it directly.
      - name: set download date
        run: echo "DOWNLOAD_DATE=$(date +"%Y-%m-%d")" >> "$GITHUB_ENV"

      - name: install datalad
        uses: awalsh128/cache-apt-pkgs-action@v1.4.2
        with:
          packages: datalad
          # Quoted so the value stays the string "1.0" instead of being parsed
          # as a float by the YAML loader.
          version: "1.0"

      # Installs the private key as ~/.ssh/id_ginhemio and pins the host key,
      # so pushes over SSH to gin.hemio.de work non-interactively.
      - name: install ssh key for gin.hemio.de access
        uses: shimataro/ssh-key-action@v2
        with:
          key: ${{ secrets.GIN_HEMIO_SSH_KEY }}
          name: id_ginhemio
          known_hosts: |
            gin.hemio.de ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIPA+r/S2vn2JmN9z62e/vla/pDgaEfFUmghlOP7idyP9
          config: |
            Host gin.hemio.de
              User git
              IdentityFile ~/.ssh/id_ginhemio
              IdentitiesOnly yes

      - name: configure git email
        run: git config --global user.email "mika.pflueger@climate-resource.com"

      - name: configure git user
        run: git config --global user.name "Mika Pflüger (via github actions)"

      - name: clone repo
        run: datalad clone https://github.com/mikapfl/unfccc_di_data.git

      # NOTE(review): the sibling URLs spell the repository "unfcc_di_data"
      # (two c's) while the GitHub clone is "unfccc_di_data" - confirm this
      # matches the actual repository name on gin.hemio.de.
      - name: configure push access to gin.hemio.de
        run: |
          cd unfccc_di_data/
          datalad siblings add -s ginhemio --url https://gin.hemio.de/CR/unfcc_di_data --pushurl gin.hemio.de:/CR/unfcc_di_data
          git config remote.ginhemio.annex-ignore false

      - name: setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: install requirements
        run: pip install -r unfccc_di_data/requirements.txt

      - name: update to latest data from gin.hemio.de
        run: datalad -C unfccc_di_data/ update -s ginhemio --how merge

      # The output globs are single-quoted so the shell passes them to datalad
      # verbatim instead of trying to expand them in the runner's working
      # directory.
      - name: download data
        run: |
          datalad -C unfccc_di_data/ run \
            -o 'data/annexI/*.csv.gz' \
            -o 'data/non-annexI/*.csv.gz' \
            -o data/all.parquet \
            python download.py

      # Merge once more right before pushing, then push with `--data anything`.
      - name: publish newly downloaded data
        run: |
          cd unfccc_di_data/
          datalad update -s ginhemio --how merge
          datalad push --to ginhemio --data anything