From 9e1f8fcb65580031455d12b432fe064baae4b4ee Mon Sep 17 00:00:00 2001 From: Mary Marchini Date: Sun, 28 Jun 2020 19:40:51 -0700 Subject: [PATCH] build: implement a Commit Queue in Actions This is a (still experimental) implementation of a Commit Queue on GitHub Actions, using labels and the scheduler event to land Pull Requests. It uses `node-core-utils` to validate Pull Requests and to prepare the commit message, and then it uses a GitHub personal token to push changes back to the repository. If the Queue fails to land a Pull Request, that PR will be removed from the queue and the `node-core-utils` output will be pasted in the Pull Request. An overview of the implementation is provided in doc/guides/commit-queue.md, as well as current limitations. Ref: /~https://github.com/mmarchini-oss/automated-merge-test Ref: /~https://github.com/nodejs/build/issues/2201 PR-URL: /~https://github.com/nodejs/node/pull/34112 Refs: /~https://github.com/mmarchini-oss/automated-merge-test Refs: /~https://github.com/nodejs/build/issues/2201 Reviewed-By: Ben Noordhuis Reviewed-By: Denys Otrishko Reviewed-By: Anna Henningsen Reviewed-By: James M Snell Reviewed-By: Shelley Vohr Reviewed-By: Matteo Collina Reviewed-By: Richard Lau --- .github/workflows/auto-start-ci.yml | 10 +-- .github/workflows/commit-queue.yml | 81 ++++++++++++++++++++ doc/guides/commit-queue.md | 115 ++++++++++++++++++++++++++++ tools/actions/commit-queue.sh | 85 ++++++++++++++++++++ 4 files changed, 285 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/commit-queue.yml create mode 100644 doc/guides/commit-queue.md create mode 100755 tools/actions/commit-queue.sh diff --git a/.github/workflows/auto-start-ci.yml b/.github/workflows/auto-start-ci.yml index 0bc9e7c1c65fc2..cb9318ab4ade9f 100644 --- a/.github/workflows/auto-start-ci.yml +++ b/.github/workflows/auto-start-ci.yml @@ -3,12 +3,10 @@ name: Auto Start CI on: schedule: - # `schedule` event is used instead of `pull_request` because when a - # `pull_requesst` event is triggered on a PR from a fork, GITHUB_TOKEN will - # be read-only, and the Action won't have access to any other repository - # secrets, which it needs to access Jenkins API. Runs every five minutes - # (fastest the scheduler can run). Five minutes is optimistic, it can take - # longer to run. + # Runs every five minutes (fastest the scheduler can run). Five minutes is + # optimistic, it can take longer to run. + # To understand why `schedule` is used instead of other events, refer to + # ./doc/guides/commit-queue.md - cron: "*/5 * * * *" jobs: diff --git a/.github/workflows/commit-queue.yml b/.github/workflows/commit-queue.yml new file mode 100644 index 00000000000000..9db520d5c35a59 --- /dev/null +++ b/.github/workflows/commit-queue.yml @@ -0,0 +1,81 @@ +--- +# This action requires the following secrets to be set on the repository: +# GH_USER_NAME: GitHub user whose Jenkins and GitHub token are defined below +# GH_USER_TOKEN: GitHub user token, to be used by ncu and to push changes +# JENKINS_TOKEN: Jenkins token, to be used to check CI status + +name: Commit Queue + +on: + # `schedule` event is used instead of `pull_request` because when a + # `pull_request` event is triggered on a PR from a fork, GITHUB_TOKEN will + # be read-only, and the Action won't have access to any other repository + # secrets, which it needs to access Jenkins API. + schedule: + - cron: "*/5 * * * *" + +jobs: + commitQueue: + if: github.repository == 'nodejs/node' + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + with: + # A personal token is required because pushing with GITHUB_TOKEN will + # prevent commits from running CI after they land on master. It needs + # to be set here because `checkout` configures GitHub authentication + # for push as well. + token: ${{ secrets.GH_USER_TOKEN }} + + # Install dependencies + - name: Install Node.js + uses: actions/setup-node@v2-beta + with: + node-version: '12' + - name: Install dependencies + run: | + sudo apt-get install jq -y + # TODO(mmarchini): install from npm after next ncu release is out + npm install -g '/~https://github.com/mmarchini/node-core-utils#commit-queue-branch' + # npm install -g node-core-utils + + - name: Set variables + run: | + echo "::set-env name=REPOSITORY::$(echo ${{ github.repository }} | cut -d/ -f2)" + echo "::set-env name=OWNER::${{ github.repository_owner }}" + + - name: Get Pull Requests + uses: octokit/graphql-action@v2.x + id: get_mergable_pull_requests + with: + query: | + query release($owner:String!,$repo:String!, $base_ref:String!) { + repository(owner:$owner, name:$repo) { + pullRequests(baseRefName: $base_ref, labels: ["commit-queue"], states: OPEN, last: 100) { + nodes { + number + } + } + } + } + owner: ${{ env.OWNER }} + repo: ${{ env.REPOSITORY }} + # Commit queue is only enabled for the default branch on the repository + # TODO(mmarchini): get the default branch programmatically instead of + # assuming `master` + base_ref: "master" + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + - name: Configure node-core-utils + run: | + ncu-config set branch master + ncu-config set upstream origin + ncu-config set username "${{ secrets.GH_USER_NAME }}" + ncu-config set token "${{ secrets.GH_USER_TOKEN }}" + ncu-config set jenkins_token "${{ secrets.JENKINS_TOKEN }}" + ncu-config set repo "${{ env.REPOSITORY }}" + ncu-config set owner "${{ env.OWNER }}" + + - name: Start the commit queue + run: ./tools/actions/commit-queue.sh ${{ env.OWNER }} ${{ env.REPOSITORY }} ${{ secrets.GITHUB_TOKEN }} $(echo '${{ steps.get_mergable_pull_requests.outputs.data }}' | jq '.repository.pullRequests.nodes | map(.number) | .[]') diff --git a/doc/guides/commit-queue.md b/doc/guides/commit-queue.md new file mode 100644 index 00000000000000..7256286846970e --- /dev/null +++ b/doc/guides/commit-queue.md @@ -0,0 +1,115 @@ +# Commit Queue + +> Stability: 1 - Experimental + +*tl;dr: You can land Pull Requests by adding the `commit-queue` label to it.* + +Commit Queue is an experimental feature for the project which simplifies the +landing process by automating it via GitHub Actions. With it, Collaborators are +able to land Pull Requests by adding the `commit-queue` label to a PR. All +checks will run via node-core-utils, and if the Pull Request is ready to land, +the Action will rebase it and push to master. + +This document gives an overview on how the Commit Queue works, as well as +implementation details, reasoning for design choices, and current limitations. + +## Overview + +From a high-level, the Commit Queue works as follow: + +1. Collaborators will add `commit-queue` label to Pull Reuqests ready to land +2. Every five minutes the queue will do the following for each Pull Request + with the label: + 1. Check if the PR also has a `request-ci` label (if it has, skip this PR + since it's pending a CI run) + 2. Check if the last Jenkins CI is finished running (if it is not, skip this + PR) + 3. Remove the `commit-queue` label + 4. Run `git node land ` + 5. If it fails: + 1. Abort `git node land` session + 2. Add `commit-queue-failed` label to the PR + 3. Leave a comment on the PR with the output from `git node land` + 4. Skip next steps, go to next PR in the queue + 6. If it succeeds: + 1. Push the changes to nodejs/node + 2. Leave a comment on the PR with `Landed in ...` + 3. Close the PR + 4. Go to next PR in the queue + +## Current Limitations + +The Commit Queue feature is still in early stages, and as such it might not +work for more complex Pull Requests. These are the currently known limitations +of the commit queue: + +1. The Pull Request must have only one commit +2. A CI must've ran and succeeded since the last change on the PR +3. A Collaborator must have approved the PR since the last change +4. Only Jenkins CI is checked (Actions, V8 CI and CITGM are ignored) + +## Implementation + +The [action](/.github/workflows/commit_queue.yml) will run on scheduler +events every five minutes. Five minutes is the smallest number accepted by +the scheduler. The scheduler is not guaranteed to run every five minutes, it +might take longer between runs. + +Using the scheduler is preferrable over using pull_request_target for two +reasons: + +1. if two Commit Queue Actions execution overlap, there's a high-risk that + the last one to finish will fail because the local branch will be out of + sync with the remote after the first Action pushes. `issue_comment` event + has the same limitation. +2. `pull_request_target` will only run if the Action exists on the base commit + of a pull request, and it will run the Action version present on that + commit, meaning we wouldn't be able to use it for already opened PRs + without rebasing them first. + +`node-core-utils` is configured with a personal token and +a Jenkins token from +[@nodejs-github-bot](/~https://github.com/nodejs/github-bot). +`octokit/graphql-action` is used to fetch all Pull Requests with the +`commit-queue` label. The output is a JSON payload, so `jq` is used to turn +that into a list of PR ids we can pass as arguments to +[`commit-queue.sh`](./tools/actions/commit-queue.sh). + +> The personal token only needs permission for public repositories and to read +> profiles, we can use the GITHUB_TOKEN for write operations. Jenkins token is +> required to check CI status. + +`commit-queue.sh` receives the following positional arguments: + +1. The repository owner +2. The repository name +3. The Action GITHUB_TOKEN +4. Every positional argument starting at this one will be a Pull Reuqest ID of + a Pull Request with commit-queue set. + +The script will iterate over the pull requests. `ncu-ci` is used to check if +the last CI is still pending, and calls to the GitHub API are used to check if +the PR is waiting for CI to start (`request-ci` label). The PR is skipped if CI +is pending. No other CI validation is done here since `git node land` will fail +if the last CI failed. + +The script removes the `commit-queue` label. It then runs `git node land`, +forwarding stdout and stderr to a file. If any errors happens, +`git node land --abort` is run, and then a `commit-queue-failed` label is added +to the PR, as well as a comment with the output of `git node land`. + +If no errors happen during `git node land`, the script will use the +`GITHUB_TOKEN` to push the changes to `master`, and then will leave a +`Landed in ...` comment in the PR, and then will close it. Iteration continues +until all PRs have done the steps above. + +## Reverting Broken Commits + +Reverting broken commits is done manually by Collaborators, just like when +commits are landed manually via `git node land`. An easy way to revert is a +good feature for the project, but is not explicitly required for the Commit +Queue to work because the Action lands PRs just like collaborators do today. If +once we start using the Commit Queue we notice that the number of required +reverts increases drastically, we can pause the queue until a Revert Queue is +implemented, but until then we can enable the Commit Queue and then work on a +Revert Queue as a follow up. diff --git a/tools/actions/commit-queue.sh b/tools/actions/commit-queue.sh new file mode 100755 index 00000000000000..f572089e2d5fca --- /dev/null +++ b/tools/actions/commit-queue.sh @@ -0,0 +1,85 @@ +#!/bin/bash + +set -xe + +OWNER=$1 +REPOSITORY=$2 +GITHUB_TOKEN=$3 +shift 3 + +API_URL=https://api.github.com +COMMIT_QUEUE_LABEL='commit-queue' +COMMIT_QUEUE_FAILED_LABEL='commit-queue-failed' + +function issueUrl() { + echo "$API_URL/repos/${OWNER}/${REPOSITORY}/issues/${1}" +} + +function labelsUrl() { + echo "$(issueUrl "${1}")/labels" +} + +function commentsUrl() { + echo "$(issueUrl "${1}")/comments" +} + +function gitHubCurl() { + url=$1 + method=$2 + shift 2 + + curl -fsL --request "$method" \ + --url "$url" \ + --header "authorization: Bearer ${GITHUB_TOKEN}" \ + --header 'content-type: application/json' "$@" +} + + +# TODO(mmarchini): should this be set with whoever added the label for each PR? +git config --local user.email "github-bot@iojs.org" +git config --local user.name "Node.js GitHub Bot" + +for pr in "$@"; do + # Skip PR if CI was requested + if gitHubCurl "$(labelsUrl "$pr")" GET | jq -e 'map(.name) | index("request-ci")'; then + echo "pr ${pr} skipped, waiting for CI to start" + continue + fi + + # Skip PR if CI is still running + if ncu-ci url "/~https://github.com/${OWNER}/${REPOSITORY}/pull/${pr}" 2>&1 | grep "^Result *PENDING"; then + echo "pr ${pr} skipped, CI still running" + continue + fi + + # Delete the commit queue label + gitHubCurl "$(labelsUrl "$pr")"/"$COMMIT_QUEUE_LABEL" DELETE + + git node land --yes "$pr" >output 2>&1 || echo "Failed to land #${pr}" + # cat here otherwise we'll be supressing the output of git node land + cat output + + # TODO(mmarchini): workaround for ncu not returning the expected status code, + # if the "Landed in..." message was not on the output we assume land failed + if ! tail -n 10 output | grep '. Post "Landed in .*/pull/'"${pr}"; then + gitHubCurl "$(labelsUrl "$pr")" POST --data '{"labels": ["'"${COMMIT_QUEUE_FAILED_LABEL}"'"]}' + + jq -n --arg content "
Commit Queue failed
$(cat output)
" '{body: $content}' > output.json + cat output.json + + gitHubCurl "$(commentsUrl "$pr")" POST --data @output.json + + rm output output.json + # If `git node land --abort` fails, we're in unknown state. Better to stop + # the script here, current PR was removed from the queue so it shouldn't + # interfer again in the future + git node land --abort --yes + else + rm output + git push origin master + + gitHubCurl "$(commentsUrl "$pr")" POST --data '{"body": "Landed in '"$(git rev-parse HEAD)"'"}' + + gitHubCurl "$(issueUrl "$pr")" PATCH --data '{"state": "closed"}' + fi +done