diff --git a/.github/workflows/bench.yml b/.github/workflows/bench.yml index c758b5c48..d62ff4786 100644 --- a/.github/workflows/bench.yml +++ b/.github/workflows/bench.yml @@ -42,7 +42,7 @@ jobs: RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it N_USERS: 8 DURATION: 10m - if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.event.push.ref == 'refs/heads/master' }} + if: ${{ github.event.inputs.gpu-series == 'Standard_NC4as_T4_v3' || github.event.schedule || github.event.pull_request || github.head_ref == 'master' || github.ref_name == 'master' || github.event.push.ref == 'refs/heads/master' }} steps: - name: Clone id: checkout @@ -143,6 +143,7 @@ jobs: - name: Commit status uses: Sibz/github-status-action@v1 + continue-on-error: true # If not authorized on external repo with: authToken: ${{secrets.GITHUB_TOKEN}} sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }} diff --git a/llama.cpp b/llama.cpp index 892d46fbc..77ec9b7a1 100644 --- a/llama.cpp +++ b/llama.cpp @@ -9152,8 +9152,9 @@ struct llm_build_context { if (il == n_layer - 1) { // skip computing output for unused tokens struct ggml_tensor * inp_out_ids = build_inp_out_ids(); - cur = ggml_get_rows(ctx0, cur, inp_out_ids); - inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); + cur = ggml_get_rows(ctx0, cur, inp_out_ids); + inpL = ggml_get_rows(ctx0, inpL, inp_out_ids); + ffn_inp = ggml_get_rows(ctx0, ffn_inp, inp_out_ids); } struct ggml_tensor * attn_out = cur;