Rust implementation of the CVM algorithm for counting distinct elements in a stream
0

Configure Feed

Select the types of activity you want to include in your feed.

Update to latest cargo-dist

+100 -70
+80 -54
.github/workflows/release.yml
··· 1 + # This file was autogenerated by dist: https://github.com/astral-sh/cargo-dist 2 + # 1 3 # Copyright 2022-2024, axodotdev 4 + # Copyright 2025 Astral Software Inc. 2 5 # SPDX-License-Identifier: MIT or Apache-2.0 3 6 # 4 7 # CI that: 5 8 # 6 9 # * checks for a Git Tag that looks like a release 7 - # * builds artifacts with cargo-dist (archives, installers, hashes) 10 + # * builds artifacts with dist (archives, installers, hashes) 8 11 # * uploads those artifacts to temporary workflow zip 9 12 # * on success, uploads the artifacts to a GitHub Release 10 13 # ··· 12 15 # title/body based on your changelogs. 13 16 14 17 name: Release 15 - 16 18 permissions: 17 - contents: write 19 + "contents": "write" 18 20 19 21 # This task will run whenever you push a git tag that looks like a version 20 22 # like "1.0.0", "v0.1.0-prerelease.1", "my-app/0.1.0", "releases/v1.0.0", etc. ··· 23 25 # must be a Cargo-style SemVer Version (must have at least major.minor.patch). 24 26 # 25 27 # If PACKAGE_NAME is specified, then the announcement will be for that 26 - # package (erroring out if it doesn't have the given version or isn't cargo-dist-able). 28 + # package (erroring out if it doesn't have the given version or isn't dist-able). 27 29 # 28 30 # If PACKAGE_NAME isn't specified, then the announcement will be for all 29 - # (cargo-dist-able) packages in the workspace with that version (this mode is 31 + # (dist-able) packages in the workspace with that version (this mode is 30 32 # intended for workspaces with only one dist-able package, or with all dist-able 31 33 # packages versioned/released in lockstep). 32 34 # ··· 38 40 # If there's a prerelease-style suffix to the version, then the release(s) 39 41 # will be marked as a prerelease. 40 42 on: 43 + pull_request: 41 44 push: 42 45 tags: 43 46 - '**[0-9]+.[0-9]+.[0-9]+*' 44 - pull_request: 45 47 46 48 jobs: 47 - # Run 'cargo dist plan' (or host) to determine what tasks we need to do 49 + # Run 'dist plan' (or host) to determine what tasks we need to do 48 50 plan: 49 - runs-on: ubuntu-latest 51 + runs-on: "ubuntu-22.04" 50 52 outputs: 51 53 val: ${{ steps.plan.outputs.manifest }} 52 54 tag: ${{ !github.event.pull_request && github.ref_name || '' }} ··· 57 59 steps: 58 60 - uses: actions/checkout@v4 59 61 with: 62 + persist-credentials: false 60 63 submodules: recursive 61 - - name: Install cargo-dist 64 + - name: Install dist 62 65 # we specify bash to get pipefail; it guards against the `curl` command 63 66 # failing. otherwise `sh` won't catch that `curl` returned non-0 64 67 shell: bash 65 - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh" 68 + run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/astral-sh/cargo-dist/releases/download/v0.28.4/cargo-dist-installer.sh | sh" 69 + - name: Cache dist 70 + uses: actions/upload-artifact@v4 71 + with: 72 + name: cargo-dist-cache 73 + path: ~/.cargo/bin/dist 66 74 # sure would be cool if github gave us proper conditionals... 67 75 # so here's a doubly-nested ternary-via-truthiness to try to provide the best possible 68 76 # functionality based on whether this is a pull_request, and whether it's from a fork. ··· 70 78 # but also really annoying to build CI around when it needs secrets to work right.) 71 79 - id: plan 72 80 run: | 73 - cargo dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json 74 - echo "cargo dist ran successfully" 81 + dist ${{ (!github.event.pull_request && format('host --steps=create --tag={0}', github.ref_name)) || 'plan' }} --output-format=json > plan-dist-manifest.json 82 + echo "dist ran successfully" 75 83 cat plan-dist-manifest.json 76 84 echo "manifest=$(jq -c "." plan-dist-manifest.json)" >> "$GITHUB_OUTPUT" 77 85 - name: "Upload dist-manifest.json" ··· 89 97 if: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix.include != null && (needs.plan.outputs.publishing == 'true' || fromJson(needs.plan.outputs.val).ci.github.pr_run_mode == 'upload') }} 90 98 strategy: 91 99 fail-fast: false 92 - # Target platforms/runners are computed by cargo-dist in create-release. 100 + # Target platforms/runners are computed by dist in create-release. 93 101 # Each member of the matrix has the following arguments: 94 102 # 95 103 # - runner: the github runner 96 - # - dist-args: cli flags to pass to cargo dist 97 - # - install-dist: expression to run to install cargo-dist on the runner 104 + # - dist-args: cli flags to pass to dist 105 + # - install-dist: expression to run to install dist on the runner 98 106 # 99 107 # Typically there will be: 100 108 # - 1 "global" task that builds universal installers 101 109 # - N "local" tasks that build each platform's binaries and platform-specific installers 102 110 matrix: ${{ fromJson(needs.plan.outputs.val).ci.github.artifacts_matrix }} 103 111 runs-on: ${{ matrix.runner }} 112 + container: ${{ matrix.container && matrix.container.image || null }} 104 113 env: 105 114 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 106 115 BUILD_MANIFEST_NAME: target/distrib/${{ join(matrix.targets, '-') }}-dist-manifest.json ··· 110 119 git config --global core.longpaths true 111 120 - uses: actions/checkout@v4 112 121 with: 122 + persist-credentials: false 113 123 submodules: recursive 114 - - uses: swatinem/rust-cache@v2 115 - with: 116 - key: ${{ join(matrix.targets, '-') }} 117 - - name: Install cargo-dist 118 - run: ${{ matrix.install_dist }} 124 + - name: Install Rust non-interactively if not already installed 125 + if: ${{ matrix.container }} 126 + run: | 127 + if ! command -v cargo > /dev/null 2>&1; then 128 + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 129 + echo "$HOME/.cargo/bin" >> $GITHUB_PATH 130 + fi 131 + - name: Install dist 132 + run: ${{ matrix.install_dist.run }} 119 133 # Get the dist-manifest 120 134 - name: Fetch local artifacts 121 135 uses: actions/download-artifact@v4 ··· 129 143 - name: Build artifacts 130 144 run: | 131 145 # Actually do builds and make zips and whatnot 132 - cargo dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json 133 - echo "cargo dist ran successfully" 146 + dist build ${{ needs.plan.outputs.tag-flag }} --print=linkage --output-format=json ${{ matrix.dist_args }} > dist-manifest.json 147 + echo "dist ran successfully" 134 148 - id: cargo-dist 135 149 name: Post-build 136 150 # We force bash here just because github makes it really hard to get values up ··· 140 154 run: | 141 155 # Parse out what we just built and upload it to scratch storage 142 156 echo "paths<<EOF" >> "$GITHUB_OUTPUT" 143 - jq --raw-output ".upload_files[]" dist-manifest.json >> "$GITHUB_OUTPUT" 157 + dist print-upload-files-from-manifest --manifest dist-manifest.json >> "$GITHUB_OUTPUT" 144 158 echo "EOF" >> "$GITHUB_OUTPUT" 145 159 146 160 cp dist-manifest.json "$BUILD_MANIFEST_NAME" ··· 157 171 needs: 158 172 - plan 159 173 - build-local-artifacts 160 - runs-on: "ubuntu-20.04" 174 + runs-on: "ubuntu-22.04" 161 175 env: 162 176 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 163 177 BUILD_MANIFEST_NAME: target/distrib/global-dist-manifest.json 164 178 steps: 165 179 - uses: actions/checkout@v4 166 180 with: 181 + persist-credentials: false 167 182 submodules: recursive 168 - - name: Install cargo-dist 169 - shell: bash 170 - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh" 183 + - name: Install cached dist 184 + uses: actions/download-artifact@v4 185 + with: 186 + name: cargo-dist-cache 187 + path: ~/.cargo/bin/ 188 + - run: chmod +x ~/.cargo/bin/dist 171 189 # Get all the local artifacts for the global tasks to use (for e.g. checksums) 172 190 - name: Fetch local artifacts 173 191 uses: actions/download-artifact@v4 ··· 178 196 - id: cargo-dist 179 197 shell: bash 180 198 run: | 181 - cargo dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json 182 - echo "cargo dist ran successfully" 199 + dist build ${{ needs.plan.outputs.tag-flag }} --output-format=json "--artifacts=global" > dist-manifest.json 200 + echo "dist ran successfully" 183 201 184 202 # Parse out what we just built and upload it to scratch storage 185 203 echo "paths<<EOF" >> "$GITHUB_OUTPUT" ··· 204 222 if: ${{ always() && needs.plan.outputs.publishing == 'true' && (needs.build-global-artifacts.result == 'skipped' || needs.build-global-artifacts.result == 'success') && (needs.build-local-artifacts.result == 'skipped' || needs.build-local-artifacts.result == 'success') }} 205 223 env: 206 224 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 207 - runs-on: "ubuntu-20.04" 225 + runs-on: "ubuntu-22.04" 208 226 outputs: 209 227 val: ${{ steps.host.outputs.manifest }} 210 228 steps: 211 229 - uses: actions/checkout@v4 212 230 with: 231 + persist-credentials: false 213 232 submodules: recursive 214 - - name: Install cargo-dist 215 - run: "curl --proto '=https' --tlsv1.2 -LsSf https://github.com/axodotdev/cargo-dist/releases/download/v0.13.3/cargo-dist-installer.sh | sh" 233 + - name: Install cached dist 234 + uses: actions/download-artifact@v4 235 + with: 236 + name: cargo-dist-cache 237 + path: ~/.cargo/bin/ 238 + - run: chmod +x ~/.cargo/bin/dist 216 239 # Fetch artifacts from scratch-storage 217 240 - name: Fetch artifacts 218 241 uses: actions/download-artifact@v4 ··· 220 243 pattern: artifacts-* 221 244 path: target/distrib/ 222 245 merge-multiple: true 223 - # This is a harmless no-op for GitHub Releases, hosting for that happens in "announce" 224 246 - id: host 225 247 shell: bash 226 248 run: | 227 - cargo dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json 249 + dist host ${{ needs.plan.outputs.tag-flag }} --steps=upload --steps=release --output-format=json > dist-manifest.json 228 250 echo "artifacts uploaded and released successfully" 229 251 cat dist-manifest.json 230 252 echo "manifest=$(jq -c "." dist-manifest.json)" >> "$GITHUB_OUTPUT" ··· 234 256 # Overwrite the previous copy 235 257 name: artifacts-dist-manifest 236 258 path: dist-manifest.json 259 + # Create a GitHub Release while uploading all files to it 260 + - name: "Download GitHub Artifacts" 261 + uses: actions/download-artifact@v4 262 + with: 263 + pattern: artifacts-* 264 + path: artifacts 265 + merge-multiple: true 266 + - name: Cleanup 267 + run: | 268 + # Remove the granular manifests 269 + rm -f artifacts/*-dist-manifest.json 270 + - name: Create GitHub Release 271 + env: 272 + PRERELEASE_FLAG: "${{ fromJson(steps.host.outputs.manifest).announcement_is_prerelease && '--prerelease' || '' }}" 273 + ANNOUNCEMENT_TITLE: "${{ fromJson(steps.host.outputs.manifest).announcement_title }}" 274 + ANNOUNCEMENT_BODY: "${{ fromJson(steps.host.outputs.manifest).announcement_github_body }}" 275 + RELEASE_COMMIT: "${{ github.sha }}" 276 + run: | 277 + # Write and read notes from a file to avoid quoting breaking things 278 + echo "$ANNOUNCEMENT_BODY" > $RUNNER_TEMP/notes.txt 237 279 238 - # Create a GitHub Release while uploading all files to it 280 + gh release create "${{ needs.plan.outputs.tag }}" --target "$RELEASE_COMMIT" $PRERELEASE_FLAG --title "$ANNOUNCEMENT_TITLE" --notes-file "$RUNNER_TEMP/notes.txt" artifacts/* 281 + 239 282 announce: 240 283 needs: 241 284 - plan ··· 244 287 # still allowing individual publish jobs to skip themselves (for prereleases). 245 288 # "host" however must run to completion, no skipping allowed! 246 289 if: ${{ always() && needs.host.result == 'success' }} 247 - runs-on: "ubuntu-20.04" 290 + runs-on: "ubuntu-22.04" 248 291 env: 249 292 GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} 250 293 steps: 251 294 - uses: actions/checkout@v4 252 295 with: 296 + persist-credentials: false 253 297 submodules: recursive 254 - - name: "Download GitHub Artifacts" 255 - uses: actions/download-artifact@v4 256 - with: 257 - pattern: artifacts-* 258 - path: artifacts 259 - merge-multiple: true 260 - - name: Cleanup 261 - run: | 262 - # Remove the granular manifests 263 - rm -f artifacts/*-dist-manifest.json 264 - - name: Create GitHub Release 265 - uses: ncipollo/release-action@v1 266 - with: 267 - tag: ${{ needs.plan.outputs.tag }} 268 - name: ${{ fromJson(needs.host.outputs.val).announcement_title }} 269 - body: ${{ fromJson(needs.host.outputs.val).announcement_github_body }} 270 - prerelease: ${{ fromJson(needs.host.outputs.val).announcement_is_prerelease }} 271 - artifacts: "artifacts/*"
+1 -16
Cargo.toml
··· 8 8 keywords = ["CVM", "count-distinct", "estimation"] 9 9 categories = ["algorithms", ] 10 10 11 - version = "0.3.0" 11 + version = "0.3.1" 12 12 edition = "2021" 13 13 14 14 [dependencies] ··· 49 49 [[bench]] 50 50 name = "benchmarks" 51 51 harness = false 52 - 53 - # Config for 'cargo dist' 54 - [workspace.metadata.dist] 55 - # The preferred cargo-dist version to use in CI (Cargo.toml SemVer syntax) 56 - cargo-dist-version = "0.13.3" 57 - # CI backends to support 58 - ci = ["github"] 59 - # The installers to generate for each app 60 - installers = ["shell"] 61 - # Target platforms to build apps for (Rust target-triple syntax) 62 - targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] 63 - # Publish jobs to run in CI 64 - pr-run-mode = "plan" 65 - # Whether to install an updater program 66 - install-updater = false
+19
dist-workspace.toml
··· 1 + [workspace] 2 + members = ["cargo:."] 3 + 4 + # Config for 'dist' 5 + [dist] 6 + # The preferred dist version to use in CI (Cargo.toml SemVer syntax) 7 + cargo-dist-version = "0.28.4" 8 + # CI backends to support 9 + ci = "github" 10 + # The installers to generate for each app 11 + installers = ["shell"] 12 + # Target platforms to build apps for (Rust target-triple syntax) 13 + targets = ["aarch64-apple-darwin", "x86_64-apple-darwin", "x86_64-unknown-linux-gnu", "x86_64-pc-windows-msvc"] 14 + # Which actions to run on pull requests 15 + pr-run-mode = "plan" 16 + # Whether to install an updater program 17 + install-updater = false 18 + # Path that installers should place binaries in 19 + install-path = "CARGO_HOME"