Skip to content

Commit

Permalink
updated to 0.3.1 removing support for Int etc
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaodaigh committed Sep 21, 2024
1 parent 5cda27e commit a084b33
Show file tree
Hide file tree
Showing 18 changed files with 1,900 additions and 687 deletions.
39 changes: 29 additions & 10 deletions Manifest.toml
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
# This file is machine-generated - editing it directly is not advised

julia_version = "1.10.4"
julia_version = "1.11.0-rc3"
manifest_format = "2.0"
project_hash = "4352c645febe0561f79d16d2b1ac56a45920a3d0"
project_hash = "77279a8ad546ad0c5e3723b7d77f8d51b314fbce"

[[deps.Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
version = "1.11.0"

[[deps.Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
version = "1.11.0"

[[deps.CategoricalArrays]]
deps = ["DataAPI", "Future", "Missings", "Printf", "Requires", "Statistics", "Unicode"]
Expand Down Expand Up @@ -57,6 +59,7 @@ version = "0.18.20"
[[deps.Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
version = "1.11.0"

[[deps.DocStringExtensions]]
deps = ["LibGit2"]
Expand All @@ -67,10 +70,12 @@ version = "0.9.3"
[[deps.Future]]
deps = ["Random"]
uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820"
version = "1.11.0"

[[deps.InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
version = "1.11.0"

[[deps.IrrationalConstants]]
git-tree-sha1 = "630b497eafcc20001bba38a4651b327dcfc491d2"
Expand All @@ -80,11 +85,12 @@ version = "0.2.2"
[[deps.LibGit2]]
deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
version = "1.11.0"

[[deps.LibGit2_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"]
uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5"
version = "1.6.4+0"
version = "1.7.2+0"

[[deps.LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
Expand All @@ -93,10 +99,12 @@ version = "1.11.0+1"

[[deps.Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
version = "1.11.0"

[[deps.LinearAlgebra]]
deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
version = "1.11.0"

[[deps.LogExpFunctions]]
deps = ["DocStringExtensions", "IrrationalConstants", "LinearAlgebra"]
Expand All @@ -117,11 +125,12 @@ version = "0.3.28"
[[deps.Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
version = "1.11.0"

[[deps.MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
version = "2.28.2+1"
version = "2.28.6+0"

[[deps.Missings]]
deps = ["DataAPI"]
Expand All @@ -136,7 +145,7 @@ version = "1.2.0"
[[deps.OpenBLAS_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
version = "0.3.23+4"
version = "0.3.27+1"

[[deps.OrderedCollections]]
git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5"
Expand All @@ -146,10 +155,12 @@ version = "1.6.3"
[[deps.Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
version = "1.11.0"

[[deps.Random]]
deps = ["SHA"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
version = "1.11.0"

[[deps.Requires]]
deps = ["UUIDs"]
Expand All @@ -163,6 +174,7 @@ version = "0.7.0"

[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
version = "1.11.0"

[[deps.SortingAlgorithms]]
deps = ["DataStructures"]
Expand All @@ -173,12 +185,17 @@ version = "1.2.1"
[[deps.SparseArrays]]
deps = ["Libdl", "LinearAlgebra", "Random", "Serialization", "SuiteSparse_jll"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
version = "1.10.0"
version = "1.11.0"

[[deps.Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
deps = ["LinearAlgebra"]
git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0"
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
version = "1.10.0"
version = "1.11.1"
weakdeps = ["SparseArrays"]

[deps.Statistics.extensions]
SparseArraysExt = ["SparseArrays"]

[[deps.StatsAPI]]
deps = ["LinearAlgebra"]
Expand All @@ -195,7 +212,7 @@ version = "0.34.3"
[[deps.SuiteSparse_jll]]
deps = ["Artifacts", "Libdl", "libblastrampoline_jll"]
uuid = "bea87d4a-7f5b-5778-9afe-8cc45184846c"
version = "7.2.1+1"
version = "7.7.0+0"

[[deps.TOML]]
deps = ["Dates"]
Expand All @@ -205,11 +222,13 @@ version = "1.0.3"
[[deps.UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
version = "1.11.0"

[[deps.Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
version = "1.11.0"

[[deps.libblastrampoline_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
version = "5.8.0+1"
version = "5.11.0+0"
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "SortingLab"
uuid = "562c1548-17b8-5b69-83cf-d8aebec229f5"
authors = ["Dai ZJ <[email protected]>"]
version = "0.3.0"
version = "0.3.1"

[deps]
CategoricalArrays = "324d7699-5711-5eae-9e2f-1d82baa6b597"
Expand Down
24 changes: 11 additions & 13 deletions README.jmd
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
---
title : SortingLab README
author : Dai ZJ
date: 2019--09-28
options:
date: 2024-09-21
weave_options:
out_path : README.MD
doctype : github
---
Expand All @@ -11,9 +11,9 @@ An alternative implementation of sorting algorithms and APIs. The ultimate aim i

# Faster Sort and Sortperm

The main function exported by SortingLab is `fsort` and `fsortperm` which generally implements faster algorithms than `sort` and `sortperm` for `CategoricalArrays.CategoricalVector`, `Vector{T}`, `Vector{Union{T, Missing}}` where `T` is
The main functions exported by SortingLab are `fsort` and `fsortperm`, which generally implement faster algorithms than `sort` and `sortperm` for `CategoricalArrays.CategoricalVector`, `Vector{T}`, `Vector{Union{String, Missing}}` where `T` is

* Int*, UInt*, Float*, String
**Update Sep 2024**: SortingLab.jl used to be faster than Base at integer sorting, but that is no longer the case. Well done, Base!

**Note**: The reason why we restrict the type to `Vector` is that SortingLab.jl assumes something about memory layout and hence `Vector` provides that guarantee in the types supported.

Expand Down Expand Up @@ -67,25 +67,24 @@ using SortingLab
@test isequal(fsort(x), sort(x))
```


## Benchmarks
![Base.sort vs SortingLab.radixsort](benchmarks/sort_vs_radixsort.png)

![Base.sort vs SortingLab.radixsort](benchmarks/sortperm_vs_fsortperm.png)

![Base.sort vs SortingLab.fsort](benchmarks/fsort_missing_100m_int.png)

![Base.sortperm vs SortingLab.sortperm](benchmarks/fsortperm_missing_100m_int.png)
<!-- ![Base.sortperm vs SortingLab.fsortperm](benchmarks/sortperm_vs_fsortperm.png) -->

## Benchmarking code
```julia; eval=false
```julia
using SortingLab;
using BenchmarkTools;
import Random: randstring
using Test
using Missings: allowmissing
using Plots, StatsPlots

N = 1_000_000;
K = 100;

# String Sort
svec = rand("id".*string.(1:N÷K, pad=10), N);
sort_id_1m = @belapsed sort($svec);
radixsort_id_1m = @belapsed radixsort($svec);
Expand All @@ -100,8 +99,7 @@ radixsort_r_1m = @belapsed radixsort($rsvec);
sortperm_r_1m = @belapsed sortperm($rsvec);
fsortperm_r_1m = @belapsed fsortperm($rsvec);

using Plots
using StatsPlots

groupedbar(
repeat(["IDs", "Random len 32"], inner=2),
[sort_id_1m, radixsort_id_1m, sort_r_1m, radixsort_r_1m],
Expand Down
25 changes: 17 additions & 8 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
---
author: "Dai ZJ"
title: "SortingLab README"
date: "2019--09-28"
date: "2024-09-21"
---

# SortingLab
An alternative implementation of sorting algorithms and APIs. The ultimate aim is to contribute back to Julia Base or SortingAlgorithms.jl. However, there is a commitment to keep this package's API stable and supported, so other developers can rely on the implementation and API here.

# Faster Sort and Sortperm

The main function exported by SortingLab is `fsort` and `fsortperm` which generally implements faster algorithms than `sort` and `sortperm` for `CategoricalArrays.CategoricalVector`, `Vector{T}`, `Vector{Union{T, Missing}}` where `T` is
The main functions exported by SortingLab are `fsort` and `fsortperm`, which generally implement faster algorithms than `sort` and `sortperm` for `CategoricalArrays.CategoricalVector`, `Vector{T}`, `Vector{Union{String, Missing}}` where `T` is

* Int*, UInt*, Float*, String
**Update Sep 2024**: SortingLab.jl used to be faster than Base at integer sorting, but that is no longer the case. Well done, Base!

**Note**: The reason why we restrict the type to `Vector` is that SortingLab.jl assumes something about memory layout and hence `Vector` provides that guarantee in the types supported.

Expand Down Expand Up @@ -97,21 +97,25 @@ Test Passed
## Benchmarks
![Base.sort vs SortingLab.radixsort](benchmarks/sort_vs_radixsort.png)

![Base.sort vs SortingLab.radixsort](benchmarks/sortperm_vs_fsortperm.png)
<!-- ![Base.sortperm vs SortingLab.fsortperm](benchmarks/sortperm_vs_fsortperm.png) -->

![Base.sort vs SortingLab.fsort](benchmarks/fsort_missing_100m_int.png)
![Integer Base.sort vs SortingLab.fsort](benchmarks/int_1m_sort.png)

![Base.sortperm vs SortingLab.sortperm](benchmarks/fsortperm_missing_100m_int.png)
![Integer Base.sortperm vs SortingLab.fsortperm](benchmarks/int_1m_sortperm.png)

## Benchmarking code
```julia
using SortingLab;
using BenchmarkTools;
import Random: randstring
using Test
using Missings: allowmissing
using Plots, StatsPlots

N = 1_000_000;
K = 100;

# String Sort
svec = rand("id".*string.(1:N÷K, pad=10), N);
sort_id_1m = @belapsed sort($svec);
radixsort_id_1m = @belapsed radixsort($svec);
Expand All @@ -126,8 +130,7 @@ radixsort_r_1m = @belapsed radixsort($rsvec);
sortperm_r_1m = @belapsed sortperm($rsvec);
fsortperm_r_1m = @belapsed fsortperm($rsvec);

using Plots
using StatsPlots

groupedbar(
repeat(["IDs", "Random len 32"], inner=2),
[sort_id_1m, radixsort_id_1m, sort_r_1m, radixsort_r_1m],
Expand All @@ -143,6 +146,12 @@ groupedbar(
savefig("benchmarks/sortperm_vs_fsortperm.png")
```

```
"C:\\git\\SortingLab\\benchmarks\\sortperm_vs_fsortperm.png"
```





# Similar package
Expand Down
33 changes: 33 additions & 0 deletions benchmarks/fsort-fsortperm-Int-MIssing.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Benchmark Base.sort / Base.sortperm against SortingLab.fsort / fsortperm on
# 1m-element integer vectors, with and without missing values, and save one
# grouped bar chart per comparison under benchmarks/.
#
# The packages are loaded here so the script does not rely on a previous
# session having brought these names into scope.
using SortingLab
using BenchmarkTools
using Test
using Missings: allowmissing
using Plots, StatsPlots

# sort integer with missing
x = allowmissing(rand(1:10_000, 1_000_000))
# Indices are sampled with replacement, so at most 100_000 entries become missing.
x[rand(eachindex(x), 100_000)] .= missing
@test isequal(fsort(x), sort(x))

# `$` interpolates the global into the benchmark expression, as the README
# benchmarks do, so the timing is not affected by untyped-global access.
int_missing_fsort_timing = @belapsed fsort($x);
int_missing_sort_timing = @belapsed sort($x);

# sort integer without missing
xx = rand(1:10_000, 1_000_000)
int_fsort_timing = @belapsed fsort($xx)
# Kept in its own variable: reusing `int_missing_sort_timing` here would
# overwrite the with-missing measurement and plot the same bar twice.
int_sort_timing = @belapsed sort($xx)

groupedbar(
    repeat(["1m Integer w missing", "1m Integer wo missing"], inner=2),
    [int_missing_sort_timing, int_missing_fsort_timing, int_sort_timing, int_fsort_timing],
    group=repeat(["Base.sort", "SortingLab.fsort"], outer=2),
    title="Integer sort (1m rows): Base vs SortingLab")
savefig("benchmarks/int_1m_sort.png")


# sortperm integer with missing
int_missing_fsortperm_timing = @belapsed fsortperm($x);
int_missing_sortperm_timing = @belapsed sortperm($x);

# sortperm integer without missing (separate variable, same reason as above)
int_fsortperm_timing = @belapsed fsortperm($xx)
int_sortperm_timing = @belapsed sortperm($xx)

groupedbar(
    repeat(["1m Integer w missing", "1m Integer wo missing"], inner=2),
    [int_missing_sortperm_timing, int_missing_fsortperm_timing, int_sortperm_timing, int_fsortperm_timing],
    group=repeat(["Base.sortperm", "SortingLab.fsortperm"], outer=2),
    title="Integer sortperm (1m rows): Base vs SortingLab")
savefig("benchmarks/int_1m_sortperm.png")
Binary file removed benchmarks/fsort_missing_100m_int.png
Binary file not shown.
Binary file removed benchmarks/fsortperm_missing_100m_int.png
Binary file not shown.
Binary file modified benchmarks/sort_vs_radixsort.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified benchmarks/sortperm_vs_fsortperm.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
3 changes: 1 addition & 2 deletions build-readme.jl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# Weave readme
using Pkg
cd("c:/git/SortingLab/")
Pkg.activate("c:/git/SortingLab/readme-env")
Pkg.activate("readme-env")
Pkg.update()

using Weave
Expand Down
Loading

2 comments on commit a084b33

@xiaodaigh
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/115634

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.1 -m "<description of version>" a084b338c25061aea041062199457e72e03e1fce
git push origin v0.3.1

Please sign in to comment.