diff --git a/Guides/Resources/SortedPrefix/FewElements.png b/Guides/Resources/SortedPrefix/FewElements.png new file mode 100644 index 00000000..56ccfccf Binary files /dev/null and b/Guides/Resources/SortedPrefix/FewElements.png differ diff --git a/Guides/Resources/SortedPrefix/ManyElements.png b/Guides/Resources/SortedPrefix/ManyElements.png new file mode 100644 index 00000000..52cd8c95 Binary files /dev/null and b/Guides/Resources/SortedPrefix/ManyElements.png differ diff --git a/Guides/SortedPrefix.md b/Guides/SortedPrefix.md new file mode 100644 index 00000000..297da458 --- /dev/null +++ b/Guides/SortedPrefix.md @@ -0,0 +1,48 @@ +# Sorted Prefix + +[[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/SortedPrefix.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/SortedPrefixTests.swift)] + +Returns the first k elements of this collection when it's sorted. + +If you need to sort a collection but only need access to a prefix of its elements, using this method can give you a performance boost over sorting the entire collection. The order of equal elements is guaranteed to be preserved. + +```swift +let numbers = [7,1,6,2,8,3,9] +let smallestThree = numbers.sortedPrefix(3, by: <) +// [1, 2, 3] +``` + +## Detailed Design + +This adds the `Collection` method shown below: + +```swift +extension Collection { + public func sortedPrefix(_ count: Int, by areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows -> [Element] +} +``` + +A version of this method for `Comparable` types is also provided: + +```swift +extension Collection where Element: Comparable { + public func sortedPrefix(_ count: Int) -> [Element] +} +``` + +### Complexity + +The algorithm used is based on [Soroush Khanlou's research on this matter](https://khanlou.com/2018/12/analyzing-complexity/). The total complexity is `O(k log k + nk)`, which will result in a runtime close to `O(n)` if k is small. 
If k is large (10% or more of the collection), we fall back to sorting the entire array. Realistically, this means the worst case is actually `O(n log n)`. + +Here are some benchmarks we made that demonstrate how this implementation (SmallestM) behaves when k increases (before implementing the fallback): + +![Benchmark](Resources/SortedPrefix/FewElements.png) +![Benchmark 2](Resources/SortedPrefix/ManyElements.png) + +### Comparison with other languages + +**C++:** The `<algorithm>` library defines a `partial_sort` function where the entire array is returned using a partial heap sort. + +**Python:** Defines a `heapq` priority queue that can be used to manually achieve the same result. + diff --git a/README.md b/README.md index 6bc15bc8..c441ca33 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,10 @@ Read more about the package, and the intent behind it, in the [announcement on s - [`randomStableSample(count:)`, `randomStableSample(count:using:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/RandomSampling.md): Randomly selects a specific number of elements from a collection, preserving their original relative order. - [`uniqued()`, `uniqued(on:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Unique.md): The unique elements of a collection, preserving their order. +#### Partial sorting + +- [`sortedPrefix(_:by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/SortedPrefix.md): Returns the first k elements of a sorted collection. + #### Other useful operations - [`chunked(by:)`, `chunked(on:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Chunked.md): Eager and lazy operations that break a collection into chunks based on either a binary predicate or when the result of a projection changes. 
diff --git a/Sources/Algorithms/SortedPrefix.swift b/Sources/Algorithms/SortedPrefix.swift
new file mode 100644
index 00000000..88b1ed54
--- /dev/null
+++ b/Sources/Algorithms/SortedPrefix.swift
@@ -0,0 +1,101 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift Algorithms open source project
+//
+// Copyright (c) 2020 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+extension Collection {
+  /// Returns the first k elements of this collection when it's sorted using
+  /// the given predicate as the comparison between elements.
+  ///
+  /// This example partially sorts an array of integers to retrieve its three
+  /// smallest values:
+  ///
+  ///     let numbers = [7,1,6,2,8,3,9]
+  ///     let smallestThree = numbers.sortedPrefix(3, by: <)
+  ///     // [1, 2, 3]
+  ///
+  /// If you need to sort a collection but only need access to a prefix of its
+  /// elements, using this method can give you a performance boost over sorting
+  /// the entire collection. The order of equal elements is guaranteed to be
+  /// preserved.
+  ///
+  /// - Parameter count: The k number of elements to prefix.
+  /// - Parameter areInIncreasingOrder: A predicate that returns true if its
+  ///   first argument should be ordered before its second argument;
+  ///   otherwise, false.
+  ///
+  /// - Complexity: O(k log k + nk)
+  public func sortedPrefix(
+    _ count: Int,
+    by areInIncreasingOrder: (Element, Element) throws -> Bool
+  ) rethrows -> [Self.Element] {
+    assert(count >= 0, """
+      Cannot prefix with a negative amount of elements!
+      """
+    )
+
+    // Do nothing if we're prefixing nothing.
+    guard count > 0 else {
+      return []
+    }
+
+    // Make sure we are within bounds. `count` may be O(n) for collections
+    // that aren't random-access, so compute it only once.
+    let collectionCount = self.count
+    let prefixCount = Swift.min(count, collectionCount)
+
+    // If we're attempting to prefix 10% of the collection or more, it's
+    // faster to sort everything.
+    guard prefixCount < (collectionCount / 10) else {
+      return Array(try sorted(by: areInIncreasingOrder).prefix(prefixCount))
+    }
+
+    var result = try self.prefix(prefixCount).sorted(by: areInIncreasingOrder)
+    for e in self.dropFirst(prefixCount) {
+      // To displace the current maximum, `e` must be *strictly* less than
+      // it. An element equal to `result.last` comes later in the collection,
+      // so letting it in would break the stability guarantee.
+      guard let last = result.last, try areInIncreasingOrder(e, last) else {
+        continue
+      }
+      // The first element strictly greater than `e`: inserting there keeps
+      // `e` after any equal elements that appeared earlier.
+      let insertionIndex =
+        try result.partitioningIndex { try areInIncreasingOrder(e, $0) }
+      result.removeLast()
+      result.insert(e, at: insertionIndex)
+    }
+
+    return result
+  }
+}
+
+extension Collection where Element: Comparable {
+  /// Returns the first k elements of this collection when it's sorted in
+  /// ascending order.
+  ///
+  /// This example partially sorts an array of integers to retrieve its three
+  /// smallest values:
+  ///
+  ///     let numbers = [7,1,6,2,8,3,9]
+  ///     let smallestThree = numbers.sortedPrefix(3)
+  ///     // [1, 2, 3]
+  ///
+  /// If you need to sort a collection but only need access to a prefix of its
+  /// elements, using this method can give you a performance boost over sorting
+  /// the entire collection. The order of equal elements is guaranteed to be
+  /// preserved.
+  ///
+  /// - Parameter count: The k number of elements to prefix.
+  ///
+  /// - Complexity: O(k log k + nk)
+  public func sortedPrefix(_ count: Int) -> [Element] {
+    return sortedPrefix(count, by: <)
+  }
+}
diff --git a/Tests/SwiftAlgorithmsTests/SortedPrefixTests.swift b/Tests/SwiftAlgorithmsTests/SortedPrefixTests.swift
new file mode 100644
index 00000000..17c21613
--- /dev/null
+++ b/Tests/SwiftAlgorithmsTests/SortedPrefixTests.swift
@@ -0,0 +1,308 @@
+//===----------------------------------------------------------------------===//
+//
+// This source file is part of the Swift Algorithms open source project
+//
+// Copyright (c) 2020 Apple Inc. and the Swift project authors
+// Licensed under Apache License v2.0 with Runtime Library Exception
+//
+// See https://swift.org/LICENSE.txt for license information
+//
+//===----------------------------------------------------------------------===//
+
+import XCTest
+import Algorithms
+
+final class SortedPrefixTests: XCTestCase {
+  func testEmpty() {
+    let array = [Int]()
+    XCTAssertEqual(array.sortedPrefix(0), [])
+  }
+
+  func testSortedPrefixWithOrdering() {
+    let array: [Int] = [20, 1, 4, 70, 100, 2, 3, 7, 90]
+
+    XCTAssertEqual(array.sortedPrefix(0, by: >), [])
+    XCTAssertEqual(
+      array.sortedPrefix(1, by: >),
+      [100]
+    )
+
+    XCTAssertEqual(
+      array.sortedPrefix(5, by: >),
+      [100, 90, 70, 20, 7]
+    )
+
+    XCTAssertEqual(
+      array.sortedPrefix(9, by: >),
+      [100, 90, 70, 20, 7, 4, 3, 2, 1]
+    )
+
+    XCTAssertEqual([1].sortedPrefix(0, by: <), [])
+    XCTAssertEqual([1].sortedPrefix(0, by: >), [])
+    XCTAssertEqual([1].sortedPrefix(1, by: <), [1])
+    XCTAssertEqual([1].sortedPrefix(1, by: >), [1])
+    XCTAssertEqual([0, 1].sortedPrefix(1, by: <), [0])
+    XCTAssertEqual([1, 0].sortedPrefix(1, by: <), [0])
+    XCTAssertEqual([1, 0].sortedPrefix(2, by: <), [0, 1])
+    XCTAssertEqual([0, 1].sortedPrefix(1, by: >), [1])
+    XCTAssertEqual([1, 0].sortedPrefix(1, by: >), [1])
+    XCTAssertEqual([1, 0].sortedPrefix(2, by: >), [1, 0])
+
+    XCTAssertEqual(
+      [1, 2, 3, 4, 7, 20, 70, 90, 100].sortedPrefix(5, by: <),
+      [1, 2, 3, 4, 7]
+    )
+
+    XCTAssertEqual(
+      [1, 2, 3, 4, 7, 20, 70, 90, 100].sortedPrefix(5, by: >),
+      [100, 90, 70, 20, 7]
+    )
+
+    XCTAssertEqual(
+      [1, 2, 3, 4, 7, 20, 70, 90, 100].sortedPrefix(5, by: >),
+      [100, 90, 70, 20, 7]
+    )
+
+    XCTAssertEqual(
+      [1, 2, 3, 4, 7, 20, 70, 90, 100].sortedPrefix(5, by: <),
+      [1, 2, 3, 4, 7]
+    )
+
+    XCTAssertEqual(
+      [4, 5, 6, 1, 2, 3].sortedPrefix(3, by: <),
+      [1, 2, 3]
+    )
+
+    XCTAssertEqual(
+      [4, 5, 9, 8, 7, 6].sortedPrefix(3, by: <),
+      [4, 5, 6]
+    )
+
+    XCTAssertEqual(
+      [4, 3, 2, 1].sortedPrefix(1, by: <),
+      [1]
+    )
+
+    XCTAssertEqual(
+      [4, 2, 1, 3].sortedPrefix(3, by: >),
+      [4, 3, 2]
+    )
+
+    XCTAssertEqual(
+      [4, 2, 1, 3].sortedPrefix(3, by: <),
+      [1, 2, 3]
+    )
+  }
+
+  func testSortedPrefixComparable() {
+    let array: [Int] = [20, 1, 4, 70, 100, 2, 3, 7, 90]
+
+    XCTAssertEqual(array.sortedPrefix(0), [])
+
+    XCTAssertEqual(
+      array.sortedPrefix(1),
+      [1]
+    )
+
+    XCTAssertEqual(
+      array.sortedPrefix(5),
+      [1, 2, 3, 4, 7]
+    )
+
+    XCTAssertEqual(
+      array.sortedPrefix(9),
+      [1, 2, 3, 4, 7, 20, 70, 90, 100]
+    )
+  }
+
+  func testSortedPrefixWithHugePrefix() {
+    XCTAssertEqual(
+      [4, 2, 1, 3].sortedPrefix(.max),
+      [1, 2, 3, 4]
+    )
+  }
+
+  func testSortedPrefixWithHugeInput() {
+    let input = (1...1000).shuffled()
+
+    XCTAssertEqual(
+      input.sortedPrefix(0, by: <),
+      []
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(0, by: >),
+      []
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(1, by: <),
+      [1]
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(1, by: >),
+      [1000]
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(5, by: <),
+      [1, 2, 3, 4, 5]
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(5, by: >),
+      [1000, 999, 998, 997, 996]
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(10, by: <),
+      [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(10, by: >),
+      [1000, 999, 998, 997, 996, 995, 994, 993, 992, 991]
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(50, by: <),
+      Array((1...50))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(50, by: >),
+      Array((1...1000).reversed().prefix(50))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(250, by: <),
+      Array((1...250))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(250, by: >),
+      Array((1...1000).reversed().prefix(250))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(500, by: <),
+      Array((1...500))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(500, by: >),
+      Array((1...1000).reversed().prefix(500))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(750, by: <),
+      Array((1...750))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(750, by: >),
+      Array((1...1000).reversed().prefix(750))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(1000, by: <),
+      Array((1...1000))
+    )
+
+    XCTAssertEqual(
+      input.sortedPrefix(1000, by: >),
+      (1...1000).reversed()
+    )
+
+    XCTAssertEqual(
+      ([0] + Array(repeating: 1, count: 100)).sortedPrefix(1, by: <),
+      [0]
+    )
+
+    XCTAssertEqual(
+      ([1] + Array(repeating: 0, count: 100)).sortedPrefix(1, by: <),
+      [0]
+    )
+
+    XCTAssertEqual(
+      ([0] + Array(repeating: 1, count: 100)).sortedPrefix(2, by: <),
+      [0, 1]
+    )
+
+    XCTAssertEqual(
+      ([1] + Array(repeating: 0, count: 100)).sortedPrefix(2, by: <),
+      [0, 0]
+    )
+
+    XCTAssertEqual(
+      ([1] + Array(repeating: 1, count: 100)).sortedPrefix(1, by: >),
+      [1]
+    )
+
+    XCTAssertEqual(
+      ([0] + Array(repeating: 1, count: 100)).sortedPrefix(1, by: >),
+      [1]
+    )
+
+    XCTAssertEqual(
+      ([1] + Array(repeating: 0, count: 100)).sortedPrefix(2, by: >),
+      [1, 0]
+    )
+
+    XCTAssertEqual(
+      ([0] + Array(repeating: 1, count: 100)).sortedPrefix(2, by: >),
+      [1, 1]
+    )
+  }
+
+  func testStability() {
+    assertStability([1,1,1,2,5,7,3,6,2,5,7,3,6], withPrefix: 3)
+    assertStability([1,1,1,2,5,7,3,6,2,5,7,3,6], withPrefix: 6)
+    assertStability([1,1,1,2,5,7,3,6,2,5,7,3,6], withPrefix: 20)
+    assertStability([1,1,1,2,5,7,3,6,2,5,7,3,6], withPrefix: 1000)
+    assertStability(Array(repeating: 0, count: 100), withPrefix: 0)
+    assertStability(Array(repeating: 0, count: 100), withPrefix: 1)
+    assertStability(Array(repeating: 0, count: 100), withPrefix: 2)
+    assertStability(Array(repeating: 0, count: 100), withPrefix: 5)
+    assertStability(Array(repeating: 0, count: 100), withPrefix: 20)
+    assertStability(Array(repeating: 0, count: 100), withPrefix: 100)
+    assertStability(Array(repeating: 1, count: 50) + Array(repeating: 0, count: 50), withPrefix: 2)
+    assertStability(Array(repeating: 1, count: 50) + Array(repeating: 0, count: 50), withPrefix: 5)
+    assertStability(Array(repeating: 1, count: 50) + Array(repeating: 0, count: 50), withPrefix: 20)
+    assertStability(Array(repeating: 1, count: 50) + Array(repeating: 0, count: 50), withPrefix: 50)
+    assertStability([0,0], withPrefix: 1)
+    assertStability([0,0], withPrefix: 2)
+    assertStability([0,1,0,1,0,1], withPrefix: 2)
+    assertStability([0,1,0,1,0,1], withPrefix: 6)
+    assertStability([0,0,0,1,1,1], withPrefix: 1)
+    assertStability([0,0,0,1,1,1], withPrefix: 3)
+    assertStability([0,0,0,1,1,1], withPrefix: 4)
+    assertStability([0,0,0,1,1,1], withPrefix: 6)
+    assertStability([1,1,1,0,0,0], withPrefix: 1)
+    assertStability([1,1,1,0,0,0], withPrefix: 3)
+    assertStability([1,1,1,0,0,0], withPrefix: 4)
+    assertStability([1,1,1,0,0,0], withPrefix: 6)
+    assertStability([1,1,1,0,0,0], withPrefix: 5)
+  }
+
+  func assertStability(
+    _ actual: [Int],
+    withPrefix prefixCount: Int
+  ) {
+    let indexed = actual.enumerated()
+    let sorted = indexed.map { $0 }.sortedPrefix(prefixCount) { $0.element < $1.element }
+
+    for element in Set(actual) {
+      let filtered = sorted.filter { $0.element == element }.map(\.offset)
+      XCTAssertEqual(filtered, filtered.sorted())
+
+      // A stable prefix keeps the *earliest* occurrences of equal elements.
+      let earliest = indexed
+        .filter { $0.element == element }
+        .map(\.offset)
+        .prefix(filtered.count)
+      XCTAssertEqual(filtered, Array(earliest))
+    }
+  }
+}