Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Persistent collections updates (part 8) #184

Merged
merged 17 commits into from
Sep 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
8c9ae08
[PersistentCollections] Remove dead code
lorentey Sep 23, 2022
9407eff
[PersistentDictionary] Add index striding benchmark
lorentey Sep 22, 2022
a453335
[tests] Delay checking Indices until we’re done with the collection i…
lorentey Sep 22, 2022
eefef1e
[PersistentCollections] Small path-related adjustments
lorentey Sep 22, 2022
77f205f
[PersistentCollections] Implement structural index(_:offsetBy:) & ind…
lorentey Sep 22, 2022
287c677
[PersistentCollections] Fix indexing oversight when spawning a new ch…
lorentey Sep 23, 2022
9150bbf
[PersistentCollections] Cache the item hash within collision nodes
lorentey Sep 23, 2022
2193e79
[PersistentCollections] Slightly optimize compressed path case in isS…
lorentey Sep 23, 2022
92d1d12
[PersistentDictionary] Add a quick&dirty memory benchmark
lorentey Sep 23, 2022
d87f5fd
[PersistentCollections] Optimize key-based lookup operations
lorentey Sep 24, 2022
00ae83d
[PersistentSet] Add ExpressibleByArrayLiteral, Codable, Custom[Debug]…
lorentey Sep 25, 2022
3d10964
[PersistentSet] Flesh out isSubset/iSuperset/isDisjoint/etc overloads…
lorentey Sep 24, 2022
1c6e5cb
[PersistentDictionary] Add CustomReflectable conformance
lorentey Sep 24, 2022
826e22c
[PersistentDictionary] Implement structural filter & compactMapValues
lorentey Sep 25, 2022
c92b0fa
[PersistentSet] Implement structural subtraction, intersection, filte…
lorentey Sep 25, 2022
590dfeb
[PersistentSet] Implement (naive) union, symmetricDifference operatio…
lorentey Sep 25, 2022
ba303dc
[PersistentDictionary] Add a structural initializer that takes a Pers…
lorentey Sep 25, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions Benchmarks/Package.swift
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// swift-tools-version:5.3
// swift-tools-version:5.5
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Collections open source project
Expand All @@ -16,6 +16,7 @@ let package = Package(
name: "swift-collections.Benchmarks",
products: [
.executable(name: "benchmark", targets: ["benchmark"]),
.executable(name: "memory-benchmark", targets: ["memory-benchmark"]),
],
dependencies: [
.package(name: "swift-collections", path: ".."),
Expand All @@ -36,13 +37,20 @@ let package = Package(
.target(
name: "CppBenchmarks"
),
.target(
.executableTarget(
name: "benchmark",
dependencies: [
"Benchmarks",
],
path: "Sources/benchmark-tool"
),
.executableTarget(
name: "memory-benchmark",
dependencies: [
.product(name: "Collections", package: "swift-collections"),
.product(name: "CollectionsBenchmark", package: "swift-collections-benchmark"),
]
),
],
cxxLanguageStandard: .cxx1z
cxxLanguageStandard: .cxx17
)
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,25 @@ extension Benchmark {
}
}

// Benchmark: walk a PersistentDictionary from `startIndex` to `endIndex`
// in ten `index(_:offsetBy:)` jumps of (roughly) equal length.
self.add(
title: "PersistentDictionary<Int, Int> striding, 10 steps",
input: [Int].self
) { input in
// Setup: build the dictionary, mapping each input key `k` to `2 * k`.
let d = PersistentDictionary(
uniqueKeysWithValues: input.lazy.map { ($0, 2 * $0) })
// Eleven cumulative offsets: `k * d.count / 10` for k in 0...10. Multiplying
// before dividing spreads the rounding remainder across the steps, so the
// last offset is exactly `d.count`.
// NOTE(review): `stride(..., by: 0)` traps, so this presumably relies on the
// harness never supplying an empty input -- confirm.
let steps = stride(from: 0, through: 10 * d.count, by: d.count)
.map { $0 / 10 }
return { timer in
var i = d.startIndex
for j in 1 ..< steps.count {
// Advance by the difference between consecutive cumulative offsets.
let distance = steps[j] - steps[j - 1]
// `identity` is an opaque helper here; presumably it keeps the optimizer
// from seeing through the call -- confirm.
i = identity(d.index(i, offsetBy: distance))
}
// The distances sum to `d.count`, so the walk must land on `endIndex`.
precondition(i == d.endIndex)
blackHole(i)
}
}

self.add(
title: "PersistentDictionary<Int, Int> indexing subscript",
input: ([Int], [Int]).self
Expand Down
62 changes: 62 additions & 0 deletions Benchmarks/Sources/memory-benchmark/DictionaryStatistics.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Collections open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

/// Memory usage figures for a hash table, all measured in bytes.
struct DictionaryStatistics {
  /// Total number of bytes in the hash table that can be used to hold
  /// items. The maximum load factor is already taken into account.
  var capacityBytes: Int = 0

  /// How many bytes of storage are presently occupied by items.
  var itemBytes: Int = 0

  /// How many bytes of item storage are presently unoccupied.
  var freeBytes: Int = 0

  /// Estimated size of the real memory footprint of the hash table.
  /// Besides the space that is available for items, this also counts
  /// the object header and the hash table occupation bitmap.
  var grossBytes: Int = 0

  /// A measure of how efficiently the data structure uses its memory,
  /// expressed as `itemBytes / grossBytes`: the share of all allocated
  /// bytes that hold actual stored data. Normally between 0 and 1.
  ///
  /// When `grossBytes` is not positive, the result is 1.
  var memoryEfficiency: Double {
    if grossBytes <= 0 { return 1 }
    let used = Double(itemBytes)
    let allocated = Double(grossBytes)
    return used / allocated
  }
}

extension Dictionary {
  /// An estimate of the memory used by this dictionary's hash table.
  ///
  /// `capacityBytes`, `itemBytes` and `freeBytes` are derived from
  /// `capacity`, `count` and the strides of `Key` and `Value`.
  /// `grossBytes` additionally counts the object header, the storage
  /// object's instance variables, the occupation bitmap, and the key/value
  /// storage for every bucket (not just those usable under the load factor).
  var statistics: DictionaryStatistics {
    // Note: This logic is based on the Dictionary ABI. It may be off by a few
    // bytes due to not accounting for padding bytes between storage regions.
    // The gross bytes reported also do not include extra memory that was
    // allocated by malloc but not actually used for Dictionary storage.
    var stats = DictionaryStatistics()
    let keyStride = MemoryLayout<Key>.stride
    let valueStride = MemoryLayout<Value>.stride
    stats.capacityBytes = self.capacity * (keyStride + valueStride)
    stats.itemBytes = self.count * (keyStride + valueStride)
    stats.freeBytes = stats.capacityBytes - stats.itemBytes

    let bucketCount = self.capacity._roundUpToPowerOfTwo()
    // The occupation bitmap holds one bit per bucket and is sized in whole
    // `UInt` words, so round the bit count up to a multiple of the word
    // width. (Merely adding `UInt.bitWidth - 1` without completing the
    // ceiling division would overstate the bitmap by up to a word.)
    let bitmapWordCount = (bucketCount + UInt.bitWidth - 1) / UInt.bitWidth
    let bitmapBitcount = bitmapWordCount * UInt.bitWidth

    let objectHeaderBits = 2 * Int.bitWidth
    // Five word-sized instance variables plus 64 bits of smaller fields.
    let ivarBits = 5 * Int.bitWidth + 64
    stats.grossBytes = (objectHeaderBits + ivarBits + bitmapBitcount) / 8
    stats.grossBytes += bucketCount * keyStride + bucketCount * valueStride
    return stats
  }
}
80 changes: 80 additions & 0 deletions Benchmarks/Sources/memory-benchmark/MemoryBenchmarks.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Collections open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

import ArgumentParser
import CollectionsBenchmark
import Collections

/// Command-line entry point that compares the memory efficiency of
/// `Dictionary<String, String>` and `PersistentDictionary<String, String>`
/// at a series of collection sizes, printing one CSV row per size.
@main
struct MemoryBenchmarks: ParsableCommand {
static var configuration: CommandConfiguration {
CommandConfiguration(
commandName: "memory-statistics",
abstract: "A utility for running memory benchmarks for collection types.")
}

// Command-line options selecting which collection sizes to measure.
@OptionGroup
var sizes: Benchmark.Options.SizeSelection

/// Grows both dictionaries to each requested size in turn, prints their
/// statistics as CSV, and finally reports averages via `complain`.
mutating func run() throws {
let sizes = try self.sizes.resolveSizes()

// Number of items inserted so far; shared across all sizes because the
// dictionaries are only ever grown, never rebuilt.
var i = 0

var d: Dictionary<String, String> = [:]
var pd: PersistentDictionary<String, String> = [:]

// CSV header row.
print("""
Size,"Dictionary<String, String>",\
"PersistentDictionary<String, String>",\
"average node size",\
"average item depth"
""")

// Running sums of the per-size memory efficiency, used for the averages.
var sumd: Double = 0
var sump: Double = 0
for size in sizes {
// Insert items until both dictionaries hold `size.rawValue` entries.
// NOTE(review): this only ever adds items, so it presumably relies on
// `resolveSizes()` returning sizes in increasing order -- confirm.
while i < size.rawValue {
let key = "key \(i)"
let value = "value \(i)"
d[key] = value
pd[key] = value
i += 1
}

let dstats = d.statistics
let pstats = pd._statistics
// One CSV row for this size.
print("""
\(size.rawValue),\
\(dstats.memoryEfficiency),\
\(pstats.memoryEfficiency),\
\(pstats.averageNodeSize),\
\(pstats.averageItemDepth)
""")
sumd += dstats.memoryEfficiency
sump += pstats.memoryEfficiency
}

// Statistics of the persistent dictionary at its final (largest) size.
let pstats = pd._statistics
// NOTE(review): the "1M items" label below is hard-coded, but the final item
// count is whatever the selected sizes produced (`i`); the label is only
// accurate if the largest size is one million. Also, if `sizes` is empty the
// averages are 0 / 0 (NaN).
// `complain` is an opaque helper here; presumably it writes to standard
// error so the summary stays out of the CSV output -- confirm.
complain("""
Averages:
Dictionary: \(sumd / Double(sizes.count))
PersistentDictionary: \(sump / Double(sizes.count))

PersistentDictionary at 1M items:
average node size: \(pstats.averageNodeSize)
average item depth: \(pstats.averageItemDepth)
average lookup chain length: \(pstats.averageLookupChainLength)
""")
}
}


4 changes: 4 additions & 0 deletions Sources/BitCollections/BitSet/BitSet+Extras.swift
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
//
//===----------------------------------------------------------------------===//

import _CollectionsUtilities

// Declares `BitSet`'s conformance to `_FastMembershipCheckable`; the empty body
// adds no members. Presumably a marker protocol from `_CollectionsUtilities`
// advertising cheap membership tests -- confirm against its declaration.
extension BitSet: _FastMembershipCheckable {}

extension BitSet {
/// Creates a new empty bit set with enough storage capacity to store values
/// up to the given maximum value without reallocating storage.
Expand Down
14 changes: 14 additions & 0 deletions Sources/OrderedCollections/OrderedSet/OrderedSet+Extras.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift Collections open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
//
//===----------------------------------------------------------------------===//

import _CollectionsUtilities

// Declares `OrderedSet`'s conformance to `_FastMembershipCheckable`; the empty
// body adds no members. Presumably a marker protocol from
// `_CollectionsUtilities` advertising cheap membership tests -- confirm
// against its declaration.
extension OrderedSet: _FastMembershipCheckable {}
7 changes: 7 additions & 0 deletions Sources/PersistentCollections/Node/_AncestorSlots.swift
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,13 @@ extension _AncestorSlots {
path &= ~(_Bucket.bitMask &<< level.shift)
}

/// Zero out every slot at or below the specified level.
///
/// All bits of `path` at position `level.shift` and higher are reset;
/// bits below that position are left untouched. If `level.shift` is at
/// least the bit width of `UInt`, nothing is changed.
@inlinable
internal mutating func clear(atOrBelow level: _Level) {
  // `&<<` masks its shift amount to the bit width, so an out-of-range
  // shift must be rejected up front rather than passed to the operator.
  if level.shift >= UInt.bitWidth { return }
  let clearedBits = UInt.max &<< level.shift
  path &= ~clearedBits
}

/// Truncate this path to the specified level.
/// Slots at or beyond the specified level are cleared.
@inlinable
Expand Down
55 changes: 43 additions & 12 deletions Sources/PersistentCollections/Node/_Bitmap.swift
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,20 @@ extension _Bitmap {

@inlinable @inline(__always)
internal var isEmpty: Bool { _value == 0 }

/// The bucket at the position of the lowest set bit in this bitmap,
/// or `nil` if no bits are set.
@inlinable @inline(__always)
internal var first: _Bucket? {
  if isEmpty { return nil }
  let lowestSetBit = _value.trailingZeroBitCount
  return _Bucket(_value: UInt8(truncatingIfNeeded: lowestSetBit))
}

/// Remove the lowest set bit from this bitmap and return the bucket at
/// its position. Returns `nil` (and changes nothing) if no bits are set.
@inlinable @inline(__always)
internal mutating func popFirst() -> _Bucket? {
  if let lowest = first {
    // `x & (x - 1)` resets the lowest set bit of `x`.
    _value &= _value &- 1
    return lowest
  }
  return nil
}
}

extension _Bitmap {
Expand Down Expand Up @@ -147,23 +161,40 @@ extension _Bitmap {
}
}

extension _Bitmap: Sequence, IteratorProtocol {
extension _Bitmap: Sequence {
@usableFromInline
internal typealias Element = _Bucket
internal typealias Element = (bucket: _Bucket, slot: _Slot)

@inlinable
internal var underestimatedCount: Int { count }
@usableFromInline
@frozen
internal struct Iterator: IteratorProtocol {
@usableFromInline
internal var bitmap: _Bitmap

@usableFromInline
internal var slot: _Slot

@inlinable
internal init(_ bitmap: _Bitmap) {
self.bitmap = bitmap
self.slot = .zero
}

/// Return the index of the lowest set bit in this word,
/// and also destructively clear it.
@inlinable
internal mutating func next() -> Element? {
guard let bucket = bitmap.popFirst() else { return nil }
defer { slot = slot.next() }
return (bucket, slot)
}
}

@inlinable
internal func makeIterator() -> _Bitmap { self }
internal var underestimatedCount: Int { count }

/// Return the index of the lowest set bit in this word,
/// and also destructively clear it.
@inlinable
internal mutating func next() -> _Bucket? {
guard _value != 0 else { return nil }
let bucket = _Bucket(UInt(bitPattern: _value.trailingZeroBitCount))
_value &= _value &- 1 // Clear lowest nonzero bit.
return bucket
internal func makeIterator() -> Iterator {
Iterator(self)
}
}
5 changes: 5 additions & 0 deletions Sources/PersistentCollections/Node/_Hash.swift
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,11 @@ extension _Hash {
}

extension _Hash {
/// A hash whose underlying value is zero.
@inlinable
internal static var emptyPrefix: _Hash {
  return .init(_value: 0)
}

@inlinable
internal func appending(_ bucket: _Bucket, at level: _Level) -> Self {
assert(value >> level.shift == 0)
Expand Down
Loading