Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Serialization optimizations #876

Merged
merged 2 commits into from
Jan 19, 2018
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 31 additions & 26 deletions build_runner/lib/src/asset_graph/serialization.dart
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ part of 'graph.dart';
///
/// This should be incremented any time the serialize/deserialize formats
/// change.
const _version = 15;
const _version = 16;

/// Deserializes an [AssetGraph] from a [Map].
class _AssetGraphDeserializer {
final _idToAssetId = <int, AssetId>{};
// Iteration order does not matter
final _idToAssetId = new HashMap<int, AssetId>();
final Map _serializedGraph;

_AssetGraphDeserializer(List<int> bytes)
Expand All @@ -28,10 +29,13 @@ class _AssetGraphDeserializer {
var graph = new AssetGraph._(
_deserializeDigest(_serializedGraph['buildActionsDigest'] as String));

var packageNames = _serializedGraph['packages'] as List<String>;

// Read in the id => AssetId map from the graph first.
for (var descriptor in _serializedGraph['serializedAssetIds']) {
_idToAssetId[descriptor[0] as int] =
new AssetId(descriptor[1] as String, descriptor[2] as String);
var assetPaths = _serializedGraph['assetPaths'] as List;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: can we add a more specific type here?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could make it List<dynamic> but I don't think it buys much. This list is a mix of ints and Strings.

for (var i = 0; i < assetPaths.length; i += 2) {
var packageName = packageNames[assetPaths[i + 1] as int];
_idToAssetId[i ~/ 2] = new AssetId(packageName, assetPaths[i] as String);
}

// Read in all the nodes and their outputs.
Expand Down Expand Up @@ -115,44 +119,41 @@ class _AssetGraphDeserializer {
return node;
}

List<AssetId> _deserializeAssetIds(List<int> serializedIds) =>
serializedIds.map((id) => _idToAssetId[id]).toList();
Iterable<AssetId> _deserializeAssetIds(List<int> serializedIds) =>
serializedIds.map((id) => _idToAssetId[id]);

bool _deserializeBool(int value) => value == 0 ? false : true;
}

/// Serializes an [AssetGraph] into a [Map].
class _AssetGraphSerializer {
final _assetIdToId = <AssetId, int>{};
// Iteration order does not matter
final _assetIdToId = new HashMap<AssetId, int>();

final AssetGraph _graph;

_AssetGraphSerializer(this._graph);

/// Perform the serialization, should only be called once.
List<int> serialize() {
/// Compute numeric identifiers for all asset ids.
var next = 0;
var pathId = 0;
// [path0, packageId0, path1, packageId1, ...]
var assetPaths = <dynamic>[];
var packages = _graph._nodesByPackage.keys.toList(growable: false);
for (var node in _graph.allNodes) {
_assetIdToId[node.id] = next;
next++;
_assetIdToId[node.id] = pathId;
pathId++;
assetPaths.add(node.id.path);
assetPaths.add(packages.indexOf(node.id.package));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you try doing an ordered set for packages as well? most likely it wouldn't help though as the number of packages isn't that large but we are doing this lookup a lot

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't get a fine-grained enough measurement to be sure but for a small enough N I think lists tend to be fastest. LinkedHashSet doesn't have an indexOf operator.

My intuition is that for the number of packages we'll have this is going to be fastest - and this reads the closest to the intention of the code. If we want later to do more benchmarking we can try changing this. I did run the map version and any performance difference washed out in the noise.

}

var result = <String, dynamic>{
'version': _version,
'nodes': _graph.allNodes.map(_serializeNode).toList(),
'nodes': _graph.allNodes.map(_serializeNode).toList(growable: false),
'buildActionsDigest': _serializeDigest(_graph.buildActionsDigest),
'packages': packages,
'assetPaths': assetPaths,
};

// Store the id => AssetId mapping as a nested list so we don't have to
// stringify the integers and parse them back (ints aren't valid JSON
// keys).
var serializedAssetIds = <List>[];
_assetIdToId.forEach((k, v) {
serializedAssetIds.add([v, k.package, k.path]);
});
result['serializedAssetIds'] = serializedAssetIds;

return UTF8.encode(JSON.encode(result));
}

Expand Down Expand Up @@ -238,11 +239,13 @@ class _WrappedAssetNode extends Object with ListMixin implements List {
case _Field.Id:
return serializer._assetIdToId[node.id];
case _Field.Outputs:
return node.outputs.map((id) => serializer._assetIdToId[id]).toList();
return node.outputs
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the future eliminating these allocations would be good as well, but not for this pr (for primaryOutputs as well)

.map((id) => serializer._assetIdToId[id])
.toList(growable: false);
case _Field.PrimaryOutputs:
return node.primaryOutputs
.map((id) => serializer._assetIdToId[id])
.toList();
.toList(growable: false);
case _Field.Digest:
return _serializeDigest(node.lastKnownDigest);
default:
Expand Down Expand Up @@ -289,7 +292,9 @@ class _WrappedGeneratedAssetNode extends _WrappedAssetNode {
case _Field.PhaseNumber:
return generatedNode.phaseNumber;
case _Field.Globs:
return generatedNode.globs.map((glob) => glob.pattern).toList();
return generatedNode.globs
.map((glob) => glob.pattern)
.toList(growable: false);
case _Field.NeedsUpdate:
return _serializeBool(generatedNode.needsUpdate);
case _Field.PreviousInputsDigest:
Expand Down