diff --git a/src/gc-heap-snapshot.cpp b/src/gc-heap-snapshot.cpp index 00b6934386cd0..a197e70df97fc 100644 --- a/src/gc-heap-snapshot.cpp +++ b/src/gc-heap-snapshot.cpp @@ -6,6 +6,7 @@ #include "julia_assert.h" #include "gc.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/DenseMap.h" @@ -21,6 +22,7 @@ using std::set; using std::ostringstream; using std::pair; using std::make_pair; +using llvm::SmallVector; using llvm::StringMap; using llvm::DenseMap; using llvm::StringRef; @@ -57,8 +59,9 @@ void print_str_escape_json(ios_t *stream, StringRef s) // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2598-L2601 struct Edge { - size_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. + uint8_t type; // These *must* match the Enums on the JS side; control interpretation of name_or_index. size_t name_or_index; // name of the field (for objects/modules) or index of array + size_t from_node; // This is a deviation from the .heapsnapshot format to support streaming. size_t to_node; }; @@ -67,29 +70,34 @@ struct Edge { // [ "type", "name", "id", "self_size", "edge_count", "trace_node_id", "detachedness" ] // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2568-L2575 -const int k_node_number_of_fields = 7; struct Node { - size_t type; // index into snapshot->node_types + uint8_t type; // index into snapshot->node_types size_t name; size_t id; // This should be a globally-unique counter, but we use the memory address size_t self_size; size_t trace_node_id; // This is ALWAYS 0 in Javascript heap-snapshots. 
// whether the from_node is attached or dettached from the main application state // https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/include/v8-profiler.h#L739-L745 - int detachedness; // 0 - unknown, 1 - attached, 2 - detached - vector<Edge> edges; + uint8_t detachedness; // 0 - unknown, 1 - attached, 2 - detached ~Node() JL_NOTSAFEPOINT = default; }; -struct StringTable { +class StringTable { +protected: StringMap<size_t> map; - vector<StringRef> strings; + SmallVector<StringRef, 0> strings; + size_t next_id; + +public: + StringTable() JL_NOTSAFEPOINT : map(), strings(), next_id(0) {}; size_t find_or_create_string_id(StringRef key) JL_NOTSAFEPOINT { - auto val = map.insert(make_pair(key, map.size())); - if (val.second) + auto val = map.insert(make_pair(key, next_id)); + if (val.second) { strings.push_back(val.first->first()); + next_id++; + } return val.first->second; } @@ -109,16 +117,65 @@ struct StringTable { } }; -struct HeapSnapshot { - vector<Node> nodes; - // edges are stored on each from_node +// a string table with partial strings in memory and all strings serialized to a file +class SerializedStringTable: public StringTable { + public: + + // serialize the string only if it's not already in the table + size_t serialize_if_necessary(ios_t *stream, StringRef key) JL_NOTSAFEPOINT { + auto val = map.insert(make_pair(key, next_id)); + if (val.second) { + strings.push_back(val.first->first()); + // persist the string size first, then the string itself + // so that we can read it back in the same order + size_t s_size = key.size(); + ios_write(stream, reinterpret_cast<const char*>(&s_size), sizeof(size_t)); + ios_write(stream, key.data(), s_size); + next_id++; + } + return val.first->second; + } - StringTable names; + // serialize the string without checking if it is in the table or not + // and return its index. This means that we might have duplicates in the + // output string file. 
+ size_t serialize(ios_t *stream, StringRef key) JL_NOTSAFEPOINT { + size_t s_size = key.size(); + ios_write(stream, reinterpret_cast<const char*>(&s_size), sizeof(size_t)); + ios_write(stream, key.data(), s_size); + size_t current = next_id; + next_id++; + return current; + } +}; + +struct HeapSnapshot { + // names could be very large, so we keep them in a separate binary file + // and use a StringTable to keep track of the indices of frequently used strings + // to reduce duplicates in the output file to some degree + SerializedStringTable names; + // node types and edge types are very small, so we keep them in memory StringTable node_types; StringTable edge_types; DenseMap<void*, size_t> node_ptr_to_index_map; - size_t num_edges = 0; // For metadata, updated as you add each edge. Needed because edges owned by nodes. + size_t num_nodes = 0; // Since we stream out to files, + size_t num_edges = 0; // we need to track the counts here. + + // Node internal_root; + + // Used for streaming + // Since nodes and edges are just one giant array of integers, we stream them as + // *BINARY DATA*: the raw bytes of each field, size_t-wide (big enough to fit the pointer + // ids) except for the one-byte `type` and `detachedness` fields. + ios_t *nodes; + ios_t *edges; + // strings are serialized to a file in binary format + ios_t *strings; + // the following file is written out as json data. 
+ ios_t *json; + + size_t internal_root_idx = 0; // node index of the internal root node size_t _gc_root_idx = 1; // node index of the GC roots node size_t _gc_finlist_root_idx = 2; // node index of the GC finlist roots node }; @@ -129,17 +186,22 @@ int gc_heap_snapshot_enabled = 0; HeapSnapshot *g_snapshot = nullptr; extern jl_mutex_t heapsnapshot_lock; +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one); void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one); static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, jl_value_t *b, size_t name_or_index) JL_NOTSAFEPOINT; -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; -void _add_synthetic_root_entries(HeapSnapshot *snapshot); +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT; +void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT; -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one) { HeapSnapshot snapshot; - _add_synthetic_root_entries(&snapshot); + snapshot.nodes = nodes; + snapshot.edges = edges; + snapshot.strings = strings; + snapshot.json = json; jl_mutex_lock(&heapsnapshot_lock); @@ -147,6 +209,8 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) g_snapshot = &snapshot; gc_heap_snapshot_enabled = true; + _add_synthetic_root_entries(&snapshot); + // Do a full GC mark (and incremental sweep), which will invoke our callbacks on `g_snapshot` jl_gc_collect(JL_GC_FULL); @@ -158,70 +222,96 @@ JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one) // When we return, the snapshot is full // Dump the snapshot - serialize_heap_snapshot((ios_t*)stream, snapshot, all_one); + 
final_serialize_heap_snapshot((ios_t*)json, (ios_t*)strings, snapshot, all_one); +} + +void serialize_node(HeapSnapshot *snapshot, const Node &node) JL_NOTSAFEPOINT +{ + // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] + ios_write(snapshot->nodes, (char*)&node.type, sizeof(node.type)); + ios_write(snapshot->nodes, (char*)&node.name, sizeof(node.name)); + ios_write(snapshot->nodes, (char*)&node.id, sizeof(node.id)); + ios_write(snapshot->nodes, (char*)&node.self_size, sizeof(node.self_size)); + // NOTE: We don't write edge_count: edges are streamed separately, so it will be + // reconstructed in post-processing. + ios_write(snapshot->nodes, (char*)&node.trace_node_id, sizeof(node.trace_node_id)); + ios_write(snapshot->nodes, (char*)&node.detachedness, sizeof(node.detachedness)); + + snapshot->num_nodes += 1; +} + +void serialize_edge(HeapSnapshot *snapshot, const Edge &edge) JL_NOTSAFEPOINT +{ + // ["type","name_or_index","from_node","to_node"] + ios_write(snapshot->edges, (char*)&edge.type, sizeof(edge.type)); + ios_write(snapshot->edges, (char*)&edge.name_or_index, sizeof(edge.name_or_index)); + // NOTE: Row numbers for nodes (not adjusted for k_node_number_of_fields, which is 7) + ios_write(snapshot->edges, (char*)&edge.from_node, sizeof(edge.from_node)); + ios_write(snapshot->edges, (char*)&edge.to_node, sizeof(edge.to_node)); + + snapshot->num_edges += 1; +} // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L212 // add synthetic nodes for the uber root, the GC roots, and the GC finalizer list roots -void _add_synthetic_root_entries(HeapSnapshot *snapshot) +void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT { // adds a node at id 0 which is the "uber root": // a synthetic node which points to all the GC roots. 
Node internal_root{ - snapshot->node_types.find_or_create_string_id("synthetic"), - snapshot->names.find_or_create_string_id(""), // name + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.serialize_if_necessary(snapshot->strings, ""), // name 0, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(internal_root); + serialize_node(snapshot, internal_root); // Add a node for the GC roots - snapshot->_gc_root_idx = snapshot->nodes.size(); + snapshot->_gc_root_idx = snapshot->internal_root_idx + 1; Node gc_roots{ - snapshot->node_types.find_or_create_string_id("synthetic"), - snapshot->names.find_or_create_string_id("GC roots"), // name + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.serialize_if_necessary(snapshot->strings, "GC roots"), // name snapshot->_gc_root_idx, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(gc_roots); - snapshot->nodes.front().edges.push_back(Edge{ - snapshot->edge_types.find_or_create_string_id("internal"), - snapshot->names.find_or_create_string_id("GC roots"), // edge label + serialize_node(snapshot, gc_roots); + Edge root_to_gc_roots{ + (uint8_t)snapshot->edge_types.find_or_create_string_id("internal"), + snapshot->names.serialize_if_necessary(snapshot->strings, "GC roots"), // edge label + snapshot->internal_root_idx, // from snapshot->_gc_root_idx // to - }); - snapshot->num_edges += 1; + }; + serialize_edge(snapshot, root_to_gc_roots); // add a node for the gc finalizer list roots - snapshot->_gc_finlist_root_idx = snapshot->nodes.size(); + 
snapshot->_gc_finlist_root_idx = snapshot->internal_root_idx + 2; Node gc_finlist_roots{ - snapshot->node_types.find_or_create_string_id("synthetic"), - snapshot->names.find_or_create_string_id("GC finalizer list roots"), // name + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.serialize_if_necessary(snapshot->strings, "GC finalizer list roots"), // name snapshot->_gc_finlist_root_idx, // id 0, // size 0, // size_t trace_node_id (unused) - 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges + 0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached }; - snapshot->nodes.push_back(gc_finlist_roots); - snapshot->nodes.front().edges.push_back(Edge{ - snapshot->edge_types.find_or_create_string_id("internal"), - snapshot->names.find_or_create_string_id("GC finlist roots"), // edge label + serialize_node(snapshot, gc_finlist_roots); + Edge root_to_gc_finlist_roots{ + (uint8_t)snapshot->edge_types.find_or_create_string_id("internal"), + snapshot->names.serialize_if_necessary(snapshot->strings, "GC finalizer list roots"), // edge label + snapshot->internal_root_idx, // from snapshot->_gc_finlist_root_idx // to - }); - snapshot->num_edges += 1; + }; + serialize_edge(snapshot, root_to_gc_finlist_roots); } // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597 // returns the index of the new node size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } @@ -291,17 +381,17 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT name = StringRef((const char*)str_.buf, str_.size); } - g_snapshot->nodes.push_back(Node{ - 
g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; - g_snapshot->names.find_or_create_string_id(name), // size_t name; + auto node = Node{ + (uint8_t)g_snapshot->node_types.find_or_create_string_id(node_type), // size_t type; + g_snapshot->names.serialize(g_snapshot->strings, name), // size_t name; (size_t)a, // size_t id; // We add 1 to self-size for the type tag that all heap-allocated objects have. // Also because the Chrome Snapshot viewer ignores size-0 leaves! sizeof(void*) + self_size, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); if (ios_need_close) ios_close(&str_); @@ -311,20 +401,20 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT static size_t record_pointer_to_gc_snapshot(void *a, size_t bytes, StringRef name) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(a, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - g_snapshot->nodes.push_back(Node{ - g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; - g_snapshot->names.find_or_create_string_id(name), // size_t name; + auto node = Node{ + (uint8_t)g_snapshot->node_types.find_or_create_string_id( "object"), // size_t type; + g_snapshot->names.serialize(g_snapshot->strings, name), // size_t name; (size_t)a, // size_t id; bytes, // size_t self_size; 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(g_snapshot, node); return val.first->second; } @@ -360,36 +450,29 @@ static string _fieldpath_for_slot(void *obj, void *slot) JL_NOTSAFEPOINT } } - void _gc_heap_snapshot_record_root(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - 
record_node_to_gc_snapshot(root); - - auto &internal_root = g_snapshot->nodes.front(); - auto to_node_idx = g_snapshot->node_ptr_to_index_map[root]; - auto edge_label = g_snapshot->names.find_or_create_string_id(name); + size_t to_node_idx = record_node_to_gc_snapshot(root); + auto edge_label = g_snapshot->names.serialize(g_snapshot->strings, name); - _record_gc_just_edge("internal", internal_root, to_node_idx, edge_label); + _record_gc_just_edge("internal", g_snapshot->internal_root_idx, to_node_idx, edge_label); } void _gc_heap_snapshot_record_gc_roots(jl_value_t *root, char *name) JL_NOTSAFEPOINT { - record_node_to_gc_snapshot(root); - - auto from_node_idx = g_snapshot->_gc_root_idx; auto to_node_idx = record_node_to_gc_snapshot(root); - auto edge_label = g_snapshot->names.find_or_create_string_id(name); - _record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label); + auto edge_label = g_snapshot->names.serialize(g_snapshot->strings, name); + + _record_gc_just_edge("internal", g_snapshot->_gc_root_idx, to_node_idx, edge_label); } void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEPOINT { - auto from_node_idx = g_snapshot->_gc_finlist_root_idx; auto to_node_idx = record_node_to_gc_snapshot(obj); ostringstream ss; ss << "finlist-" << index; - auto edge_label = g_snapshot->names.find_or_create_string_id(ss.str()); - _record_gc_just_edge("internal", g_snapshot->nodes[from_node_idx], to_node_idx, edge_label); + auto edge_label = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, ss.str()); + _record_gc_just_edge("internal", g_snapshot->_gc_finlist_root_idx, to_node_idx, edge_label); } // Add a node to the heap snapshot representing a Julia stack frame. @@ -398,20 +481,20 @@ void _gc_heap_snapshot_record_finlist(jl_value_t *obj, size_t index) JL_NOTSAFEP // Stack frame nodes point at the objects they have as local variables. 
size_t _record_stack_frame_node(HeapSnapshot *snapshot, void *frame) JL_NOTSAFEPOINT { - auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->nodes.size())); + auto val = g_snapshot->node_ptr_to_index_map.insert(make_pair(frame, g_snapshot->num_nodes)); if (!val.second) { return val.first->second; } - snapshot->nodes.push_back(Node{ - snapshot->node_types.find_or_create_string_id("synthetic"), - snapshot->names.find_or_create_string_id("(stack frame)"), // name + auto node = Node{ + (uint8_t)snapshot->node_types.find_or_create_string_id("synthetic"), + snapshot->names.serialize_if_necessary(snapshot->strings, "(stack frame)"), // name (size_t)frame, // id 1, // size 0, // size_t trace_node_id (unused) 0, // int detachedness; // 0 - unknown, 1 - attached; 2 - detached - vector() // outgoing edges - }); + }; + serialize_node(snapshot, node); return val.first->second; } @@ -420,30 +503,27 @@ void _gc_heap_snapshot_record_frame_to_object_edge(void *from, jl_value_t *to) J { auto from_node_idx = _record_stack_frame_node(g_snapshot, (jl_gcframe_t*)from); auto to_idx = record_node_to_gc_snapshot(to); - Node &from_node = g_snapshot->nodes[from_node_idx]; - auto name_idx = g_snapshot->names.find_or_create_string_id("local var"); - _record_gc_just_edge("internal", from_node, to_idx, name_idx); + auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "local var"); + _record_gc_just_edge("internal", from_node_idx, to_idx, name_idx); } void _gc_heap_snapshot_record_task_to_frame_edge(jl_task_t *from, void *to) JL_NOTSAFEPOINT { auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; - auto name_idx = g_snapshot->names.find_or_create_string_id("stack"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, 
"stack"); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_frame_to_frame_edge(jl_gcframe_t *from, jl_gcframe_t *to) JL_NOTSAFEPOINT { auto from_node_idx = _record_stack_frame_node(g_snapshot, from); auto to_node_idx = _record_stack_frame_node(g_snapshot, to); - Node &from_node = g_snapshot->nodes[from_node_idx]; - auto name_idx = g_snapshot->names.find_or_create_string_id("next frame"); - _record_gc_just_edge("internal", from_node, to_node_idx, name_idx); + auto name_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "next frame"); + _record_gc_just_edge("internal", from_node_idx, to_node_idx, name_idx); } void _gc_heap_snapshot_record_array_edge(jl_value_t *from, jl_value_t *to, size_t index) JL_NOTSAFEPOINT @@ -455,7 +535,7 @@ void _gc_heap_snapshot_record_object_edge(jl_value_t *from, jl_value_t *to, void { string path = _fieldpath_for_slot(from, slot); _record_gc_edge("property", from, to, - g_snapshot->names.find_or_create_string_id(path)); + g_snapshot->names.serialize_if_necessary(g_snapshot->strings, path)); } void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_value_t *bindings, jl_value_t *bindingkeyset) JL_NOTSAFEPOINT @@ -463,46 +543,44 @@ void _gc_heap_snapshot_record_module_to_binding(jl_module_t *module, jl_value_t auto from_node_idx = record_node_to_gc_snapshot((jl_value_t*)module); auto to_bindings_idx = record_node_to_gc_snapshot(bindings); auto to_bindingkeyset_idx = record_node_to_gc_snapshot(bindingkeyset); - auto &from_node = g_snapshot->nodes[from_node_idx]; + if (to_bindings_idx > 0) { - _record_gc_just_edge("internal", from_node, to_bindings_idx, g_snapshot->names.find_or_create_string_id("bindings")); + _record_gc_just_edge("internal", from_node_idx, to_bindings_idx, g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "bindings")); } if (to_bindingkeyset_idx > 0) { - _record_gc_just_edge("internal", from_node, to_bindingkeyset_idx, 
g_snapshot->names.find_or_create_string_id("bindingkeyset")); + _record_gc_just_edge("internal", from_node_idx, to_bindingkeyset_idx, g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "bindingkeyset")); } } void _gc_heap_snapshot_record_internal_array_edge(jl_value_t *from, jl_value_t *to) JL_NOTSAFEPOINT { _record_gc_edge("internal", from, to, - g_snapshot->names.find_or_create_string_id("")); + g_snapshot->names.serialize_if_necessary(g_snapshot->strings, "")); } void _gc_heap_snapshot_record_hidden_edge(jl_value_t *from, void* to, size_t bytes, uint16_t alloc_type) JL_NOTSAFEPOINT { - size_t name_or_idx = g_snapshot->names.find_or_create_string_id(""); + // valid alloc_type values are 0, 1, 2 + assert(alloc_type <= 2); + size_t name_or_idx = g_snapshot->names.serialize_if_necessary(g_snapshot->strings, ""); auto from_node_idx = record_node_to_gc_snapshot(from); - const char *alloc_kind; + const char *alloc_kind = NULL; switch (alloc_type) { case 0: - alloc_kind = ""; + alloc_kind = ""; break; case 1: - alloc_kind = ""; + alloc_kind = ""; break; case 2: - alloc_kind = ""; - break; - default: - alloc_kind = ""; + alloc_kind = ""; break; } auto to_node_idx = record_pointer_to_gc_snapshot(to, bytes, alloc_kind); - auto &from_node = g_snapshot->nodes[from_node_idx]; - _record_gc_just_edge("hidden", from_node, to_node_idx, name_or_idx); + _record_gc_just_edge("hidden", from_node_idx, to_node_idx, name_or_idx); } static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, @@ -511,104 +589,39 @@ static inline void _record_gc_edge(const char *edge_type, jl_value_t *a, auto from_node_idx = record_node_to_gc_snapshot(a); auto to_node_idx = record_node_to_gc_snapshot(b); - auto &from_node = g_snapshot->nodes[from_node_idx]; - - _record_gc_just_edge(edge_type, from_node, to_node_idx, name_or_idx); + _record_gc_just_edge(edge_type, from_node_idx, to_node_idx, name_or_idx); } -void _record_gc_just_edge(const char *edge_type, Node &from_node, size_t 
to_idx, size_t name_or_idx) JL_NOTSAFEPOINT +void _record_gc_just_edge(const char *edge_type, size_t from_idx, size_t to_idx, size_t name_or_idx) JL_NOTSAFEPOINT { - from_node.edges.push_back(Edge{ - g_snapshot->edge_types.find_or_create_string_id(edge_type), + auto edge = Edge{ + (uint8_t)g_snapshot->edge_types.find_or_create_string_id(edge_type), name_or_idx, // edge label + from_idx, // from to_idx // to - }); + }; - g_snapshot->num_edges += 1; + serialize_edge(g_snapshot, edge); } -void serialize_heap_snapshot(ios_t *stream, HeapSnapshot &snapshot, char all_one) +void final_serialize_heap_snapshot(ios_t *json, ios_t *strings, HeapSnapshot &snapshot, char all_one) { // mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L2567-L2567 - ios_printf(stream, "{\"snapshot\":{"); - ios_printf(stream, "\"meta\":{"); - ios_printf(stream, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); - ios_printf(stream, "\"node_types\":["); - snapshot.node_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); - ios_printf(stream, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); - ios_printf(stream, "\"edge_types\":["); - snapshot.edge_types.print_json_array(stream, false); - ios_printf(stream, ","); - ios_printf(stream, "\"string_or_number\",\"from_node\"]"); - ios_printf(stream, "},\n"); // end "meta" - ios_printf(stream, "\"node_count\":%zu,", snapshot.nodes.size()); - ios_printf(stream, "\"edge_count\":%zu", snapshot.num_edges); - ios_printf(stream, "},\n"); // end "snapshot" - - ios_printf(stream, "\"nodes\":["); - bool first_node = true; - // use a set to track the nodes that do not have parents - set orphans; - for (const auto &from_node : snapshot.nodes) { - if (first_node) { - first_node = false; - } - else { - 
ios_printf(stream, ","); - } - // ["type","name","id","self_size","edge_count","trace_node_id","detachedness"] - ios_printf(stream, "%zu,%zu,%zu,%zu,%zu,%zu,%d\n", - from_node.type, - from_node.name, - from_node.id, - all_one ? (size_t)1 : from_node.self_size, - from_node.edges.size(), - from_node.trace_node_id, - from_node.detachedness); - if (from_node.id != snapshot._gc_root_idx && from_node.id != snapshot._gc_finlist_root_idx) { - // find the node index from the node object pointer - void * ptr = (void*)from_node.id; - size_t n_id = snapshot.node_ptr_to_index_map[ptr]; - orphans.insert(n_id); - } else { - orphans.insert(from_node.id); - } - } - ios_printf(stream, "],\n"); - - ios_printf(stream, "\"edges\":["); - bool first_edge = true; - for (const auto &from_node : snapshot.nodes) { - for (const auto &edge : from_node.edges) { - if (first_edge) { - first_edge = false; - } - else { - ios_printf(stream, ","); - } - ios_printf(stream, "%zu,%zu,%zu\n", - edge.type, - edge.name_or_index, - edge.to_node * k_node_number_of_fields); - auto n_id = edge.to_node; - auto it = orphans.find(n_id); - if (it != orphans.end()) { - // remove the node from the orphans if it has at least one incoming edge - orphans.erase(it); - } - } - } - ios_printf(stream, "],\n"); // end "edges" - - ios_printf(stream, "\"strings\":"); - - snapshot.names.print_json_array(stream, true); - - ios_printf(stream, "}"); - - // remove the uber node from the orphans - orphans.erase(0); - assert(orphans.size() == 0 && "all nodes except the uber node should have at least one incoming edge"); + ios_printf(json, "{\"snapshot\":{"); + ios_printf(json, "\"meta\":{"); + ios_printf(json, "\"node_fields\":[\"type\",\"name\",\"id\",\"self_size\",\"edge_count\",\"trace_node_id\",\"detachedness\"],"); + ios_printf(json, "\"node_types\":["); + snapshot.node_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string\", \"number\", \"number\", \"number\", \"number\", \"number\"],"); + 
ios_printf(json, "\"edge_fields\":[\"type\",\"name_or_index\",\"to_node\"],"); + ios_printf(json, "\"edge_types\":["); + snapshot.edge_types.print_json_array(json, false); + ios_printf(json, ","); + ios_printf(json, "\"string_or_number\",\"from_node\"]"); + ios_printf(json, "},\n"); // end "meta" + ios_printf(json, "\"node_count\":%zu,", snapshot.num_nodes); + ios_printf(json, "\"edge_count\":%zu", snapshot.num_edges); + ios_printf(json, "}\n"); // end "snapshot" + ios_printf(json, "}"); } diff --git a/src/gc-heap-snapshot.h b/src/gc-heap-snapshot.h index 1799063825e83..70884f5f62d6a 100644 --- a/src/gc-heap-snapshot.h +++ b/src/gc-heap-snapshot.h @@ -120,7 +120,8 @@ static inline void gc_heap_snapshot_record_finlist(jl_value_t *finlist, size_t i // --------------------------------------------------------------------- // Functions to call from Julia to take heap snapshot // --------------------------------------------------------------------- -JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *stream, char all_one); +JL_DLLEXPORT void jl_gc_take_heap_snapshot(ios_t *nodes, ios_t *edges, + ios_t *strings, ios_t *json, char all_one); #ifdef __cplusplus diff --git a/stdlib/Profile/docs/src/index.md b/stdlib/Profile/docs/src/index.md index 85a8452ab06df..8832d5734cb98 100644 --- a/stdlib/Profile/docs/src/index.md +++ b/stdlib/Profile/docs/src/index.md @@ -133,5 +133,24 @@ Traces and records julia objects on the heap. This only records objects known to garbage collector. Memory allocated by external libraries not managed by the garbage collector will not show up in the snapshot. +To avoid OOMing while recording the snapshot, we added a streaming option to stream out the heap snapshot +into four files, + +```julia-repl +julia> using Profile + +julia> Profile.take_heap_snapshot("snapshot"; streaming=true) +``` + +where "snapshot" is the filepath as the prefix for the generated files. 
+ +Once the snapshot files are generated, they can be assembled offline with the following command: + +```julia-repl +julia> using Profile + +julia> Profile.HeapSnapshot.assemble_snapshot("snapshot", "snapshot.heapsnapshot") +``` + The resulting heap snapshot file can be uploaded to chrome devtools to be viewed. For more information, see the [chrome devtools docs](https://developer.chrome.com/docs/devtools/memory-problems/heap-snapshots/#view_snapshots). diff --git a/stdlib/Profile/src/Profile.jl b/stdlib/Profile/src/Profile.jl index 24d2512dcbedf..7b5632892fafd 100644 --- a/stdlib/Profile/src/Profile.jl +++ b/stdlib/Profile/src/Profile.jl @@ -1220,9 +1220,8 @@ end """ - Profile.take_heap_snapshot(io::IOStream, all_one::Bool=false) - Profile.take_heap_snapshot(filepath::String, all_one::Bool=false) - Profile.take_heap_snapshot(all_one::Bool=false; dir::String) + Profile.take_heap_snapshot(filepath::String, all_one::Bool=false; streaming=false) + Profile.take_heap_snapshot(all_one::Bool=false; dir::String) Write a snapshot of the heap, in the JSON format expected by the Chrome Devtools Heap Snapshot viewer (.heapsnapshot extension) to a file @@ -1232,16 +1231,67 @@ full file path, or IO stream. If `all_one` is true, then report the size of every object as one so they can be easily counted. Otherwise, report the actual size. + +If `streaming` is true, we will stream the snapshot data out into four files, using `filepath` +as the prefix, to avoid having to hold the entire snapshot in memory. This option should be +used for any setting where your memory is constrained. These files can then be reassembled +by calling `Profile.HeapSnapshot.assemble_snapshot()`, which can +be done offline. + +NOTE: We strongly recommend setting streaming=true for performance reasons. Reconstructing +the snapshot from the parts requires holding the entire snapshot in memory, so if the +snapshot is large, you can run out of memory while processing it. 
Streaming allows you to +reconstruct the snapshot offline, after your workload is done running. +If you do attempt to collect a snapshot with streaming=false (the default, for +backwards-compatibility) and your process is killed, note that this will always save the +parts in the same directory as your provided filepath, so you can still reconstruct the +snapshot after the fact, via `assemble_snapshot()`. """ -function take_heap_snapshot(io::IOStream, all_one::Bool=false) - Base.@_lock_ios(io, ccall(:jl_gc_take_heap_snapshot, Cvoid, (Ptr{Cvoid}, Cchar), io.handle, Cchar(all_one))) -end -function take_heap_snapshot(filepath::String, all_one::Bool=false) - open(filepath, "w") do io - take_heap_snapshot(io, all_one) +function take_heap_snapshot(filepath::AbstractString, all_one::Bool=false; streaming::Bool=false) + if streaming + _stream_heap_snapshot(filepath, all_one) + else + # Support the legacy, non-streaming mode, by first streaming the parts, then + # reassembling it after we're done. + prefix = filepath + _stream_heap_snapshot(prefix, all_one) + Profile.HeapSnapshot.assemble_snapshot(prefix, filepath) end return filepath end +function take_heap_snapshot(io::IO, all_one::Bool=false) + # Support the legacy, non-streaming mode, by first streaming the parts to a tempdir, + # then reassembling it after we're done. 
+ dir = mktempdir() + prefix = joinpath(dir, "snapshot") + _stream_heap_snapshot(prefix, all_one) + Profile.HeapSnapshot.assemble_snapshot(prefix, io) +end +function _stream_heap_snapshot(prefix::AbstractString, all_one::Bool) + # Nodes, edges and strings are binary files + open("$prefix.nodes", "w") do nodes + open("$prefix.edges", "w") do edges + open("$prefix.strings", "w") do strings + # The following file is json data + open("$prefix.metadata.json", "w") do json + Base.@_lock_ios(nodes, + Base.@_lock_ios(edges, + Base.@_lock_ios(strings, + Base.@_lock_ios(json, + ccall(:jl_gc_take_heap_snapshot, + Cvoid, + (Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid},Ptr{Cvoid}, Cchar), + nodes.handle, edges.handle, strings.handle, json.handle, + Cchar(all_one)) + ) + ) + ) + ) + end + end + end + end +end function take_heap_snapshot(all_one::Bool=false; dir::Union{Nothing,S}=nothing) where {S <: AbstractString} fname = "$(getpid())_$(time_ns()).heapsnapshot" if isnothing(dir) @@ -1277,6 +1327,7 @@ function take_page_profile(filepath::String) end include("Allocs.jl") +include("heapsnapshot_reassemble.jl") include("precompile.jl") end # module diff --git a/stdlib/Profile/src/heapsnapshot_reassemble.jl b/stdlib/Profile/src/heapsnapshot_reassemble.jl new file mode 100644 index 0000000000000..b39f53a8bda03 --- /dev/null +++ b/stdlib/Profile/src/heapsnapshot_reassemble.jl @@ -0,0 +1,231 @@ +# This file is a part of Julia. License is MIT: https://julialang.org/license + +module HeapSnapshot + +""" + assemble_snapshot(filepath::AbstractString, out_file::AbstractString) + +Assemble a .heapsnapshot file from the `.nodes`, `.edges`, `.strings` and `.metadata.json` files produced by `Profile.take_heap_snapshot`. 
+""" + +# SoA layout to reduce padding +struct Edges + type::Vector{Int8} # index into `snapshot.meta.edge_types` + name_or_index::Vector{UInt} # Either an index into `snapshot.strings`, or the index in an array, depending on edge_type + to_pos::Vector{UInt} # index into `snapshot.nodes` +end +function Edges(n::Int) + Edges( + Vector{Int8}(undef, n), + Vector{UInt}(undef, n), + Vector{UInt}(undef, n), + ) +end +Base.length(n::Edges) = length(n.type) + +# trace_node_id and detachedness are always 0 in the snapshots Julia produces so we don't store them +struct Nodes + type::Vector{Int8} # index into `snapshot.meta.node_types` + name_idx::Vector{UInt32} # index into `snapshot.strings` + id::Vector{UInt} # unique id, in julia it is the address of the object + self_size::Vector{Int} # size of the object itself, not including the size of its fields + edge_count::Vector{UInt} # number of outgoing edges + edges::Edges # outgoing edges + # This is the main complexity of the .heapsnapshot format, and it's the reason we need + # to read in all the data before writing it out. The edges vector contains all edges, + # but organized by which node they came from. First, it contains all the edges coming + # out of node 0, then all edges leaving node 1, etc. So we need to have visited all + # edges, and assigned them to their corresponding nodes, before we can emit the file. 
+ edge_idxs::Vector{Vector{UInt}} # indexes into edges, keeping per-node outgoing edge ids +end +function Nodes(n::Int, e::Int) + Nodes( + Vector{Int8}(undef, n), + Vector{UInt32}(undef, n), + Vector{UInt}(undef, n), + Vector{Int}(undef, n), + Vector{UInt32}(undef, n), + Edges(e), + [Vector{UInt}() for _ in 1:n], # Take care to construct n separate empty vectors + ) +end +Base.length(n::Nodes) = length(n.type) + +const k_node_number_of_fields = 7 + +# Like Base.dec, but doesn't allocate a string and writes directly to the io object +# We know all of the numbers we're about to write fit into a UInt and are non-negative +let _dec_d100 = UInt16[(0x30 + i % 10) << 0x8 + (0x30 + i ÷ 10) for i = 0:99] + global _write_decimal_number + _write_decimal_number(io, x::Integer, buf) = _write_decimal_number(io, unsigned(x), buf) + function _write_decimal_number(io, x::Unsigned, digits_buf) + buf = digits_buf + n = ndigits(x) + i = n + @inbounds while i >= 2 + d, r = divrem(x, 0x64) + d100 = _dec_d100[(r % Int)::Int + 1] + buf[i-1] = d100 % UInt8 + buf[i] = (d100 >> 0x8) % UInt8 + x = oftype(x, d) + i -= 2 + end + if i > 0 + @inbounds buf[i] = 0x30 + (rem(x, 0xa) % UInt8)::UInt8 + end + write(io, @view buf[max(i, 1):n]) + end +end + +function assemble_snapshot(in_prefix, out_file::AbstractString = in_prefix) + open(out_file, "w") do io + assemble_snapshot(in_prefix, io) + end +end + +# Manually parse and write the .json files, given that we don't have JSON import/export in +# julia's stdlibs. 
+# Reassemble the streamed snapshot parts found at `in_prefix` and write the resulting
+# .heapsnapshot JSON document to `io`. Reads `<in_prefix>.metadata.json`,
+# `<in_prefix>.nodes`, `<in_prefix>.edges` and `<in_prefix>.strings`. Returns `nothing`.
+function assemble_snapshot(in_prefix, io::IO)
+    # The metadata file carries the node and edge counts; pull them out textually.
+    preamble = read(string(in_prefix, ".metadata.json"), String)
+    pos = last(findfirst("node_count\":", preamble)) + 1
+    endpos = findnext(==(','), preamble, pos) - 1
+    node_count = parse(Int, String(@view preamble[pos:endpos]))
+
+    pos = last(findnext("edge_count\":", preamble, endpos)) + 1
+    endpos = findnext(==('}'), preamble, pos) - 1
+    edge_count = parse(Int, String(@view preamble[pos:endpos]))
+
+    nodes = Nodes(node_count, edge_count)
+
+    orphans = Set{UInt}() # nodes that have no incoming edges
+    # Parse nodes with empty edge counts that we need to fill later.
+    # The do-block form guarantees the file is closed even if a read or assert throws.
+    open(string(in_prefix, ".nodes"), "r") do nodes_file
+        for i in 1:length(nodes)
+            node_type = read(nodes_file, Int8)
+            node_name_idx = read(nodes_file, UInt)
+            id = read(nodes_file, UInt)
+            self_size = read(nodes_file, Int)
+            @assert read(nodes_file, Int) == 0 # trace_node_id
+            @assert read(nodes_file, Int8) == 0 # detachedness
+
+            nodes.type[i] = node_type
+            nodes.name_idx[i] = node_name_idx
+            nodes.id[i] = id
+            nodes.self_size[i] = self_size
+            nodes.edge_count[i] = 0 # edge_count
+            # populate the orphans set with node index
+            push!(orphans, i-1)
+        end
+    end
+
+    # Parse the edges to fill in the edge counts for nodes and correct the to_node offsets
+    open(string(in_prefix, ".edges"), "r") do edges_file
+        for i in 1:length(nodes.edges)
+            edge_type = read(edges_file, Int8)
+            edge_name_or_index = read(edges_file, UInt)
+            from_node = read(edges_file, UInt)
+            to_node = read(edges_file, UInt)
+
+            nodes.edges.type[i] = edge_type
+            nodes.edges.name_or_index[i] = edge_name_or_index
+            nodes.edges.to_pos[i] = to_node * k_node_number_of_fields # 7 fields per node, the streaming format doesn't multiply the offset by 7
+            nodes.edge_count[from_node + 1] += UInt32(1) # C and JSON use 0-based indexing
+            push!(nodes.edge_idxs[from_node + 1], i) # Index into nodes.edges
+            # A node with at least one incoming edge is not an orphan; `delete!` is a
+            # no-op when the node has already been removed.
+            delete!(orphans, to_node)
+        end
+    end
+
+    _digits_buf = zeros(UInt8, ndigits(typemax(UInt)))
+    println(io, @view(preamble[1:end-2]), ",") # remove trailing "}\n", we don't end the snapshot here
+    println(io, "\"nodes\":[")
+    for i in 1:length(nodes)
+        i > 1 && println(io, ",")
+        _write_decimal_number(io, nodes.type[i], _digits_buf)
+        print(io, ",")
+        _write_decimal_number(io, nodes.name_idx[i], _digits_buf)
+        print(io, ",")
+        _write_decimal_number(io, nodes.id[i], _digits_buf)
+        print(io, ",")
+        _write_decimal_number(io, nodes.self_size[i], _digits_buf)
+        print(io, ",")
+        _write_decimal_number(io, nodes.edge_count[i], _digits_buf)
+        print(io, ",0,0") # trace_node_id and detachedness, asserted to be 0 above
+    end
+    print(io, "],\"edges\":[")
+    # Emit edges grouped by their source node, in node order.
+    e = 1
+    for n in 1:length(nodes)
+        count = nodes.edge_count[n]
+        len_edges = length(nodes.edge_idxs[n])
+        @assert count == len_edges "For node $n: $count != $len_edges"
+        for i in nodes.edge_idxs[n]
+            e > 1 && print(io, ",")
+            println(io)
+            _write_decimal_number(io, nodes.edges.type[i], _digits_buf)
+            print(io, ",")
+            _write_decimal_number(io, nodes.edges.name_or_index[i], _digits_buf)
+            print(io, ",")
+            _write_decimal_number(io, nodes.edges.to_pos[i], _digits_buf)
+            if !(nodes.edges.to_pos[i] % k_node_number_of_fields == 0)
+                @warn "Bug in to_pos for edge $i from node $n: $(nodes.edges.to_pos[i])"
+            end
+            e += 1
+        end
+    end
+    println(io, "],")
+
+    println(io, "\"strings\":[")
+    # Each record in the strings file is a UInt byte count followed by that many bytes.
+    open(string(in_prefix, ".strings"), "r") do strings_io
+        is_first = true
+        while !eof(strings_io)
+            str_size = read(strings_io, UInt)
+            str_bytes = read(strings_io, str_size)
+            str = String(str_bytes)
+            if is_first
+                print_str_escape_json(io, str)
+                is_first = false
+            else
+                print(io, ",\n")
+                print_str_escape_json(io, str)
+            end
+        end
+    end
+    print(io, "]}")
+
+    # remove the uber node from the orphans
+    delete!(orphans, 0)
+
+    @assert isempty(orphans) "Orphaned nodes: $(orphans), node count: $(length(nodes)), orphan node count: $(length(orphans))"
+
+    return nothing
+end
+
+# Write `s` to `stream` as a double-quoted JSON string literal. Quotes, backslashes and
+# the control characters below U+0020 are escaped; every other character is written
+# unchanged.
+function print_str_escape_json(stream::IO, s::AbstractString)
+    print(stream, '"')
+    for c in s
+        if c == '"'
+            print(stream, "\\\"")
+        elseif c == '\\'
+            print(stream, "\\\\")
+        elseif c == '\b'
+            print(stream, "\\b")
+        elseif c == '\f'
+            print(stream, "\\f")
+        elseif c == '\n'
+            print(stream, "\\n")
+        elseif c == '\r'
+            print(stream, "\\r")
+        elseif c == '\t'
+            print(stream, "\\t")
+        elseif '\x00' <= c <= '\x1f'
+            # Remaining control characters have no short form: emit \uXXXX.
+            print(stream, "\\u", string(UInt16(c), base=16, pad=4))
+        else
+            print(stream, c)
+        end
+    end
+    print(stream, '"')
+end
+
+end
diff --git a/stdlib/Profile/test/heapsnapshot_reassemble.jl b/stdlib/Profile/test/heapsnapshot_reassemble.jl
new file mode 100644
index 0000000000000..e1d6621647671
--- /dev/null
+++ b/stdlib/Profile/test/heapsnapshot_reassemble.jl
@@ -0,0 +1,54 @@
+using Test
+
+@testset "_write_decimal_number" begin
+    _digits_buf = zeros(UInt8, ndigits(typemax(UInt)))
+    io = IOBuffer()
+
+    test_write(d) = begin
+        Profile.HeapSnapshot._write_decimal_number(io, d, _digits_buf)
+        s = String(take!(io))
+        seekstart(io)
+        return s
+    end
+    @test test_write(0) == "0"
+    @test test_write(99) == "99"
+
+    @test test_write(UInt8(0)) == "0"
+    @test test_write(UInt32(0)) == "0"
+    @test test_write(Int32(0)) == "0"
+
+    @test test_write(UInt8(99)) == "99"
+    @test test_write(UInt32(99)) == "99"
+    @test test_write(Int32(99)) == "99"
+
+    # Sample among possible UInts we might print
+    for x in typemin(UInt8):typemax(UInt8)
+        @test test_write(x) == string(x)
+    end
+    for x in typemin(UInt):typemax(UInt)÷10001:typemax(UInt)
+        @test test_write(x) == string(x)
+    end
+end
+
+function test_print_str_escape_json(input::AbstractString, expected::AbstractString)
+    output = IOBuffer()
+    Profile.HeapSnapshot.print_str_escape_json(output, input)
+    @test String(take!(output)) == expected
+end
+
+@testset "print_str_escape_json" begin
+    # Test basic string escaping
+    test_print_str_escape_json("\"hello\"", "\"\\\"hello\\\"\"")
+
+    # Test escaping of control characters
+    test_print_str_escape_json("\x01\x02\x03", "\"\\u0001\\u0002\\u0003\"")
+
+    # Test escaping of other special characters
+    test_print_str_escape_json("\b\f\n\r\t", "\"\\b\\f\\n\\r\\t\"")
+
+    # Test handling of mixed characters
+    test_print_str_escape_json("abc\ndef\"ghi", "\"abc\\ndef\\\"ghi\"")
+
+    # Test handling of empty string
+    test_print_str_escape_json("", "\"\"")
+end
diff --git a/stdlib/Profile/test/runtests.jl b/stdlib/Profile/test/runtests.jl
index f4d64c791956f..7da2ee23a144f 100644
--- a/stdlib/Profile/test/runtests.jl
+++ b/stdlib/Profile/test/runtests.jl
@@ -305,3 +305,4 @@ end
 end
 
 include("allocs.jl")
+include("heapsnapshot_reassemble.jl")