From 998acf7bb9d724abe8f8c724ef52c52e9294e82e Mon Sep 17 00:00:00 2001 From: sorz Date: Tue, 22 Jan 2019 12:57:41 +0800 Subject: [PATCH] Remove unnecessary de/re-struct on building trie Reduce time on loading s2t dict by around 28% --- src/lib.rs | 77 +++++++++++++++++++++++++++++++----------------------- 1 file changed, 44 insertions(+), 33 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 042176f..f070280 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -54,7 +54,7 @@ struct Leaf { value: Box, } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Default)] struct Node { value: Option>, tails: HashMap, @@ -69,13 +69,25 @@ enum DictNode { impl DictNode { fn node() -> Self { DictNode::Node ( - Node { - value: None, - tails: HashMap::new(), - } + Node::default() ) } + fn unwrap_node_mut(&mut self) -> &mut Node { + match self { + DictNode::Node (node) => node, + DictNode::Leaf (_) => panic!("expect Node, found Leaf"), + } + + } + + fn into_leaf(self) -> Leaf { + match self { + DictNode::Leaf (leaf) => leaf, + DictNode::Node (_) => panic!("expect Leaf, found Node"), + } + } + fn leaf(key: &str, value: Box) -> Self { DictNode::Leaf ( Leaf { @@ -85,38 +97,36 @@ impl DictNode { ) } - fn destruct(self) -> (Option>, HashMap) { - match self { - DictNode::Node ( Node { value, tails } ) => (value, tails), - DictNode::Leaf ( Leaf { key, value } ) => { - let mut tails = HashMap::new(); + fn add(&mut self, key: &str, value: &str) { + let self_node = match self { + DictNode::Node (node) => node, + DictNode::Leaf (_) => { + let node = Node::default(); + let leaf = mem::replace(self, DictNode::Node(node)); + let Leaf { key, value } = leaf.into_leaf(); + let mut node = self.unwrap_node_mut(); let mut key_chars = key.chars(); - let value = if let Some(hash_key) = key_chars.next() { + node.value = if let Some(hash_key) = key_chars.next() { let suffix = key_chars.as_str().into(); - tails.insert(hash_key, DictNode::leaf(suffix, 
value)); None } else { Some(value) }; - (value, tails) + node } - } - } + }; - fn add(self, key: &str, value: &str) -> Self { - let (self_value, mut tails) = self.destruct(); let mut key_chars = key.chars(); if let Some(hash_key) = key_chars.next() { let suffix = key_chars.as_str().into(); - let node = if let Some(subnode) = tails.remove(&hash_key) { - subnode.add(suffix, value) - } else { - DictNode::leaf(suffix, value.into()) - }; - tails.insert(hash_key, node); - DictNode::Node ( Node { value: self_value, tails } ) + self_node.tails.entry(hash_key) + .and_modify(|subnode| subnode.add(suffix, value)) + .or_insert_with(|| { + DictNode::leaf(suffix, value.into()) + }); } else { - DictNode::Node ( Node { value: Some(value.into()), tails } ) + self_node.value = Some(value.into()) } } @@ -161,14 +171,15 @@ impl Dict { pub fn load_lines(lines: T) -> Self where T: Iterator, S: AsRef { - let root = lines.filter_map(|line| { - let mut cols = line.as_ref().splitn(2, '\t'); - let key = cols.next()?; - let value = cols.next()?.splitn(2, ' ').next()?; - Some((key.into(), value.into())) - }).fold(DictNode::node(), |dict, (key, value): (String, String)| { - dict.add(&key, &value) - }); + let mut root = DictNode::node(); + lines.filter_map(|line| { + let mut cols = line.as_ref().splitn(2, '\t'); + let key = cols.next()?; + let value = cols.next()?.splitn(2, ' ').next()?; + Some((key.into(), value.into())) + }).for_each(|(key, value): (String, String)| { + root.add(&key, &value) + }); Dict { roots: vec![root] } }