aptos-labs · georgemitenkov · Jul 17, 2024 · gelash · Jul 20, 2024 · gelash
diff --git a/third_party/move/move-vm/runtime/src/data_cache.rs b/third_party/move/move-vm/runtime/src/data_cache.rs
@@ -8,9 +8,7 @@ use crate::{
 };
 use bytes::Bytes;
 use move_binary_format::{
-    deserializer::DeserializerConfig,
-    errors::*,
-    file_format::{CompiledModule, CompiledScript},
+    deserializer::DeserializerConfig, errors::*, file_format::CompiledModule,
 };
 use move_core_types::{
     account_address::AccountAddress,
@@ -86,7 +84,6 @@ pub(crate) struct TransactionDataCache<'r> {
     deserializer_config: DeserializerConfig,
 
     // Caches to help avoid duplicate deserialization calls.
-    compiled_scripts: BTreeMap<[u8; 32], Arc<CompiledScript>>,
     compiled_modules: BTreeMap<ModuleId, (Arc<CompiledModule>, usize, [u8; 32])>,
 }
 
@@ -101,7 +98,6 @@ impl<'r> TransactionDataCache<'r> {
             remote,
             account_map: BTreeMap::new(),
             deserializer_config,
-            compiled_scripts: BTreeMap::new(),
             compiled_modules: BTreeMap::new(),
         }
     }
@@ -281,32 +277,6 @@ impl<'r> TransactionDataCache<'r> {
         load_module_impl(self.remote, &self.account_map, module_id)
     }
 
-    pub(crate) fn load_compiled_script_to_cache(
-        &mut self,
-        script_blob: &[u8],
-        hash_value: [u8; 32],
-    ) -> VMResult<Arc<CompiledScript>> {
-        let cache = &mut self.compiled_scripts;
-        match cache.entry(hash_value) {
-            btree_map::Entry::Occupied(entry) => Ok(entry.get().clone()),
-            btree_map::Entry::Vacant(entry) => {
-                let script = match CompiledScript::deserialize_with_config(
-                    script_blob,
-                    &self.deserializer_config,
-                ) {
-                    Ok(script) => script,
-                    Err(err) => {
-                        let msg = format!("[VM] deserializer for script returned error: {:?}", err);
-                        return Err(PartialVMError::new(StatusCode::CODE_DESERIALIZATION_ERROR)
-                            .with_message(msg)
-                            .finish(Location::Script));
-                    },
-                };
-                Ok(entry.insert(Arc::new(script)).clone())
-            },
-        }
-    }
-
     pub(crate) fn load_compiled_module_to_cache(
         &mut self,
         id: ModuleId,

diff --git a/third_party/move/move-vm/runtime/src/loader/mod.rs b/third_party/move/move-vm/runtime/src/loader/mod.rs
@@ -50,6 +50,7 @@
 mod script;
 mod type_loader;
 
+use crate::loader::script::ScriptCacheEntry;
 pub use function::LoadedFunction;
 pub(crate) use function::{Function, FunctionHandle, FunctionInstantiation, Scope};
 pub(crate) use modules::{Module, ModuleCache, ModuleStorage, ModuleStorageAdapter};
@@ -60,15 +61,14 @@
 type ScriptHash = [u8; 32];
 
 // A simple cache that offers both a HashMap and a Vector lookup.
-// Values are forced into a `Arc` so they can be used from multiple thread.
 // Access to this cache is always under a `RwLock`.
 #[derive(Clone)]
 pub(crate) struct BinaryCache<K, V> {
     // Notice that we are using the HashMap implementation from the hashbrown crate, not the
     // one from std, as it allows alternative key representations to be used for lookup,
     // making certain optimizations possible.
     id_map: hashbrown::HashMap<K, usize>,
-    binaries: Vec<Arc<V>>,
+    binaries: Vec<V>,
 }
 
 impl<K, V> BinaryCache<K, V>
@@ -82,16 +82,16 @@
         }
     }
 
-    fn insert(&mut self, key: K, binary: V) -> &Arc<V> {
-        self.binaries.push(Arc::new(binary));
+    fn insert(&mut self, key: K, binary: V) -> &V {
+        self.binaries.push(binary);
         let idx = self.binaries.len() - 1;
         self.id_map.insert(key, idx);
         self.binaries
             .last()
             .expect("BinaryCache: last() after push() impossible failure")
     }
 
-    fn get<Q>(&self, key: &Q) -> Option<&Arc<V>>
+    fn get<Q>(&self, key: &Q) -> Option<&V>
     where
         Q: Hash + Eq + Equivalent<K>,
     {
@@ -269,7 +269,15 @@
         sha3_256.update(script_blob);
         let hash_value: [u8; 32] = sha3_256.finalize().into();
 
-        let script = data_store.load_compiled_script_to_cache(script_blob, hash_value)?;
+        let mut scripts = self.scripts.write();
+        let script = match scripts.get(&hash_value) {
+            Some(ScriptCacheEntry::Verified(script)) => script.script.clone(),
+            Some(ScriptCacheEntry::Deserialized(compiled_script)) => compiled_script.clone(),
+            None => {
+                let compiled_script = self.deserialize_script(script_blob)?;
+                scripts.insert_compiled_script(hash_value, compiled_script)
+            },
+        };
         let script = traversal_context.referenced_scripts.alloc(script);
 
         // TODO(Gas): Should we charge dependency gas for the script itself?
@@ -307,16 +315,27 @@
 
         let mut scripts = self.scripts.write();
         let main = match scripts.get(&hash_value) {
-            Some(cached) => cached,
+            Some(ScriptCacheEntry::Verified(script)) => script.entry_point(),
+            Some(ScriptCacheEntry::Deserialized(compiled_script)) => {
+                self.verify_script(compiled_script.as_ref(), data_store, module_store)?;
+                let script = Script::new(
+                    Arc::clone(compiled_script),
+                    &hash_value,
+                    module_store,
+                    &self.name_cache,
+                )?;
+                scripts.insert_verified_script(hash_value, script)
+            },
             None => {
-                let ver_script = self.deserialize_and_verify_script(
-                    script_blob,
-                    hash_value,
-                    data_store,
+                let compiled_script = self.deserialize_script(script_blob)?;
+                self.verify_script(&compiled_script, data_store, module_store)?;
+                let script = Script::new(
+                    Arc::new(compiled_script),
+                    &hash_value,
                     module_store,
+                    &self.name_cache,
                 )?;
-                let script = Script::new(ver_script, &hash_value, module_store, &self.name_cache)?;
-                scripts.insert(hash_value, script)
+                scripts.insert_verified_script(hash_value, script)
             },
         };
 
@@ -353,33 +372,37 @@
         })
     }
 
-    // The process of deserialization and verification is not and it must not be under lock.
+    fn deserialize_script(&self, script: &[u8]) -> VMResult<CompiledScript> {
+        CompiledScript::deserialize_with_config(script, &self.vm_config().deserializer_config)
+            .map_err(|err| {
+                let msg = format!("[VM] deserializer for script returned error: {:?}", err);
+                PartialVMError::new(StatusCode::CODE_DESERIALIZATION_ERROR)
+                    .with_message(msg)
+                    .finish(Location::Script)
+            })
+    }
+
+    // The process of verification must not be under lock.
     // So when publishing modules through the dependency DAG it may happen that a different
     // thread had loaded the module after this process fetched it from storage.
     // Caching will take care of that by asking for each dependency module again under lock.
-    fn deserialize_and_verify_script(
+    fn verify_script(
         &self,
-        script: &[u8],
-        hash_value: [u8; 32],
+        script: &CompiledScript,
         data_store: &mut TransactionDataCache,
         module_store: &ModuleStorageAdapter,
-    ) -> VMResult<Arc<CompiledScript>> {
-        let script = data_store.load_compiled_script_to_cache(script, hash_value)?;
-
+    ) -> VMResult<()> {
         // Verification:
         //   - Local, using a bytecode verifier.
         //   - Global, loading & verifying module dependencies.
-        move_bytecode_verifier::verify_script_with_config(
-            &self.vm_config.verifier_config,
-            script.as_ref(),
-        )?;
+        move_bytecode_verifier::verify_script_with_config(&self.vm_config.verifier_config, script)?;
         let loaded_deps = script
             .immediate_dependencies()
             .into_iter()
             .map(|module_id| self.load_module(&module_id, data_store, module_store))
             .collect::<VMResult<Vec<_>>>()?;
-        dependencies::verify_script(&script, loaded_deps.iter().map(|m| m.module()))?;
-        Ok(script)
+        dependencies::verify_script(script, loaded_deps.iter().map(|m| m.module()))?;
+        Ok(())
     }
 
     //
@@ -1095,13 +1118,16 @@
     //
 
     fn get_script(&self, hash: &ScriptHash) -> Arc<Script> {
-        Arc::clone(
-            self.scripts
-                .read()
-                .scripts
-                .get(hash)
-                .expect("Script hash on Function must exist"),
-        )
+        let scripts = self.scripts.read();
+        let entry = scripts
+            .get(hash)
+            .expect("Script hash on Function must exist");
+        match entry {
+            ScriptCacheEntry::Verified(script) => script.clone(),
+            ScriptCacheEntry::Deserialized(_) => {
+                unreachable!("Script must be verified before it is main function scope is accessed")
+            },
+        }
     }
 }
 

diff --git a/third_party/move/move-vm/runtime/src/loader/modules.rs b/third_party/move/move-vm/runtime/src/loader/modules.rs
@@ -48,7 +48,7 @@ pub trait ModuleStorage {
     fn fetch_module_by_ref(&self, addr: &AccountAddress, name: &IdentStr) -> Option<Arc<Module>>;
 }
 
-pub(crate) struct ModuleCache(RwLock<BinaryCache<ModuleId, Module>>);
+pub(crate) struct ModuleCache(RwLock<BinaryCache<ModuleId, Arc<Module>>>);
 
 impl ModuleCache {
     pub fn new() -> Self {
@@ -68,7 +68,10 @@ impl Clone for ModuleCache {
 
 impl ModuleStorage for ModuleCache {
     fn store_module(&self, module_id: &ModuleId, binary: Module) -> Arc<Module> {
-        self.0.write().insert(module_id.clone(), binary).clone()
+        self.0
+            .write()
+            .insert(module_id.clone(), Arc::new(binary))
+            .clone()
     }
 
     fn fetch_module(&self, module_id: &ModuleId) -> Option<Arc<Module>> {

diff --git a/third_party/move/move-vm/runtime/src/loader/script.rs b/third_party/move/move-vm/runtime/src/loader/script.rs
@@ -203,12 +203,20 @@
     }
 }
 
-// A script cache is a map from the hash value of a script and the `Script` itself.
-// Script are added in the cache once verified and so getting a script out the cache
-// does not require further verification (except for parameters and type parameters)
+/// An entry for the script cache (associated to its hash). An entry is either:
+///   1. Fully-verified script, which can be used directly and does not require further
+///      verification (except for parameters and type parameters).
+///   2. A deserialized script, which is verified first time it is loaded for execution and
+///      then will no longer need re-verification.
+#[derive(Clone)]
+pub(crate) enum ScriptCacheEntry {
+    Verified(Arc<Script>),
+    Deserialized(Arc<CompiledScript>),
+}
+
 #[derive(Clone)]
 pub(crate) struct ScriptCache {
-    pub(crate) scripts: BinaryCache<ScriptHash, Script>,
+    scripts: BinaryCache<ScriptHash, ScriptCacheEntry>,
 }
 
 impl ScriptCache {
@@ -218,17 +226,31 @@
         }
     }
 
-    pub(crate) fn get(&self, hash: &ScriptHash) -> Option<Arc<Function>> {
-        self.scripts.get(hash).map(|script| script.entry_point())
+    pub(crate) fn get(&self, hash: &ScriptHash) -> Option<&ScriptCacheEntry> {
+        self.scripts.get(hash)
     }
 
-    pub(crate) fn insert(&mut self, hash: ScriptHash, script: Script) -> Arc<Function> {
-        match self.get(&hash) {
-            Some(cached) => cached,
-            None => {
-                let script = self.scripts.insert(hash, script);
-                script.entry_point()
-            },
-        }
+    pub(crate) fn insert_compiled_script(
+        &mut self,
+        hash: ScriptHash,
+        compiled_script: CompiledScript,
+    ) -> Arc<CompiledScript> {
+        let compiled_script_to_return = Arc::new(compiled_script);
+        self.scripts.insert(
+            hash,
+            ScriptCacheEntry::Deserialized(compiled_script_to_return.clone()),
+        );
+        compiled_script_to_return
+    }
+
+    pub(crate) fn insert_verified_script(
+        &mut self,
+        hash: ScriptHash,
+        verified_script: Script,
+    ) -> Arc<Function> {
+        let entry_point = verified_script.entry_point();
+        self.scripts
+            .insert(hash, ScriptCacheEntry::Verified(Arc::new(verified_script)));
+        entry_point
     }
 }