Skip to content

Commit

Permalink
[mono] Use unsigned char when computing UTF8 string hashes (#83273)
Browse files Browse the repository at this point in the history
* [mono] Use `unsigned char` when computing UTF8 string hashes

The C standard does not specify whether `char` is signed or unsigned, it is implementation defined.

Apparently Android aarch64 makes a different choice than other platforms (at least macOS arm64 and Windows x64 give different results).

Mono uses `mono_metadata_str_hash` in the AOT compiler and AOT runtime to optimize class name lookup.  As a result, classes whose names include UTF-8 continuation bytes (with the high bit = 1) will hash differently in the AOT compiler and on the device.

Fixes #82187
Fixes #78638

* [aot] add DEBUG_AOT_NAME_TABLE code for debugging the class names

   AOT compiler: Emits a second "class_name_table_debug" symbol that has all the class names and hashes as strings.

   AOT runtime: warns if a class is not found in the name cache

* Add regression test
  • Loading branch information
lambdageek authored Mar 13, 2023
1 parent bbc7e06 commit d3f7d59
Show file tree
Hide file tree
Showing 7 changed files with 114 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/mono/mono/eglib/ghashtable.c
Original file line number Diff line number Diff line change
Expand Up @@ -673,7 +673,7 @@ guint
g_str_hash (gconstpointer v1)
{
guint hash = 0;
char *p = (char *) v1;
unsigned char *p = (unsigned char *) v1;

while (*p++)
hash = (hash << 5) - (hash + *p);
Expand Down
3 changes: 2 additions & 1 deletion src/mono/mono/metadata/metadata.c
Original file line number Diff line number Diff line change
Expand Up @@ -5524,7 +5524,8 @@ guint
mono_metadata_str_hash (gconstpointer v1)
{
/* Same as g_str_hash () in glib */
char *p = (char *) v1;
/* note: signed/unsigned char matters - we feed UTF-8 to this function, so the high bit will give diferent results if we don't match. */
unsigned char *p = (unsigned char *) v1;
guint hash = *p;

while (*p++) {
Expand Down
52 changes: 48 additions & 4 deletions src/mono/mono/mini/aot-compiler.c
Original file line number Diff line number Diff line change
Expand Up @@ -11417,18 +11417,40 @@ emit_class_info (MonoAotCompile *acfg)
typedef struct ClassNameTableEntry {
guint32 token, index;
struct ClassNameTableEntry *next;
#ifdef DEBUG_AOT_NAME_TABLE
char *full_name;
uint32_t hash;
#endif
} ClassNameTableEntry;

static char*
get_class_full_name_for_hash (MonoClass *klass)
{
return mono_type_get_name_full (m_class_get_byval_arg (klass), MONO_TYPE_NAME_FORMAT_FULL_NAME);
}

static uint32_t
hash_for_class (MonoClass *klass)
{
char *full_name = get_class_full_name_for_hash (klass);
uint32_t hash = mono_metadata_str_hash (full_name);
g_free (full_name);
return hash;
}

static void
emit_class_name_table (MonoAotCompile *acfg)
{
int buf_size;
guint32 token, hash;
MonoClass *klass;
GPtrArray *table;
char *full_name;
guint8 *buf, *p;
ClassNameTableEntry *entry, *new_entry;
#ifdef DEBUG_AOT_NAME_TABLE
int name_buf_size = 0;
guint8 *name_buf, *name_p;
#endif

/*
* Construct a chained hash table for mapping class names to typedef tokens.
Expand All @@ -11446,13 +11468,17 @@ emit_class_name_table (MonoAotCompile *acfg)
mono_error_cleanup (error);
continue;
}
full_name = mono_type_get_name_full (m_class_get_byval_arg (klass), MONO_TYPE_NAME_FORMAT_FULL_NAME);
hash = mono_metadata_str_hash (full_name) % table_size;
g_free (full_name);
hash = hash_for_class (klass) % table_size;

/* FIXME: Allocate from the mempool */
new_entry = g_new0 (ClassNameTableEntry, 1);
new_entry->token = token;
#ifdef DEBUG_AOT_NAME_TABLE
new_entry->full_name = get_class_full_name_for_hash (klass);
new_entry->hash = hash;
/* '%s'=%08x\n */
name_buf_size += strlen (new_entry->full_name) + strlen("''=\n") + 8;
#endif

entry = (ClassNameTableEntry *)g_ptr_array_index (table, hash);
if (entry == NULL) {
Expand All @@ -11471,6 +11497,10 @@ emit_class_name_table (MonoAotCompile *acfg)
/* Emit the table */
buf_size = table->len * 4 + 4;
p = buf = (guint8 *)g_malloc0 (buf_size);
#ifdef DEBUG_AOT_NAME_TABLE
name_buf_size ++; /* one extra trailing nul */
name_p = name_buf = (guint8 *)g_malloc0 (name_buf_size);
#endif

/* FIXME: Optimize memory usage */
g_assert (table_size < 65000);
Expand All @@ -11488,14 +11518,28 @@ emit_class_name_table (MonoAotCompile *acfg)
else
encode_int16 (0, p, &p);
}
#ifdef DEBUG_AOT_NAME_TABLE
if (entry != NULL) {
name_p += sprintf ((char*)name_p, "'%s'=%08x\n", entry->full_name, entry->hash);
g_free (entry->full_name);
}
#endif
g_free (entry);
}
#ifdef DEBUG_AOT_NAME_TABLE
g_assert (name_p - name_buf <= name_buf_size);
#endif
g_assert (p - buf <= buf_size);
g_ptr_array_free (table, TRUE);

emit_aot_data (acfg, MONO_AOT_TABLE_CLASS_NAME, "class_name_table", buf, GPTRDIFF_TO_INT (p - buf));

g_free (buf);

#ifdef DEBUG_AOT_NAME_TABLE
emit_aot_data (acfg, MONO_AOT_TABLE_CLASS_NAME_DEBUG, "class_name_table_debug", name_buf, GPTRDIFF_TO_INT (name_p - name_buf));
g_free (name_buf);
#endif
}

static void
Expand Down
18 changes: 18 additions & 0 deletions src/mono/mono/mini/aot-runtime.c
Original file line number Diff line number Diff line change
Expand Up @@ -2601,6 +2601,10 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
MonoTableInfo *t;
guint32 cols [MONO_TYPEDEF_SIZE];
GHashTable *nspace_table;
#ifdef DEBUG_AOT_NAME_TABLE
char *debug_full_name;
uint32_t debug_hash;
#endif

if (!amodule || !amodule->class_name_table)
return FALSE;
Expand Down Expand Up @@ -2634,6 +2638,10 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
full_name = g_strdup_printf ("%s.%s", name_space, name);
}
}
#ifdef DEBUG_AOT_NAME_TABLE
debug_full_name = g_strdup (full_name);
debug_hash = mono_metadata_str_hash (full_name) % table_size;
#endif
hash = mono_metadata_str_hash (full_name) % table_size;
if (full_name != full_name_buf)
g_free (full_name);
Expand Down Expand Up @@ -2673,6 +2681,9 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch
g_hash_table_insert (nspace_table, (char*)name2, *klass);
amodule_unlock (amodule);
}
#ifdef DEBUG_AOT_NAME_TABLE
g_free (debug_full_name);
#endif
return TRUE;
}

Expand All @@ -2686,6 +2697,13 @@ mono_aot_get_class_from_name (MonoImage *image, const char *name_space, const ch

amodule_unlock (amodule);

#ifdef DEBUG_AOT_NAME_TABLE
if (*klass == NULL) {
g_warning ("AOT class name cache '%s'=%08x not found\n", debug_full_name, debug_hash);
}
g_free (debug_full_name);
#endif

return TRUE;
}

Expand Down
5 changes: 5 additions & 0 deletions src/mono/mono/mini/aot-runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,8 @@ typedef enum {
MONO_AOT_METHOD_FLAG_INTERP_ENTRY_ONLY = 16,
} MonoAotMethodFlags;

#undef DEBUG_AOT_NAME_TABLE

typedef enum {
MONO_AOT_TABLE_BLOB,
MONO_AOT_TABLE_CLASS_NAME,
Expand All @@ -99,6 +101,9 @@ typedef enum {
MONO_AOT_TABLE_IMAGE_TABLE,
MONO_AOT_TABLE_WEAK_FIELD_INDEXES,
MONO_AOT_TABLE_METHOD_FLAGS_TABLE,
#ifdef DEBUG_AOT_NAME_TABLE
MONO_AOT_TABLE_CLASS_NAME_DEBUG,
#endif
MONO_AOT_TABLE_NUM
} MonoAotFileTable;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
<OutputType>Exe</OutputType>
</PropertyGroup>
<ItemGroup>
<Compile Include="repro.cs" />
</ItemGroup>
</Project>
31 changes: 31 additions & 0 deletions src/tests/Loader/classloader/regressions/GitHub_82187/repro.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
using System;

/* Regression test for https://github.com/dotnet/runtime/issues/78638
* and https://github.com/dotnet/runtime/issues/82187 ensure AOT
* cross-compiler and AOT runtime use the same name hashing for names
* that include UTF-8 continuation bytes.
*/

[MySpecial(typeof(MeineTüre))]
public class Program
{
public static int Main()
{
var attr = (MySpecialAttribute)Attribute.GetCustomAttribute(typeof (Program), typeof(MySpecialAttribute), false);
if (attr == null)
return 101;
if (attr.Type == null)
return 102;
if (attr.Type.FullName != "MeineTüre")
return 103;
return 100;
}
}

public class MySpecialAttribute : Attribute
{
public Type Type {get; private set; }
public MySpecialAttribute(Type t) { Type = t; }
}

public class MeineTüre {}

0 comments on commit d3f7d59

Please sign in to comment.