From 4aa7d31ca5af0533e99ef364ec722cd707115f62 Mon Sep 17 00:00:00 2001 From: d-netto Date: Tue, 12 Mar 2024 14:42:35 -0300 Subject: [PATCH 1/2] canonicalize nested function names through a per-module counter --- src/ast.c | 52 +++++++++++++++++++++++++++++++++++++++++--- src/init.c | 3 +++ src/julia.h | 1 + src/julia_internal.h | 6 +++++ src/module.c | 1 + 5 files changed, 60 insertions(+), 3 deletions(-) diff --git a/src/ast.c b/src/ast.c index a7c6ae3bc218c..4ecb68316fecc 100644 --- a/src/ast.c +++ b/src/ast.c @@ -149,6 +149,9 @@ struct macroctx_stack { struct macroctx_stack *parent; }; +// Map of scheme symbols to forwared julia symbols +htable_t scm_to_jl_sym_map; + static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mod); static value_t julia_to_scm(fl_context_t *fl_ctx, jl_value_t *v); static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, struct macroctx_stack *macroctx, int onelevel, size_t world, int throw_load_error); @@ -156,14 +159,57 @@ static jl_value_t *jl_expand_macros(jl_value_t *expr, jl_module_t *inmodule, str static jl_sym_t *scmsym_to_julia(fl_context_t *fl_ctx, value_t s) { assert(issymbol(s)); + jl_sym_t *sym = ptrhash_get(&scm_to_jl_sym_map, (void*)s); + // Symbol has already been forwarded + if (sym != HT_NOTFOUND) { + return sym; + } + char *n = NULL; if (fl_isgensym(fl_ctx, s)) { char gsname[16]; - char *n = uint2str(&gsname[1], sizeof(gsname)-1, + n = uint2str(&gsname[1], sizeof(gsname)-1, ((gensym_t*)ptr(s))->id, 10); *(--n) = '#'; - return jl_symbol(n); } - return jl_symbol(symbol_name(fl_ctx, s)); + else { + n = symbol_name(fl_ctx, s); + } + if (has_gensym_suffix(n)) { + // Transform a symbol such as `#foo#42` into `#foo#{module}<{counter}>` + jl_module_t *m = jl_ast_ctx(fl_ctx)->module; + assert(m != NULL); + // Get the module name + const char *mname = jl_symbol_name(m->name); + size_t l = strlen(n) + 1 + strlen(mname) + 1; + char *nn = (char*)malloc_s(l); + // Get the last `#` in the symbol + char *p = strrchr(n, '#'); + assert(p != NULL); + // Copy the prefix + size_t pl = p - n; + memcpy(nn, n, pl); + nn[pl] = '#'; + // Copy the module name + memcpy(nn + pl + 1, mname, l - pl - 1); + nn[l - 1] = '\0'; + n = nn; + // Now add the numeric suffix of m->sym_counter++ + uint32_t nxt = ++m->sym_counter; + // Convert it to string adding a leading `<` and a trailing `>` + char *q = uint2str((char*)alloca(16), 16, nxt, 10); + // Add the leading `<` and the trailing `>` + char *qq = alloca(strlen(q) + 2); + memset(qq, 0, strlen(q) + 2); + qq[0] = '<'; + memcpy(qq + 1, q, strlen(q) + 1); + qq[strlen(q) + 1] = '>'; + q = qq; + // Append it to the symbol, without a leading `#` + memcpy(nn + pl + 1 + strlen(mname), q, strlen(q) + 1); + // Add it to the hash table of forwarded symbols + ptrhash_put(&scm_to_jl_sym_map, (void*)s, (void*)jl_symbol(n)); + } + return jl_symbol(n); } static value_t fl_defined_julia_global(fl_context_t *fl_ctx, value_t *args, uint32_t nargs) diff --git a/src/init.c b/src/init.c index 801f12ec53930..7167e9254fbff 100644 --- a/src/init.c +++ b/src/init.c @@ -742,6 +742,8 @@ static void init_global_mutexes(void) { JL_MUTEX_INIT(&profile_show_peek_cond_lock, "profile_show_peek_cond_lock"); } +extern htable_t scm_to_jl_sym_map; + JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) { // initialize many things, in no particular order @@ -824,6 +826,7 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) jl_gc_init(); + htable_new(&scm_to_jl_sym_map, 0); arraylist_new(&jl_linkage_blobs, 0); arraylist_new(&jl_image_relocs, 0); arraylist_new(&eytzinger_image_tree, 0); diff --git a/src/julia.h b/src/julia.h index cf0317b6ad9fd..98cc847089ac9 100644 --- a/src/julia.h +++ b/src/julia.h @@ -661,6 +661,7 @@ typedef struct _jl_module_t { int8_t max_methods; jl_mutex_t lock; intptr_t hash; + uint32_t sym_counter; } jl_module_t; struct _jl_globalref_t { diff --git a/src/julia_internal.h b/src/julia_internal.h index 96637d8568844..7f8520b18164d 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -842,6 +842,12 @@ jl_method_t *jl_make_opaque_closure_method(jl_module_t *module, jl_value_t *name int nargs, jl_value_t *functionloc, jl_code_info_t *ci, int isva, int isinferred); JL_DLLEXPORT int jl_is_valid_oc_argtype(jl_tupletype_t *argt, jl_method_t *source); +STATIC_INLINE int has_gensym_suffix(char *name) JL_NOTSAFEPOINT +{ + char *other = strrchr(name, '#'); + return other != NULL && '0' < other[1] && other[1] <= '9'; +} + // Each tuple can exist in one of 4 Vararg states: // NONE: no vararg Tuple{Int,Float32} // INT: vararg with integer length Tuple{Int,Vararg{Float32,2}} diff --git a/src/module.c b/src/module.c index 702c98f165782..d89df2cf802ef 100644 --- a/src/module.c +++ b/src/module.c @@ -49,6 +49,7 @@ JL_DLLEXPORT jl_module_t *jl_new_module_(jl_sym_t *name, jl_module_t *parent, ui jl_module_public(m, name, 1); JL_GC_POP(); } + m->sym_counter = 0; return m; } From 06a4f9739b632a9e755f60acd4e6d209a4d78d66 Mon Sep 17 00:00:00 2001 From: d-netto Date: Tue, 12 Mar 2024 15:02:33 -0300 Subject: [PATCH 2/2] canonicalize anon function names through a per-module counter --- src/ast.c | 58 +++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/src/ast.c b/src/ast.c index 4ecb68316fecc..aa82567cf1c10 100644 --- a/src/ast.c +++ b/src/ast.c @@ -149,7 +149,7 @@ struct macroctx_stack { struct macroctx_stack *parent; }; -// Map of scheme symbols to forwared julia symbols +// Map of scheme symbols to forwarded julia symbols htable_t scm_to_jl_sym_map; static jl_value_t *scm_to_julia(fl_context_t *fl_ctx, value_t e, jl_module_t *mod); @@ -181,34 +181,50 @@ static jl_sym_t *scmsym_to_julia(fl_context_t *fl_ctx, value_t s) // Get the module name const char *mname = jl_symbol_name(m->name); size_t l = strlen(n) + 1 + strlen(mname) + 1; - char *nn = (char*)malloc_s(l); + char *nn = (char*)calloc_s(l); // Get the last `#` in the symbol char *p = strrchr(n, '#'); assert(p != NULL); - // Copy the prefix - size_t pl = p - n; - memcpy(nn, n, pl); + size_t pl; + // Now we check if the prefix itself is gensym'ed, i.e. #some_number + if (n[0] == '#' && '0' < n[1] && n[1] <= '9') { + // It is, so we forward the prefix as well + uint32_t nxt = ++m->sym_counter; + // Now convert it to module_name + char str[strlen(mname) + 16]; + snprintf(str, sizeof(str), "#%s<%d>", mname, nxt); + // Copy the prefix + memcpy(nn, str, strlen(str) + 1); + pl = strlen(str); + // First get the flisp symbol corresponding to the prefix + char pp[strlen(n) + 1]; + memcpy(pp, n, p - n); + pp[p - n] = '\0'; + value_t ps = symbol(fl_ctx, pp); + // Then forward it + ptrhash_put(&scm_to_jl_sym_map, (void*)ps, (void*)jl_symbol(nn)); + // If there is exactly one occurrence of `#` (instead of something like `#foo#42`), we are done + if (p == n) { + n = nn; + goto done; + } + } + else { + pl = p - n; + // Copy the prefix + memcpy(nn, n, pl); + } nn[pl] = '#'; - // Copy the module name - memcpy(nn + pl + 1, mname, l - pl - 1); - nn[l - 1] = '\0'; - n = nn; - // Now add the numeric suffix of m->sym_counter++ + // Append the module_name uint32_t nxt = ++m->sym_counter; - // Convert it to string adding a leading `<` and a trailing `>` - char *q = uint2str((char*)alloca(16), 16, nxt, 10); - // Add the leading `<` and the trailing `>` - char *qq = alloca(strlen(q) + 2); - memset(qq, 0, strlen(q) + 2); - qq[0] = '<'; - memcpy(qq + 1, q, strlen(q) + 1); - qq[strlen(q) + 1] = '>'; - q = qq; - // Append it to the symbol, without a leading `#` - memcpy(nn + pl + 1 + strlen(mname), q, strlen(q) + 1); + char str[strlen(mname) + 16]; + snprintf(str, sizeof(str), "%s<%d>", mname, nxt); + memcpy(nn + pl + 1, str, strlen(str) + 1); // Add it to the hash table of forwarded symbols + n = nn; ptrhash_put(&scm_to_jl_sym_map, (void*)s, (void*)jl_symbol(n)); } +done: return jl_symbol(n); }