From d0e68ef3292e96ff51cb17d1548458d2f0c47529 Mon Sep 17 00:00:00 2001 From: mahaloz Date: Fri, 5 Jul 2024 12:43:53 -0700 Subject: [PATCH 1/3] Place prompts in source, not as files --- README.md | 5 + dailalib/__init__.py | 25 ++- dailalib/api/litellm/prompts/__init__.py | 67 ++++---- .../api/litellm/prompts/identify_source.j2 | 40 ----- dailalib/api/litellm/prompts/prompts.py | 151 ++++++++++++++++++ .../api/litellm/prompts/rename_function.j2 | 36 ----- .../api/litellm/prompts/rename_variables.j2 | 38 ----- dailalib/api/litellm/prompts/summarize.j2 | 36 ----- setup.cfg | 5 +- 9 files changed, 206 insertions(+), 197 deletions(-) delete mode 100644 dailalib/api/litellm/prompts/identify_source.j2 create mode 100644 dailalib/api/litellm/prompts/prompts.py delete mode 100644 dailalib/api/litellm/prompts/rename_function.j2 delete mode 100644 dailalib/api/litellm/prompts/rename_variables.j2 delete mode 100644 dailalib/api/litellm/prompts/summarize.j2 diff --git a/README.md b/README.md index 4b4d3e9..a1559f1 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,11 @@ Install our library backend through pip and our decompiler plugin through our in pip3 install dailalib && daila --install ``` +This is the light mode. **If you want to use VarBERT, you must install the full version**: +```bash +pip3 install dailalib[full] && daila --install +``` + This will also download the VarBERT models for you through the [VarBERT API](https://github.com/binsync/varbert_api). If you happen to be installing DAILA on a machine that won't have internet access, like a secure network, you can use our Docker image in the [Docker Container](#docker-container) section. 
diff --git a/dailalib/__init__.py b/dailalib/__init__.py index 5df98c5..e2afb71 100644 --- a/dailalib/__init__.py +++ b/dailalib/__init__.py @@ -21,20 +21,26 @@ def create_plugin(*args, **kwargs): gui_ctx_menu_actions["DAILA/LLM/update_pmpt_style"] = ("Change prompt style...", litellm_api.ask_prompt_style) gui_ctx_menu_actions["DAILA/LLM/update_model"] = ("Change model...", litellm_api.ask_model) - # # VarModel API (local variable renaming) # - from varbert.api import VariableRenamingAPI - var_api = VariableRenamingAPI(delay_init=True) + VARBERT_AVAILABLE = True + try: + from varbert.api import VariableRenamingAPI + except ImportError: + VARBERT_AVAILABLE = False + + var_api = None + if VARBERT_AVAILABLE: + var_api = VariableRenamingAPI(delay_init=True) - # add single interface, which is to rename variables - def make_callback(predict_for_all_variables): - return lambda *args, **kwargs: var_api.query_model(**kwargs, remove_bad_names=not predict_for_all_variables) + # add single interface, which is to rename variables + def make_callback(predict_for_all_variables): + return lambda *args, **kwargs: var_api.query_model(**kwargs, remove_bad_names=not predict_for_all_variables) - gui_ctx_menu_actions["DAILA/VarBERT/varbert_rename_vars"] = ("Suggest new variable names (source-like only)", make_callback(predict_for_all_variables=False)) - gui_ctx_menu_actions["DAILA/VarBERT/varbert_rename_vars_all"] = ("Suggest new variable names (for all variables)", make_callback(predict_for_all_variables=True)) + gui_ctx_menu_actions["DAILA/VarBERT/varbert_rename_vars"] = ("Suggest new variable names (source-like only)", make_callback(predict_for_all_variables=False)) + gui_ctx_menu_actions["DAILA/VarBERT/varbert_rename_vars_all"] = ("Suggest new variable names (for all variables)", make_callback(predict_for_all_variables=True)) # # Decompiler Plugin Registration @@ -50,6 +56,9 @@ def make_callback(predict_for_all_variables): gui_init_args=args, gui_init_kwargs=kwargs ) + if not 
VARBERT_AVAILABLE: + deci.info("VarBERT not installed, reinstall with `pip install dailalib[full]` to enable local models.") + deci.info("DAILA backend loaded! Initializing context menus now...") litellm_api.init_decompiler_interface(decompiler_interface=deci) diff --git a/dailalib/api/litellm/prompts/__init__.py b/dailalib/api/litellm/prompts/__init__.py index 49a71c6..0cedb9d 100644 --- a/dailalib/api/litellm/prompts/__init__.py +++ b/dailalib/api/litellm/prompts/__init__.py @@ -1,44 +1,35 @@ from pathlib import Path from .prompt_type import PromptType, DEFAULT_STYLE, ALL_STYLES from .prompt import Prompt +from .prompts import SUMMARIZE_FUNCTION, IDENTIFY_SOURCE, RENAME_FUNCTION, RENAME_VARIABLES FILE_DIR = Path(__file__).absolute().parent - -def load_prompts(): - template_texts = {} - for prompt_path in FILE_DIR.glob("*.j2"): - with open(prompt_path, "r") as f: - template_texts[prompt_path.stem] = f.read() - - return [ - Prompt( - "summarize", - template_texts["summarize"], - desc="Summarize the function", - response_key="summary", - gui_result_callback=Prompt.comment_function - ), - Prompt( - "identify_source", - template_texts["identify_source"], - desc="Identify the source of the function", - response_key="link", - gui_result_callback=Prompt.comment_function - ), - Prompt( - "rename_variables", - template_texts["rename_variables"], - desc="Suggest variable names", - gui_result_callback=Prompt.rename_variables - ), - Prompt( - "rename_function", - template_texts["rename_function"], - desc="Suggest a function name", - gui_result_callback=Prompt.rename_function - ), - ] - - -PROMPTS = load_prompts() +PROMPTS = [ + Prompt( + "summarize", + SUMMARIZE_FUNCTION, + desc="Summarize the function", + response_key="summary", + gui_result_callback=Prompt.comment_function + ), + Prompt( + "identify_source", + IDENTIFY_SOURCE, + desc="Identify the source of the function", + response_key="link", + gui_result_callback=Prompt.comment_function + ), + Prompt( + 
"rename_variables", + RENAME_VARIABLES, + desc="Suggest variable names", + gui_result_callback=Prompt.rename_variables + ), + Prompt( + "rename_function", + RENAME_FUNCTION, + desc="Suggest a function name", + gui_result_callback=Prompt.rename_function + ), +] diff --git a/dailalib/api/litellm/prompts/identify_source.j2 b/dailalib/api/litellm/prompts/identify_source.j2 deleted file mode 100644 index 467705f..0000000 --- a/dailalib/api/litellm/prompts/identify_source.j2 +++ /dev/null @@ -1,40 +0,0 @@ -# Task -You are a decompiled C expert that identifies the original source given decompilation. Upon discovering the source, -you give a link to the code. - -You only respond with a valid json. As an example: -{ - "link": "https://github.com/torvalds/linux" - "version": "5.10" -} - -{% if few_shot %} -# Example -Here is an example. Given the following code: -``` -void __fastcall __noreturn usage(int status) -{ - v2 = program_name; - if ( status ) - { - v3 = dcgettext(0LL, "Try '%s --help' for more information.\n", 5); - _fprintf_chk(stderr, 1LL, v3, v2); - } - // ... -} -``` - -You would respond with: -{ - "link": "https://www.gnu.org/software/coreutils/" - "version": "" -} -{% endif %} - -# Example -Given the following code: -``` -{{ decompilation }} -``` - -You respond with: \ No newline at end of file diff --git a/dailalib/api/litellm/prompts/prompts.py b/dailalib/api/litellm/prompts/prompts.py new file mode 100644 index 0000000..6cbcfa5 --- /dev/null +++ b/dailalib/api/litellm/prompts/prompts.py @@ -0,0 +1,151 @@ +RENAME_FUNCTION = """ +# Example +Here is an example. 
Given the following code: +``` +int sub_404000(int a0, char** a1) +{ + int is_even; // rax + + is_even = sub_404100(a0[1]) % 2 == 0 + return is_even; +} +``` + +You respond with: +{ + "sub_404000": "is_even", + "sub_404100": "get_value", +} +{% endif %} + +# Example +Given the following code: +``` +{{ decompilation }} +``` + +You respond with: +""" + +RENAME_VARIABLES = """ +# Task +You are decompiled C expert that renames variables in code. When given code, you rename variables according to the +meaning of the function or its use. + +You only respond with a valid json. As an example: +{ + "v1": "i", + "v2": "ptr" +} + +{% if few_shot %} +# Example +Here is an example. Given the following code: +``` +int sub_404000(int a0, char** a1) +{ + int v1; // rax + + v1 = sub_404100(a0[1]) % 2 == 0 + return v1; +} +``` + +You responded with: +{ + "a0": "argc", + "a1": "argv", + "v1": "is_even" +} +{% endif %} + +# Example +Given the following code: +``` +{{ decompilation }} +``` + +You respond with: +""" + +SUMMARIZE_FUNCTION = """ +# Task +You are decompiled C expert that summarizes code. When given code, you summarize at a high level what the function does +and you identify if known algorithms are used in the function. + +You always respond with a valid json: +{ + "summary": "This function computes the fibonacci sequence. It takes an integer as an argument and returns the fibonacci number at that index.", + "algorithms": ["fibonacci"] +} + +{% if few_shot %} +# Example +Here is an example. 
Given the following code: +``` +int sub_404000(int a0, char** a1) +{ + int v1; // rax + v1 = sub_404100(a0[1]) % 2 == 0 + return v1; +} +``` + +You responded with: +{ + "summary": "This function takes two arguments and implements the is_even check on second argument", + "algorithms": ["is_even"] +} +{% endif %} + +# Example +Given the following code: +``` +{{ decompilation }} +``` + +You respond with: +""" + +IDENTIFY_SOURCE = """ +# Task +You are a decompiled C expert that identifies the original source given decompilation. Upon discovering the source, +you give a link to the code. + +You only respond with a valid json. As an example: +{ + "link": "https://github.com/torvalds/linux" + "version": "5.10" +} + +{% if few_shot %} +# Example +Here is an example. Given the following code: +``` +void __fastcall __noreturn usage(int status) +{ + v2 = program_name; + if ( status ) + { + v3 = dcgettext(0LL, "Try '%s --help' for more information.\n", 5); + _fprintf_chk(stderr, 1LL, v3, v2); + } + // ... +} +``` + +You would respond with: +{ + "link": "https://www.gnu.org/software/coreutils/" + "version": "" +} +{% endif %} + +# Example +Given the following code: +``` +{{ decompilation }} +``` + +You respond with: +""" \ No newline at end of file diff --git a/dailalib/api/litellm/prompts/rename_function.j2 b/dailalib/api/litellm/prompts/rename_function.j2 deleted file mode 100644 index 1f83271..0000000 --- a/dailalib/api/litellm/prompts/rename_function.j2 +++ /dev/null @@ -1,36 +0,0 @@ -# Task -You are decompiled C expert that renames functions. When given a function, you rename it according to the -meaning of the function or its use. You specify which function you are renaming by its name. - -You only respond with a valid json. As an example: -{ - "sub_404000": "fibonacci", -} - -{% if few_shot %} -# Example -Here is an example. 
Given the following code: -``` -int sub_404000(int a0, char** a1) -{ - int is_even; // rax - - is_even = sub_404100(a0[1]) % 2 == 0 - return is_even; -} -``` - -You respond with: -{ - "sub_404000": "is_even", - "sub_404100": "get_value", -} -{% endif %} - -# Example -Given the following code: -``` -{{ decompilation }} -``` - -You respond with: \ No newline at end of file diff --git a/dailalib/api/litellm/prompts/rename_variables.j2 b/dailalib/api/litellm/prompts/rename_variables.j2 deleted file mode 100644 index 1ed59e6..0000000 --- a/dailalib/api/litellm/prompts/rename_variables.j2 +++ /dev/null @@ -1,38 +0,0 @@ -# Task -You are decompiled C expert that renames variables in code. When given code, you rename variables according to the -meaning of the function or its use. - -You only respond with a valid json. As an example: -{ - "v1": "i", - "v2": "ptr" -} - -{% if few_shot %} -# Example -Here is an example. Given the following code: -``` -int sub_404000(int a0, char** a1) -{ - int v1; // rax - - v1 = sub_404100(a0[1]) % 2 == 0 - return v1; -} -``` - -You responded with: -{ - "a0": "argc", - "a1": "argv", - "v1": "is_even" -} -{% endif %} - -# Example -Given the following code: -``` -{{ decompilation }} -``` - -You respond with: \ No newline at end of file diff --git a/dailalib/api/litellm/prompts/summarize.j2 b/dailalib/api/litellm/prompts/summarize.j2 deleted file mode 100644 index 2e86b7a..0000000 --- a/dailalib/api/litellm/prompts/summarize.j2 +++ /dev/null @@ -1,36 +0,0 @@ -# Task -You are decompiled C expert that summarizes code. When given code, you summarize at a high level what the function does -and you identify if known algorithms are used in the function. - -You always respond with a valid json: -{ - "summary": "This function computes the fibonacci sequence. It takes an integer as an argument and returns the fibonacci number at that index.", - "algorithms": ["fibonacci"] -} - -{% if few_shot %} -# Example -Here is an example. 
Given the following code: -``` -int sub_404000(int a0, char** a1) -{ - int v1; // rax - v1 = sub_404100(a0[1]) % 2 == 0 - return v1; -} -``` - -You responded with: -{ - "summary": "This function takes two arguments and implements the is_even check on second argument", - "algorithms": ["is_even"] -} -{% endif %} - -# Example -Given the following code: -``` -{{ decompilation }} -``` - -You respond with: \ No newline at end of file diff --git a/setup.cfg b/setup.cfg index ab759cc..610fa1f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,7 +18,6 @@ install_requires = tiktoken Jinja2 libbs>=1.9.0 - varbert>=2.1.0 python_requires = >= 3.8 include_package_data = True @@ -27,3 +26,7 @@ packages = find: [options.entry_points] console_scripts = daila = dailalib.__main__:main + +[options.extras_require] +full = + varbert>=2.1.0 From f0c205d489ba08703c0e06712912a0decf063ea4 Mon Sep 17 00:00:00 2001 From: mahaloz Date: Fri, 5 Jul 2024 12:46:02 -0700 Subject: [PATCH 2/3] Forgot some prompt --- dailalib/api/litellm/prompts/prompts.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/dailalib/api/litellm/prompts/prompts.py b/dailalib/api/litellm/prompts/prompts.py index 6cbcfa5..23e8aa8 100644 --- a/dailalib/api/litellm/prompts/prompts.py +++ b/dailalib/api/litellm/prompts/prompts.py @@ -1,4 +1,14 @@ RENAME_FUNCTION = """ +# Task +You are decompiled C expert that renames functions. When given a function, you rename it according to the +meaning of the function or its use. You specify which function you are renaming by its name. + +You only respond with a valid json. As an example: +{ + "sub_404000": "fibonacci", +} + +{% if few_shot %} # Example Here is an example. 
Given the following code: ``` From b1931d1c94eb4bd4551ecbd77e760e5fe1fbbb1b Mon Sep 17 00:00:00 2001 From: mahaloz Date: Fri, 5 Jul 2024 12:47:41 -0700 Subject: [PATCH 3/3] Bump version to 3.3.0 --- dailalib/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dailalib/__init__.py b/dailalib/__init__.py index e2afb71..85fa5d1 100644 --- a/dailalib/__init__.py +++ b/dailalib/__init__.py @@ -1,4 +1,4 @@ -__version__ = "3.2.0" +__version__ = "3.3.0" from .api import AIAPI, LiteLLMAIAPI from libbs.api import DecompilerInterface