Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,632 Bytes
cbfae43 7dfd5db 6c6ef93 9c9d121 7dfd5db 2144fdb 3b930a2 771a832 2144fdb f37ed6f e62c0db 83b6b2f e62c0db 7a1d957 5767a00 6c6ef93 e62c0db 4f80c95 3b930a2 93bd79b 83b6b2f 9c9d121 fc9ddd0 84443f7 87253bd cac8a90 a39d8f4 7a65a6f 9fac792 fb01acd 9fac792 fb01acd fc46b6a 9fac792 a39d8f4 822d8b4 33dfc83 9f58c2e fb01acd c92251d 6ce6744 f900015 33dfc83 83b6b2f 6c6ef93 83b6b2f 6c6ef93 c92251d 83b6b2f 6c6ef93 83b6b2f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
import frontmatter
import gradio as gr
import json
import spaces
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# Hub repository and the quantized GGUF checkpoint file inside it.
model_id = "AverageBusinessUser/aidapal"
filename = "aidapal-8k.Q4_K_M.gguf"

# Both loaders read from the same GGUF file, so the argument is shared.
_gguf_source = {"gguf_file": filename}

print("Downloading model")
tokenizer = AutoTokenizer.from_pretrained(model_id, **_gguf_source)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    **_gguf_source,
)
# A single sample of Hex-Rays decompiler output.
# NOTE(review): `example` is not referenced anywhere else in the visible code;
# the UI examples are read from the dataset file instead. Confirm whether it
# is still needed.
example = """int __fastcall sub_B0D04(int a1, int a2)
{
unsigned int v2; // r4
int result; // r0
v2 = a1 + a2;
if ( __CFADD__(a1, a2) )
return 0;
result = _libc_alloca_cutoff();
if ( v2 <= 0x1000 )
return result | 1;
return result;
}"""
def _load_example_inputs(path):
    """Return the "input" field of every record in a JSON-lines file.

    Each non-blank line of *path* is parsed as one JSON object. The file is
    opened in a ``with`` block so the handle is closed as soon as loading
    finishes, and it is decoded as UTF-8 instead of the platform default.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record has no "input" key.
    """
    with open(path, "r", encoding="utf-8") as dataset:
        # Blank lines (e.g. a trailing newline at end of file) are skipped
        # rather than handed to json.loads, which would raise on them.
        return [json.loads(line)["input"] for line in dataset if line.strip()]


# Inputs offered as clickable examples in the Gradio interface.
examples = _load_example_inputs("gpt4_juiced_dataset.json")
# Wrap the already-loaded model and tokenizer in a text-generation pipeline.
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
)
# Reference copy of the prompt TEMPLATE and SYSTEM text, kept as comments.
# NOTE(review): the `{{ .System }}` / `{{ .Prompt }}` placeholders look like an
# upstream model-file template - confirm where this was copied from.
# `predict` assembles its prompt in the same layout: system text, then the
# user's code wrapped in [INST] ... [/INST].
# TEMPLATE """{{ .System }}
# [INST]
# {{ .Prompt }}
# [/INST]
# """
# SYSTEM """<s>[INST]You are an expert at analyzing code that has been decompiled with IDA Hex Rays into IDA Hex Rays pseudocode. As a IDA Hex Rays pseudocode analyzer, you will be provided code that may or may not have symbols and variable names. You will analyze the IDA Hex Rays pseudocode and explain exactly what each line is doing. Then you will review your analysis and determine potential name for the function and variables within the function. Your task is use your knowledge of reverse engineering, IDA Hex Rays pseudocode, and C to assist the user with analysis and reverse engineering. Provide a detailed description of the Hex Rays pseudocode to the user explaining what the code does, suggest a function name based on the analysis of the pseudocode, and new variable names based on the analysis of the code. Only respond with valid JSON using the keys 'function_name','comment', and an array 'variables'. Values should use plain ascii with no special characters.
# Analyze the following IDA Hex Rays pseudocode and generate a valid JSON object containing the keys 'function_name','comment', and an array 'variables' explaining what the code does, suggest a function name based on the analysis of the code, and new variable names based on the analysis of the code.[/INST]</s>
# """
# System prompt sent ahead of every request; its text matches the commented
# SYSTEM block above word for word.
# NOTE(review): presumably the model expects this exact wording (including its
# grammatical slips) - confirm before editing the text.
system = """<s>[INST]You are an expert at analyzing code that has been decompiled with IDA Hex Rays into IDA Hex Rays pseudocode. As a IDA Hex Rays pseudocode analyzer, you will be provided code that may or may not have symbols and variable names. You will analyze the IDA Hex Rays pseudocode and explain exactly what each line is doing. Then you will review your analysis and determine potential name for the function and variables within the function. Your task is use your knowledge of reverse engineering, IDA Hex Rays pseudocode, and C to assist the user with analysis and reverse engineering. Provide a detailed description of the Hex Rays pseudocode to the user explaining what the code does, suggest a function name based on the analysis of the pseudocode, and new variable names based on the analysis of the code. Only respond with valid JSON using the keys 'function_name','comment', and an array 'variables'. Values should use plain ascii with no special characters.
Analyze the following IDA Hex Rays pseudocode and generate a valid JSON object containing the keys 'function_name','comment', and an array 'variables' explaining what the code does, suggest a function name based on the analysis of the code, and new variable names based on the analysis of the code.[/INST]</s>
"""
_JSON_FENCE_OPEN = "```json\n"
_JSON_FENCE_CLOSE = "```"


def _strip_json_fence(text):
    """Remove a Markdown json code fence wrapped around *text*.

    Text that does not start with the opening fence is returned unchanged.
    Otherwise the opening fence is dropped and, if what remains ends with a
    closing fence (ignoring trailing whitespace), that is dropped as well.
    """
    if not text.startswith(_JSON_FENCE_OPEN):
        return text
    body = text[len(_JSON_FENCE_OPEN):]
    trimmed = body.rstrip()
    if trimmed.endswith(_JSON_FENCE_CLOSE):
        # Without this, a fully fenced reply can never parse as JSON.
        return trimmed[: -len(_JSON_FENCE_CLOSE)]
    return body


@spaces.GPU
def predict(code):
    """Ask the model to analyze one piece of Hex-Rays pseudocode.

    The prompt is the module-level ``system`` text followed by *code* wrapped
    in ``[INST]`` / ``[/INST]`` markers. The prompt, its tokenization, the raw
    pipeline result and the final JSON string are all printed for debugging.

    Args:
        code: Decompiler output to analyze, as text.

    Returns:
        A ``(json_output, raw_output)`` tuple of strings. ``raw_output`` is
        the generated text exactly as the pipeline returned it.
        ``json_output`` is that text with any surrounding json code fence
        removed when the result parses as JSON, and ``"[]"`` otherwise.
    """
    prompt = f"""{system}
[INST]
{code}
[/INST]
"""
    print(f"Prompt: {repr(prompt)}")
    print(f"Tokenized: {tokenizer.tokenize(prompt)}")
    # NOTE(review): in transformers, max_length bounds prompt plus generated
    # tokens; consider max_new_tokens if long inputs cut the answer short.
    pipe_out = pipe(
        prompt,
        do_sample=True,
        top_k=100,
        top_p=0.09,
        temperature=1.2,
        repetition_penalty=1.1,
        return_full_text=False,
        max_length=4096,
    )
    print(f"Pipe out: {repr(pipe_out)}")
    raw_output = pipe_out[0]["generated_text"]
    output = _strip_json_fence(raw_output)

    try:
        json.loads(output)
    except (ValueError, RecursionError) as err:
        # Best effort: fall back to an empty JSON array, but say why.
        print(f"Model output is not valid JSON: {err}")
        json_output = json.dumps([])
    else:
        json_output = output
    print(f"JSON output: {repr(json_output)}")
    return json_output, raw_output
# Input: one text box for the pasted decompiler listing.
_code_input = gr.Text(label="Hex-Rays decompiler output")

# Outputs, in the same order as predict()'s two return values.
_result_views = [
    gr.JSON(label="Aidapal Output as JSON"),
    gr.Text(label="Raw Aidapal Output"),
]

# The `.content` of README.md, as parsed by `frontmatter`, is the UI description.
_readme_body = frontmatter.load("README.md").content

demo = gr.Interface(
    fn=predict,
    inputs=_code_input,
    outputs=_result_views,
    description=_readme_body,
    examples=examples,
)
demo.launch()
|