import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_path = 'LLM4Binary/llm4decompile-1.3b-v2' # V2 Model
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.bfloat16).cuda()
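
# Note: loading with .cuda() assumes a GPU is visible in the runtime. For a quick
# CPU-only local test, a hedged alternative (my assumption, not part of this Space) is:
#   model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype=torch.float32)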

description = """
# LLM4Decompile 1.3B V2

This is a space for testing the [LLM4Decompile 1.3B V2 model](https://huggingface.co./LLM4Binary/llm4decompile-1.3b-v2). It expects a decompiled function as produced by Ghidra. I simply copy and paste from the Ghidra GUI, but this is not the method recommended on the official model page, so YMMV.
"""

@spaces.GPU
def predict(input_asm):
    before = "# This is the assembly code:\n"  # prompt prefix expected by the model
    after = "\n# What is the source code?\n"  # prompt suffix expected by the model
    input_prompt = before + input_asm.strip() + after
    
    inputs = tokenizer(input_prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # the model's context length is 4096, so keep prompt + max_new_tokens below that
        outputs = model.generate(**inputs, max_new_tokens=2048)
    # drop the prompt tokens from the front and the trailing EOS token before decoding
    c_func_decompile = tokenizer.decode(outputs[0][len(inputs[0]):-1])
    return c_func_decompile
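
# Minimal local usage sketch (my assumption, not part of the Space's flow; the file name
# below is illustrative): feed predict() a Ghidra pseudocode dump like the examples below.
#   print(predict(open("func0_ghidra_output.c").read()))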

demo = gr.Interface(fn=predict,
                    description=description,
                    examples=["""undefined4 func0(float param_1,long param_2,int param_3)
{
  int local_28;
  int local_24;
  
  local_24 = 0;
  do {
    local_28 = local_24;
    if (param_3 <= local_24) {
      return 0;
    }
    while (local_28 = local_28 + 1, local_28 < param_3) {
      if ((double)((ulong)(double)(*(float *)(param_2 + (long)local_24 * 4) -
                                  *(float *)(param_2 + (long)local_28 * 4)) &
                  SUB168(_DAT_00402010,0)) < (double)param_1) {
        return 1;
      }
    }
    local_24 = local_24 + 1;
  } while( true );
}""","""
undefined8 __cdecl FUN_004025f0(byte *param_1,int param_2)

{
  do {
    *param_1 = ~*param_1;
    *param_1 = *param_1 - 0x37;
    *param_1 = *param_1 + 1;
    param_1 = param_1 + 1;
    param_2 = param_2 + -1;
  } while (param_2 != 0);
  return 0x3632000137;
}

""","""
void __thiscall FUN_00404f30(void *this,char param_1,uint param_2)

{
  char *pcVar1;
  
  if ((param_1 != '\0') && (0xf < *(uint *)((int)this + 0x18))) {
    pcVar1 = *(char **)((int)this + 4);
    if (param_2 != 0) {
      std::_Traits_helper::copy_s<>((char *)((int)this + 4),0x10,pcVar1,param_2);
    }
    FUN_00405320(pcVar1);
  }
  *(undefined4 *)((int)this + 0x18) = 0xf;
  std::basic_string<>::_Eos((basic_string<> *)this,param_2);
  return;
}

"""],
                    inputs="text", outputs="text")
demo.queue()
demo.launch()