File size: 3,650 Bytes
cb35e87
 
 
 
 
 
 
 
 
 
 
385bf5d
 
fb9e6d1
7f5bdb5
385bf5d
 
 
cb35e87
 
7f5bdb5
cb35e87
 
 
385bf5d
 
7f5bdb5
655c971
e146ae1
 
 
 
 
7f5bdb5
073a510
 
 
cb35e87
92c7818
be33e6a
073a510
92c7818
f5e91d1
073a510
cb35e87
 
 
 
655c971
 
 
cb35e87
655c971
cb35e87
655c971
 
 
 
 
 
cb35e87
655c971
cb35e87
655c971
 
 
cb35e87
 
f5e91d1
ea75bda
f5e91d1
cb35e87
 
 
 
7f5bdb5
cb35e87
 
 
 
 
 
 
655c971
 
cb35e87
 
 
 
 
97e4ff6
cb35e87
01b1b14
cb35e87
 
 
 
 
 
 
01b1b14
 
cb35e87
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
import html
import http.client as http_client
import json
import logging
import os
import re
import string

import gradio as gr
import requests


def mark_tokens_bold(string, tokens):
    """Wrap every occurrence of the given tokens in pink, bold HTML markup.

    All tokens are matched literally (case-sensitively) in a single pass over
    ``string``, so markup inserted for one token is never re-scanned and
    corrupted by another token such as ``"b"`` or ``"span"``.

    Args:
        string: Text to highlight. (The name shadows the ``string`` module
            inside this function; it is kept for interface compatibility.)
        tokens: Iterable of literal substrings to highlight. Empty tokens and
            duplicates are ignored; ``None`` is treated as no tokens.

    Returns:
        ``string`` with each match wrapped in
        ``<span style='color: #ff75b3;'><b>...</b></span>``.
    """
    # Longest first so that "foobar" wins over "foo" at the same position;
    # the secondary key only makes the pattern order deterministic.
    unique_tokens = sorted(
        {token for token in (tokens or ()) if token},
        key=lambda token: (-len(token), token),
    )
    if not unique_tokens:
        return string

    pattern = re.compile("|".join(re.escape(token) for token in unique_tokens))

    def _highlight(match):
        # A function replacement is inserted verbatim, so backslashes in the
        # matched text are not interpreted as regex template escapes.
        return "<span style='color: #ff75b3;'><b>" + match.group(0) + "</b></span>"

    return pattern.sub(_highlight, string)


def process_results(results, highlight_terms):
    """Render search hits as an HTML fragment.

    Each hit contributes three metadata lines (repository name, path and
    licenses) followed by a scrollable ``<pre><code>`` block holding the hit's
    text with the highlight terms marked via ``mark_tokens_bold``.

    The text and metadata are HTML-escaped before being interpolated, so
    source code containing ``<``, ``>`` or ``&`` is displayed literally
    instead of being parsed as markup.

    Args:
        results: Sequence of dicts; the keys ``"text"``, ``"repo_license"``
            (an iterable of strings), ``"repo_name"`` and ``"repo_path"`` are
            read from each one. ``None`` or an empty sequence means no hits.
        highlight_terms: Iterable of literal terms to highlight in the text.

    Returns:
        The concatenated HTML for all hits, or a "No results retrieved."
        fragment when there are none.
    """
    if not results:
        return """<br><p>No results retrieved.</p><br><hr>"""

    # Terms are escaped the same way as the text so they still match after
    # escaping (e.g. a term containing "<" must match "&lt;").
    escaped_terms = [html.escape(term, quote=False) for term in highlight_terms or []]

    entry_template = """\
            <p style='font-size:16px; text-align: left; color: white;'>Repository name: <span style='color: #727cd6;'>{}</span></p>
            <p style='font-size:16px; text-align: left; color: white;'>Repository path: <span style='color: #727cd6;'>{}</span></p>
            <p style='font-size:16px; text-align: left; color: white;'>Repository licenses: <span style='color: #727cd6;'>{}</span></p>
            <br>
            <hr>
            <pre style='height: 600px; overflow-y: scroll; overflow-x: hidden; color: #d9d9d9'><code>{}</code></pre>
            <hr>
            <br>
        """

    rendered_entries = []
    for result in results:
        # quote=False: quotes are harmless in element content and escaping
        # them would only bloat the displayed code.
        text_html = html.escape(result["text"], quote=False)
        text_html = mark_tokens_bold(text_html, escaped_terms)

        licenses = html.escape(" | ".join(result["repo_license"]))
        repo_name = html.escape(str(result["repo_name"]))
        repo_path = html.escape(str(result["repo_path"]))

        rendered_entries.append(
            entry_template.format(repo_name, repo_path, licenses, text_html)
        )
    return "".join(rendered_entries)


def scisearch(query, language, num_results=10):
    """Send ``query`` to the search backend and return the hits as HTML.

    The backend URL is read from the ``address`` environment variable. The
    query is POSTed as JSON (``{"query": ..., "k": ...}``) and the response's
    ``"results"`` and ``"highlight_terms"`` fields are passed to
    ``process_results``.

    Args:
        query: Raw query string; runs of whitespace are collapsed to single
            spaces. ``None`` or a blank query short-circuits.
        language: Accepted for interface compatibility; not used by the
            request.
        num_results: Maximum number of hits to request (sent as ``k``).

    Returns:
        The rendered HTML, or ``""`` when the query is empty.

    Raises:
        RuntimeError: If the ``address`` environment variable is not set.
        requests.HTTPError: If the backend answers with an error status.
    """
    # Check for None before touching the string; the original tested it only
    # after calling ``query.split()``, which had already failed by then.
    if query is None:
        return ""
    query = " ".join(query.split())
    if not query:
        return ""

    address = os.environ.get("address")
    if not address:
        raise RuntimeError(
            "The 'address' environment variable must point to the search backend."
        )

    post_data = {"query": query, "k": num_results}

    output = requests.post(
        address,
        headers={"Content-type": "application/json"},
        data=json.dumps(post_data),
        timeout=60,
    )
    # Fail with a clear HTTP error instead of a JSON/KeyError further down.
    output.raise_for_status()

    payload = json.loads(output.text)

    return process_results(payload["results"], payload["highlight_terms"])


# Markdown/HTML banner rendered at the top of the page.
# TODO: the IceCoder link target is still the placeholder "todo".
description = """# <p style="text-align: center; color: white;"> 🔎 IceCoder Dataset Search 🔍 </p>
<span style='color: white;'>When you use <a href="todo" style="color: #ff75b3;">IceCoder</a> to generate code, it might produce exact copies of code in the pretraining dataset. In that case the code requires attribution,
and with this search tool we aim to help you find out where the code came from.</span>"""


if __name__ == "__main__":
    # Build the single-page UI: banner, query box, result-count slider,
    # submit button and an HTML pane for the rendered hits.
    demo = gr.Blocks(
        css=".gradio-container {background-color: #20233fff; color:white}"
    )

    with demo:
        with gr.Row():
            gr.Markdown(value=description)
        with gr.Row():
            query = gr.Textbox(lines=1, max_lines=1, placeholder="Type your query here...", label="Query")

        with gr.Row():
            k = gr.Slider(1, 100, value=10, step=1, label="Max Results")
        with gr.Row():
            submit_btn = gr.Button("Submit")
        with gr.Row():
            # Placeholder banner shown until the first search replaces it.
            results = gr.HTML(label="Results", value="<img src='https://huggingface.co/spaces/bigcode/stack-jss-index/resolve/main/bigcode-contact.png' alt='Banner'>")

        def submit(query, k, lang="en"):
            """Run a search and return the update for the results pane.

            Exactly one output component (``results``) is wired to this
            handler, so every branch returns a single update for it; a blank
            query clears the pane.
            """
            query = (query or "").strip()
            if not query:
                return {results: ""}
            return {
                results: scisearch(query, lang, k),
            }

        # Pressing Enter in the textbox and clicking the button do the same.
        query.submit(fn=submit, inputs=[query, k], outputs=[results])
        submit_btn.click(submit, inputs=[query, k], outputs=[results])

    demo.launch(enable_queue=True, debug=True)