tiny_llm / generate_data.py
xcx0902's picture
Upload folder using huggingface_hub
b51c975 verified
import openai
import json
import threading
# Runtime settings loaded from config.json in the current working directory.
# The rest of this script reads the keys "key", "url", "model" and the
# optional "cookie" from it.
# Opened via a context manager so the handle is closed deterministically, and
# decoded as UTF-8 explicitly instead of relying on the platform's locale
# default.
with open("config.json", encoding="utf-8") as config_file:
    config: dict = json.load(config_file)

# Generated text samples; gen() appends to this list from worker threads.
dataset = []
def gen():
    """Request one generated text sample and append it to ``dataset``.

    Builds an OpenAI client from ``config`` (``key``, ``url`` and the optional
    ``cookie``), sends a single-message, non-streaming chat completion request
    for ``config["model"]`` and stores the first choice's message content in
    the module-level ``dataset`` list.

    Any exception is printed rather than raised, so a failed request simply
    contributes no sample. Returns ``None``.
    """
    try:
        client = openai.OpenAI(
            api_key=config["key"],
            base_url=config["url"],
            default_headers={
                "User-Agent": "OpenAI SDK",
                "Cookie": config.get("cookie", ""),
            },
        )
        response = client.chat.completions.create(
            model=config["model"],
            # The model name is also sent as "model_id" in the request body --
            # presumably required by the configured endpoint; TODO confirm.
            extra_body={"model_id": config["model"]},
            messages=[
                {"role": "user", "content": "Generate some texts for training LLM. Do not add any other elements to your response."}
            ],
            stream=False,
        )
        content = response.choices[0].message.content
        # The SDK types message.content as optional. Storing None would make a
        # later str.join over the dataset raise TypeError and lose every
        # collected sample, so only real text is kept.
        if content is not None:
            dataset.append(content)
    except Exception as e:
        # Best-effort: report the failure and let the caller carry on.
        print(e)
# Fan out 100 concurrent gen() calls, one thread each, then wait for all of
# them to finish before touching the collected samples.
thpool = []
for _ in range(100):
    th = threading.Thread(target=gen)
    th.start()
    thpool.append(th)
for th in thpool:
    th.join()

# The file is opened in append mode, so every sample is terminated with a
# newline; otherwise the first sample of the next run would be glued onto the
# last line written by this one.
# None entries are skipped defensively: joining them would raise TypeError and
# formatting them would write the literal text "None".
with open("train_data.txt", "a", encoding="utf-8") as f:
    f.writelines(f"{text}\n" for text in dataset if text is not None)