xcx0902
/

tiny_llm

Text Generation

Model card Files Files and versions Community

tiny_llm / generate_data.py

xcx0902's picture

Upload folder using huggingface_hub

b51c975 verified about 1 month ago

history blame contribute delete

1.09 kB

	import openai
	import json
	import threading

	# Load the API settings from config.json in the current working directory.
	# Keys read later in this script: "key", "url", "model", and the optional
	# "cookie". The context manager closes the file handle deterministically
	# instead of leaving it to the garbage collector, and the explicit encoding
	# avoids depending on the platform default (JSON text is UTF-8).
	with open("config.json", encoding="utf-8") as config_file:
	    config: dict = json.load(config_file)

	# Generated text samples; filled by gen() and written out at the end of the
	# script.
	dataset = []

	def gen():
	    """Request one generated text sample and append it to ``dataset``.

	    Builds an OpenAI-compatible client from the module-level ``config``
	    (``key``, ``url``, ``model`` and the optional ``cookie``), sends a single
	    non-streaming chat completion request, and stores the returned message
	    text in the module-level ``dataset`` list.

	    This is a best-effort worker: any exception raised while creating the
	    client or performing the request is printed and the call returns without
	    adding anything. A response whose message content is ``None`` is also
	    skipped, because a ``None`` entry would make the later
	    ``"\\n".join(dataset)`` raise ``TypeError`` and lose every sample.
	    """
	    try:
	        client = openai.OpenAI(
	            api_key=config["key"],
	            base_url=config["url"],
	            default_headers={
	                "User-Agent": "OpenAI SDK",
	                "Cookie": config.get("cookie", ""),
	            },
	        )
	        response = client.chat.completions.create(
	            model=config["model"],
	            # The model id is additionally sent in the request body as
	            # "model_id" (presumably required by the target endpoint; confirm).
	            extra_body={"model_id": config["model"]},
	            messages=[
	                {"role": "user", "content": "Generate some texts for training LLM. Do not add any other elements to your response."}
	            ],
	            stream=False,
	        )
	        content = response.choices[0].message.content
	    except Exception as e:
	        # Deliberate best-effort: one failed request must not stop the others.
	        print(e)
	        return

	    if content is None:
	        # The SDK types message.content as optional; never store None.
	        print("Response contained no text content; skipped.")
	        return

	    dataset.append(content)

	thpool = []

	# Start 100 worker threads, each performing one gen() request.
	for _ in range(100):
	    th = threading.Thread(target=gen)
	    th.start()
	    thpool.append(th)

	# Wait for every request to finish before touching the collected samples.
	for th in thpool:
	    th.join()

	# Append the samples to train_data.txt. Each sample is terminated with its
	# own newline: the file is opened in append mode, so without a trailing
	# newline the last sample of one run would be glued to the first sample of
	# the next run. None entries are dropped because they cannot be written as
	# text.
	with open("train_data.txt", "a", encoding="utf-8") as f:
	    f.writelines(f"{text}\n" for text in dataset if text is not None)