Spaces:
Runtime error
Runtime error
File size: 4,069 Bytes
d54ec92 61cd51a d54ec92 61cd51a dd24401 d54ec92 0cc7583 d54ec92 a479d14 0cc7583 a10f12f d54ec92 f031242 0cc7583 d424b19 0cc7583 a10f12f 0cc7583 d424b19 0cc7583 d54ec92 0cc7583 d424b19 0cc7583 f031242 d54ec92 43ab1fa f031242 d54ec92 a479d14 d54ec92 f031242 edfd769 d54ec92 1d0916c edfd769 d54ec92 43ab1fa d54ec92 f031242 d54ec92 43ab1fa d54ec92 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
from nltk import sent_tokenize
import openai
import re
import nltk
# Fetch the NLTK 'punkt' resource when this module is imported; presumably this
# is the data sent_tokenize relies on — confirm against the NLTK documentation.
nltk.download('punkt')
class SynonymEditor:
    """Rewrite text sentence by sentence through the OpenAI completion API.

    The text is split into paragraphs (on blank lines) and sentences; each
    sentence is sent to the model with a language-specific instruction and the
    answers are joined back into the original paragraph layout. Outside
    few-shot mode, double quotes are replaced by a placeholder token before the
    request and restored afterwards.
    """

    # Instruction used for every German request (with or without few-shots).
    _GERMAN_INSTRUCTION = (
        'Modernisiere den deutschen Text. Fasse direkte Reden NIE zusammen.\n'
    )
    # English instructions: few-shot mode, quoted sentence, plain sentence.
    _ENGLISH_FEW_SHOT_INSTRUCTION = (
        "Replace exactly one word with a synonym while preserving the overall "
        "sentence structure and meaning.\n"
    )
    _ENGLISH_QUOTE_INSTRUCTION = (
        "Replace exactly one word with a synonym while preserving __QUOTE__ "
        "in the following sentence:\n"
    )
    _ENGLISH_PLAIN_INSTRUCTION = (
        "Replace exactly one word with a synonym in the following sentence:\n"
    )

    def __init__(self, api_key, model_engine, max_tokens, temperature, language):
        """Store the request settings and register the API key.

        Args:
            api_key: Assigned to the module-level ``openai.api_key``.
            model_engine: Passed as ``model`` to ``openai.Completion.create``.
            max_tokens: Passed as ``max_tokens`` to the completion request.
            temperature: Passed as ``temperature`` to the completion request.
            language: ``'de'`` selects the German prompt and the
                ``'__ZITIEREN__'`` quote placeholder; any other value selects
                the English prompts and ``'__QUOTE__'``.
        """
        openai.api_key = api_key
        self.model_engine = model_engine
        self.max_tokens = max_tokens
        self.temperature = temperature
        self.language = language
        self.quote = '__ZITIEREN__' if language == 'de' else '__QUOTE__'

    # Tweak the prompt constants above to see how the wording affects output
    # quality. Longer prompts consume more tokens and therefore cost more.
    def _get_prompt(self, sentence: str, few_shots) -> str:
        """Build the prompt for one sentence.

        Args:
            sentence: The (possibly quote-masked) sentence to rewrite.
            few_shots: Few-shot example text inserted between the instruction
                and the sentence; any falsy value selects the plain prompts.

        Returns:
            The complete prompt string sent to the model.
        """
        is_german = self.language == "de"
        if few_shots:
            if is_german:
                return (self._GERMAN_INSTRUCTION + few_shots
                        + "\nEingang:" + sentence + " Ausgang:")
            return (self._ENGLISH_FEW_SHOT_INSTRUCTION + few_shots
                    + "\nInput:" + sentence + " Output:")
        if is_german:
            # The German prompt is the same whether or not quotes are present.
            return self._GERMAN_INSTRUCTION + sentence + '\n'
        if self.quote in sentence:
            return self._ENGLISH_QUOTE_INSTRUCTION + sentence + "\n"
        return self._ENGLISH_PLAIN_INSTRUCTION + sentence + "\n"

    # Call the OpenAI API here
    def __call_ai(self, sentence, few_shots):
        """Send one sentence to the completion endpoint and return the answer.

        The prompt is printed before the request. The first choice of the
        response is stripped and passed through ``_post_process_sentence``.
        """
        prompt = self._get_prompt(sentence, few_shots)
        print(prompt)
        response = openai.Completion.create(
            model=self.model_engine,
            prompt=prompt,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            top_p=1,
            frequency_penalty=0,
            presence_penalty=0,
        )
        return self._post_process_sentence(response.choices[0].text.strip())

    def _normalize_paragraph(self, text: str, few_shots) -> str:
        """Mask quotes (outside few-shot mode) and collapse whitespace.

        Args:
            text: One paragraph of raw input.
            few_shots: Few-shot example text; when falsy, every ``"`` is
                replaced by ``self.quote`` so it can be restored later.

        Returns:
            The paragraph with whitespace runs reduced to single spaces and
            leading/trailing whitespace removed.
        """
        # Truthiness (not ``== False``) keeps this in step with _get_prompt,
        # which also treats None and "" as "no few-shot examples".
        if not few_shots:
            text = text.replace('"', self.quote)
        return re.sub(r'\s+', ' ', text).strip()

    # Split the paragraph to preserve quotation marks
    def _split_into_sentences(self, text, few_shots):
        """Return the sentences of a paragraph after normalising it.

        The paragraph goes through ``_normalize_paragraph`` and is then split
        with ``sent_tokenize``.
        """
        return sent_tokenize(self._normalize_paragraph(text, few_shots))

    def _post_process_sentence(self, text: str) -> str:
        """Print the model output and turn quote placeholders back into ``"``."""
        print(text)
        print("==============")
        return text.replace(self.quote, '"')

    # Preprocess the text, perform edit task and join back to get the original format
    def _edit_text(self, text, few_shots=False):
        """Rewrite every sentence of ``text`` and rebuild the paragraph layout.

        Args:
            text: Full input text; paragraphs are separated by blank lines
                (``"\\n\\n"``).
            few_shots: Few-shot example text forwarded to the prompt builder;
                falsy (the default) disables few-shot mode.

        Returns:
            The edited text, with sentences joined by single spaces and
            paragraphs joined by ``"\\n\\n"``.
        """
        edited_paragraphs = []
        for paragraph in text.split("\n\n"):
            edited_sentences = [
                self.__call_ai(sentence, few_shots)
                for sentence in self._split_into_sentences(paragraph, few_shots)
            ]
            # Join edited sentences to form an edited paragraph.
            edited_paragraphs.append(' '.join(edited_sentences))
        # Join edited paragraphs to form the edited text.
        return '\n\n'.join(edited_paragraphs)

    # File Read Write operation
    def edit_file(self, input_file, output_file):
        """Read ``input_file``, edit its text and write it to ``output_file``.

        Progress messages are printed along the way. The input is decoded as
        UTF-8 with undecodable bytes ignored; the output is written as UTF-8
        as well so non-ASCII characters survive regardless of the platform's
        default encoding.
        """
        print("Opening File")
        with open(input_file, "r", encoding="utf8", errors="ignore") as f:
            text = f.read()
        print("Editing")
        edited_text = self._edit_text(text)
        print("Finishing up")
        with open(output_file, "w", encoding="utf8") as f:
            f.write(edited_text)
        print("Done!")
|