File size: 248 Bytes
c8a32e7
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
import re


def cleanup_text(full_text: str) -> str:
    """Normalize non-breaking spaces and collapse runs of blank lines.

    Any run of three or more newlines, optionally separated by other
    whitespace (spaces, tabs, ...), is reduced to exactly two newlines,
    i.e. a single blank line. Whitespace that follows the last newline of
    a run is left alone, so the indentation of the next line is preserved.

    Args:
        full_text: The text to clean up.

    Returns:
        The cleaned text, with every ``\\xa0`` replaced by a regular space
        and blank-line runs collapsed.
    """
    # Turn non-breaking spaces into plain spaces.
    text = full_text.replace('\xa0', ' ')
    # `[^\S\n]` is "any whitespace except a newline". A single pass handles
    # both bare newline runs and newlines separated by whitespace-only
    # lines; two separate passes could leave three newlines in a row
    # (e.g. "a\n \n \n \nb").
    return re.sub(r'\n(?:[^\S\n]*\n){2,}', '\n\n', text)