MrSimple01 committed on
Commit
fa8d0c9
·
verified ·
1 Parent(s): bcf9fd7

Update src/documentProcessing.py

Browse files
Files changed (1) hide show
  1. src/documentProcessing.py +26 -12
src/documentProcessing.py CHANGED
@@ -36,11 +36,27 @@ def extract_text_from_txt(txt_path):
36
 
37
  def process_document(document_path, gemini_api_key, language, content_type):
38
  try:
39
- temp_file = tempfile.mktemp(suffix=os.path.splitext(document_path.name)[-1])
40
- with open(temp_file, 'wb') as f:
41
- f.write(document_path.read())
42
-
43
  file_extension = os.path.splitext(document_path.name)[-1].lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  if file_extension == '.pdf':
45
  text = extract_text_from_pdf(temp_file)
46
  elif file_extension == '.docx':
@@ -49,19 +65,17 @@ def process_document(document_path, gemini_api_key, language, content_type):
49
  text = extract_text_from_txt(temp_file)
50
  else:
51
  raise Exception(f"Unsupported file type: {file_extension}")
52
-
53
  text_file_path = tempfile.mktemp(suffix='.txt')
54
  with open(text_file_path, 'w', encoding='utf-8') as f:
55
  f.write(text)
56
-
 
57
  formatted_output, json_path, txt_path = analyze_document(
58
- text,
59
- gemini_api_key,
60
- language,
61
- content_type
62
  )
63
-
64
  return f"Document processed successfully", text_file_path, formatted_output, txt_path, json_path
65
  except Exception as e:
66
  error_message = f"Error processing document: {str(e)}"
67
- return error_message, None, error_message, None, None
 
36
 
37
  def process_document(document_path, gemini_api_key, language, content_type):
38
  try:
39
+ # Create a temporary file
 
 
 
40
  file_extension = os.path.splitext(document_path.name)[-1].lower()
41
+ temp_file = tempfile.mktemp(suffix=file_extension)
42
+
43
+ # Handle different file-like objects
44
+ if hasattr(document_path, 'read'):
45
+ # If it's a file-like object with read method
46
+ with open(temp_file, 'wb') as f:
47
+ f.write(document_path.read())
48
+ elif hasattr(document_path, 'file'):
49
+ # If it's a Django or similar web framework file upload
50
+ with open(temp_file, 'wb') as f:
51
+ for chunk in document_path.file.chunks():
52
+ f.write(chunk)
53
+ elif isinstance(document_path, str):
54
+ # If it's a file path string
55
+ temp_file = document_path
56
+ else:
57
+ raise Exception("Unsupported document_path type")
58
+
59
+ # Process based on file type
60
  if file_extension == '.pdf':
61
  text = extract_text_from_pdf(temp_file)
62
  elif file_extension == '.docx':
 
65
  text = extract_text_from_txt(temp_file)
66
  else:
67
  raise Exception(f"Unsupported file type: {file_extension}")
68
+
69
  text_file_path = tempfile.mktemp(suffix='.txt')
70
  with open(text_file_path, 'w', encoding='utf-8') as f:
71
  f.write(text)
72
+
73
+ # Assume this function is defined elsewhere
74
  formatted_output, json_path, txt_path = analyze_document(
75
+ text, gemini_api_key, language, content_type
 
 
 
76
  )
77
+
78
  return f"Document processed successfully", text_file_path, formatted_output, txt_path, json_path
79
  except Exception as e:
80
  error_message = f"Error processing document: {str(e)}"
81
+ return error_message, None, error_message, None, None