# RepoToText / app.py
# johnpaulbin's picture
# Create app.py
# 3b27229 verified
import gradio as gr
import git
import os
import tempfile
import shutil
def is_text_file(file_path):
    """Report whether *file_path* can be read as UTF-8 text.

    Only the start of the file is probed: up to 1024 characters are decoded
    in text mode. Returns True when that succeeds (an empty file counts as
    text) and False when decoding fails or the path cannot be opened or read
    for any other reason.
    """
    looks_like_text = True
    try:
        with open(file_path, mode='r', encoding='utf-8') as probe:
            probe.read(1024)
    except Exception:
        # Covers UnicodeDecodeError (content is not UTF-8) as well as any
        # other failure such as a missing or unreadable path.
        looks_like_text = False
    return looks_like_text
def _append_repo_files(repo_dir, outfile):
    """Append every readable text file under *repo_dir* to *outfile*.

    Each file is preceded by a banner holding its path relative to
    *repo_dir*. The ``.git`` directory, symlinks, and files rejected by
    ``is_text_file`` are skipped. A file that passes the probe but still
    fails to read gets an error line in the output instead of aborting the
    whole run.
    """
    banner = '=' * 80
    for root, dirs, files in os.walk(repo_dir):
        # Assigning through dirs[:] prunes the walk in place, so .git is never
        # descended into; sorting makes the output order deterministic.
        dirs[:] = sorted(d for d in dirs if d != '.git')
        for file in sorted(files):
            file_path = os.path.join(root, file)
            # A repository may contain symlinks that point outside the
            # checkout; following them would copy arbitrary local files into
            # the output, so they are skipped.
            if os.path.islink(file_path):
                continue
            if not is_text_file(file_path):
                continue
            relative_path = os.path.relpath(file_path, repo_dir)
            try:
                with open(file_path, 'r', encoding='utf-8') as infile:
                    outfile.write(f"\n\n{banner}\n")
                    outfile.write(f"File: {relative_path}\n")
                    outfile.write(f"{banner}\n\n")
                    outfile.write(infile.read())
            except Exception as e:
                # is_text_file only probes the start of the file, so a
                # decode error can still surface here.
                outfile.write(f"\nError reading file {relative_path}: {str(e)}\n")


def clone_and_concatenate(repo_url):
    """Clone *repo_url* and concatenate its text files into one file.

    Args:
        repo_url: URL (or path) of the Git repository to clone.

    Returns:
        A ``(output_path, status_message)`` tuple. On success *output_path*
        is the path of ``concatenated_files.txt`` inside a fresh temporary
        directory. On any failure *output_path* is ``None`` and the message
        starts with ``"Error: "``.
    """
    repo_url = (repo_url or "").strip()
    if not repo_url:
        return None, "Error: repository URL is empty"
    if repo_url.startswith("-"):
        # A leading dash could be interpreted by git as a command-line
        # option rather than a URL.
        return None, "Error: invalid repository URL"

    # The clone and the output live in separate directories: the output file
    # can then never collide with (or be mistaken for) a file of the same
    # name in the repository, and the clone can be deleted independently.
    clone_dir = tempfile.mkdtemp()
    output_dir = tempfile.mkdtemp()
    output_file = os.path.join(output_dir, "concatenated_files.txt")
    try:
        git.Repo.clone_from(repo_url, clone_dir)
        with open(output_file, 'w', encoding='utf-8') as outfile:
            _append_repo_files(clone_dir, outfile)
    except Exception as e:
        # Nothing useful was produced, so drop the output directory too.
        shutil.rmtree(output_dir, ignore_errors=True)
        return None, f"Error: {str(e)}"
    finally:
        # The checkout is only needed while concatenating; always remove it
        # so repeated requests do not accumulate clones on disk.
        shutil.rmtree(clone_dir, ignore_errors=True)
    return output_file, "Repository cloned and files concatenated successfully!"
# Assemble the web UI: a single URL textbox as input, and a downloadable file
# plus a status message as outputs, all driven by clone_and_concatenate.
_repo_url_input = gr.Textbox(label="Git Repository URL")
_result_outputs = [
    gr.File(label="Concatenated Files"),
    gr.Textbox(label="Status"),
]
iface = gr.Interface(
    fn=clone_and_concatenate,
    inputs=_repo_url_input,
    outputs=_result_outputs,
    title="Git Repository File Concatenator",
    description="Enter a Git repository URL to clone it and concatenate all text files into a single downloadable file.",
    examples=[["https://github.com/username/repository"]],
)

# Start the app only when this file is run directly, not when it is imported.
if __name__ == "__main__":
    iface.launch()