Add a YouTube transcript summarization tool with a Gradio interface and image generation, which will later be used for blog writing.
Browse files- .gitignore +18 -0
- Images/image.jpg +0 -0
- app.py +27 -5
- requirements.txt +7 -0
- tool.py +56 -21
.gitignore
ADDED
@@ -0,0 +1,18 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
.ipynb_checkpoints/
|
2 |
+
__pycache__/
|
3 |
+
*.py[cod]
|
4 |
+
*$py.class
|
5 |
+
.env
|
6 |
+
venv/
|
7 |
+
__pycache__/
|
8 |
+
*.pyc
|
9 |
+
*.pyo
|
10 |
+
*$py.class
|
11 |
+
*~
|
12 |
+
*.swp
|
13 |
+
*.log
|
14 |
+
/instance
|
15 |
+
# Byte-compiled / optimized / DLL files
|
16 |
+
__pycache__/
|
17 |
+
*.py[cod]
|
18 |
+
*$py.class
|
Images/image.jpg
ADDED
![]() |
app.py
CHANGED
@@ -1,7 +1,29 @@
|
|
1 |
-
|
2 |
-
from
|
3 |
-
from tool import YouTubeTranscriptExtractor
|
4 |
|
5 |
-
|
|
|
6 |
|
7 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from tool import YouTubeTranscriptExtractor, TranscriptSummarizer
|
|
|
3 |
|
4 |
+
# Both tools are built once at import time so the summarization model is
# loaded a single time and reused by every request.
youtube_tool = YouTubeTranscriptExtractor()
summarizer_tool = TranscriptSummarizer()


def process_youtube_video(video_url):
    """Extract, summarize and illustrate the transcript of a YouTube video.

    Args:
        video_url: URL of the YouTube video typed into the Gradio textbox.

    Returns:
        A ``(transcript, summary, image_path)`` tuple matching the three
        Gradio outputs. ``image_path`` is ``None`` when the summarizer output
        carries no ``"Image URL: "`` section (for example when it returned an
        error message instead of a summary).
    """
    error_prefix = "An unexpected error occurred:"
    image_marker = "\n\nImage URL: "

    transcript = youtube_tool.forward(video_url=video_url)
    if transcript.startswith(error_prefix):
        # The extractor reports failures as a string; summarizing that string
        # would waste a model call and produce meaningless blog content.
        return transcript, "", None

    summary_and_blog = summarizer_tool.forward(transcript=transcript)

    # rpartition splits on the LAST marker, so a summary that happens to
    # contain the marker text itself no longer discards the image path.
    summary, marker, image_url = summary_and_blog.rpartition(image_marker)
    if not marker:
        return transcript, summary_and_blog, None
    return transcript, summary, image_url
|
16 |
+
|
17 |
+
# Output widgets, listed in the same order as the values returned by
# process_youtube_video: transcript text, summary text, generated image.
output_components = [
    gr.Textbox(label="Transcript"),
    gr.Textbox(label="Summary and Blog Content"),
    gr.Image(label="Generated Image", image_mode="RGBA"),
]

# Single-input web UI: the user pastes a video URL into a plain text field.
iface = gr.Interface(
    fn=process_youtube_video,
    inputs="text",
    outputs=output_components,
    title="YouTube Transcript Summarizer and Blog Content Generator",
    description="Enter a YouTube video URL to extract the transcript, summarize it, and generate blog content with an image.",
)

# Start the Gradio server.
iface.launch()
|
requirements.txt
CHANGED
@@ -1,2 +1,9 @@
|
|
1 |
smolagents
|
2 |
pytubefix
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
smolagents
|
2 |
pytubefix
|
3 |
+
gradio
|
4 |
+
transformers
|
5 |
+
torch
|
6 |
+
torchvision
|
7 |
+
python-dotenv
|
8 |
+
requests
|
9 |
+
Pillow
|
tool.py
CHANGED
@@ -1,5 +1,42 @@
|
|
1 |
from smolagents.tools import Tool
|
2 |
-
import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
class YouTubeTranscriptExtractor(Tool):
|
5 |
description = "Extracts the transcript from a YouTube video."
|
@@ -8,30 +45,28 @@ class YouTubeTranscriptExtractor(Tool):
|
|
8 |
output_type = "string"
|
9 |
|
10 |
def forward(self, video_url: str) -> str:
|
11 |
-
|
12 |
try:
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
transcript = yt.captions.all()[0].generate_srt_captions()
|
24 |
-
lang=yt.captions.all()[0].code
|
25 |
-
|
26 |
-
return lang + "transcript : " + transcript
|
27 |
-
# return transcript
|
28 |
|
|
|
|
|
|
|
|
|
|
|
29 |
|
|
|
|
|
30 |
except Exception as e:
|
31 |
-
|
32 |
-
return f"An unexpected error occurred: {str(e)}"
|
33 |
-
|
34 |
|
35 |
def __init__(self, *args, **kwargs):
|
36 |
self.is_initialized = False
|
37 |
-
|
|
|
1 |
from smolagents.tools import Tool
|
2 |
+
from typing import Optional
|
3 |
+
import os
|
4 |
+
from transformers import pipeline
|
5 |
+
import requests
|
6 |
+
import io
|
7 |
+
from PIL import Image
|
8 |
+
from dotenv import load_dotenv
|
9 |
+
|
10 |
+
load_dotenv()
|
11 |
+
|
12 |
+
class TranscriptSummarizer(Tool):
    """Summarize a transcript and generate an accompanying image.

    The summary is produced locally with a ``transformers`` summarization
    pipeline; the image is requested from a hosted text-to-image model over
    HTTP and written to disk. ``forward`` returns a single string of the form
    ``"<summary>\\n\\nImage URL: <path>"`` on success, or
    ``"An unexpected error occurred: <details>"`` on any failure.
    """

    description = "Summarizes a transcript and generates blog content using the transformers library and Hugging Face API for image generation."
    name = "transcript_summarizer"
    inputs = {'transcript': {'type': 'string', 'description': 'The transcript to summarize.'}}
    output_type = "string"

    def __init__(self, *args, **kwargs):
        """Load the summarization model and prepare the image API settings."""
        super().__init__(*args, **kwargs)
        self.summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        self.api_url = "https://api-inference.huggingface.co/models/ZB-Tech/Text-to-Image"
        # Only send an Authorization header when a key is configured;
        # otherwise the request would carry the literal text "Bearer None".
        api_key = os.getenv('HF_API_KEY')
        self.headers = {"Authorization": f"Bearer {api_key}"} if api_key else {}
        # File the generated image is written to and reported back to callers.
        self.image_path = "Images/image.jpg"
        # Upper bound (seconds) on the image request so a stalled call
        # cannot hang the application forever.
        self.request_timeout = 120

    def query(self, payload):
        """POST ``payload`` to the text-to-image endpoint and return raw bytes.

        Args:
            payload: JSON-serializable request body, e.g. ``{"inputs": prompt}``.

        Returns:
            The response body as ``bytes`` (expected to be encoded image data).

        Raises:
            requests.RequestException: On timeout, connection failure, or a
                non-success HTTP status. Without the status check an error
                body would be handed to PIL as if it were an image.
        """
        response = requests.post(
            self.api_url,
            headers=self.headers,
            json=payload,
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        return response.content

    def forward(self, transcript: str) -> str:
        """Summarize ``transcript`` and generate an image for the summary.

        Args:
            transcript: Plain-text transcript to summarize.

        Returns:
            ``"<summary>\\n\\nImage URL: <path>"`` when both steps succeed,
            otherwise ``"An unexpected error occurred: <details>"``. Errors
            are returned rather than raised because the caller displays the
            string directly.
        """
        try:
            # truncation=True keeps transcripts longer than the model's input
            # window from failing inside the model.
            # NOTE(review): max_length=2000 looks larger than this model's
            # position limit — confirm against the model configuration.
            summary = self.summarizer(
                transcript,
                max_length=2000,
                min_length=750,
                do_sample=False,
                truncation=True,
            )[0]['summary_text']

            key_entities = summary.split()[:100]  # First 100 words seed the image prompt
            image_prompt = f"Generate an image related to: {' '.join(key_entities)}, professional style"
            image_bytes = self.query({"inputs": image_prompt})

            image_url = self.image_path
            target_dir = os.path.dirname(image_url)
            if target_dir:
                os.makedirs(target_dir, exist_ok=True)

            with Image.open(io.BytesIO(image_bytes)) as image:
                # JPEG cannot store alpha or palette modes; normalize to RGB
                # so saving does not fail for such images.
                image.convert("RGB").save(image_url)

            return f"{summary}\n\nImage URL: {image_url}"  # Return the file path
        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"
|
40 |
|
41 |
class YouTubeTranscriptExtractor(Tool):
|
42 |
description = "Extracts the transcript from a YouTube video."
|
|
|
45 |
output_type = "string"
|
46 |
|
47 |
def forward(self, video_url: str) -> str:
    """Return the caption text of a YouTube video with SRT markup removed.

    English captions (code ``'en'``) are preferred; if absent, the first
    available caption track is used.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The caption text with cue numbers and timestamp lines stripped, one
        caption line per output line. On any failure, the string
        ``"An unexpected error occurred: <details>"`` is returned instead.
    """
    try:
        from pytubefix import YouTube

        yt = YouTube(video_url)
        captions = yt.captions

        if 'en' in captions:
            track = captions['en']
        else:
            available = captions.all()
            if not available:
                # Surface a clear reason instead of "list index out of range".
                raise ValueError("no captions are available for this video")
            track = available[0]

        transcript = track.generate_srt_captions()

        # Drop SRT cue numbers (digit-only lines) and timestamp lines
        # ("00:00:01,000 --> 00:00:02,000"); keep everything else.
        kept_lines = [
            line
            for line in transcript.splitlines()
            if not line.strip().isdigit() and "-->" not in line
        ]
        cleaned_transcript = "".join(f"{line}\n" for line in kept_lines)

        print("transcript : ", cleaned_transcript)
        return cleaned_transcript
    except Exception as e:
        return f"An unexpected error occurred: {str(e)}"
|
|
|
|
|
70 |
|
71 |
def __init__(self, *args, **kwargs):
|
72 |
self.is_initialized = False
|
|