v1
This commit is contained in:
commit
5eb977dee6
25
.gitignore
vendored
Normal file
25
.gitignore
vendored
Normal file
@ -0,0 +1,25 @@
|
||||
.env
|
||||
venv/
|
||||
__pycache__/
|
||||
*.pyc
|
||||
*.pyo
|
||||
*.pyd
|
||||
.Python
|
||||
env/
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.log
|
||||
.git
|
||||
.mypy_cache
|
||||
.pytest_cache
|
||||
.hypothesis
|
||||
*.db
|
||||
*.sqlite
|
||||
*.sqlite3
|
||||
109
main.py
Normal file
109
main.py
Normal file
@ -0,0 +1,109 @@
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from src.database.manager import setup_database_tables
|
||||
from src.database.video_operations import save_videos_to_database, save_video_features_to_database, get_unrated_videos_from_database
|
||||
from src.database.preference_operations import save_video_rating_to_database, get_training_data_from_database, get_unrated_videos_with_features_from_database, get_rated_count_from_database
|
||||
|
||||
from src.youtube.search import search_youtube_videos_by_query, get_coding_search_queries
|
||||
from src.youtube.details import get_video_details_from_youtube
|
||||
from src.youtube.utils import remove_duplicate_videos
|
||||
|
||||
from src.ml.feature_extraction import extract_all_features_from_video
|
||||
from src.ml.model_training import create_recommendation_model, train_model_on_user_preferences
|
||||
from src.ml.predictions import predict_video_preferences_with_model
|
||||
|
||||
from src.rating.display import display_video_information_for_rating, display_rating_session_header, display_session_type_message
|
||||
from src.rating.user_input import get_user_rating_response, get_user_notes_for_rating
|
||||
from src.rating.session import process_user_rating_for_video, should_continue_rating_session, has_videos_to_rate
|
||||
|
||||
load_dotenv()
|
||||
|
||||
class VideoInspirationFinderApp:
    """Command-line application that collects videos, records ratings and ranks the rest.

    The app stores everything in a SQLite file (``self.db_path``), asks the
    user to rate unrated videos and, once training succeeds, orders the
    remaining videos with the trained model.
    """

    def __init__(self, api_key: str):
        """Remember the API key and make sure the database tables exist.

        Args:
            api_key: Key forwarded to the YouTube search and details helpers.
        """
        self.api_key = api_key
        self.db_path = "video_inspiration.db"
        self.model = None
        self.model_trained = False

        setup_database_tables(self.db_path)

    def search_and_save_coding_videos(self):
        """Search the first five configured queries and persist videos plus features."""
        print("🔍 Searching for coding videos...")

        all_videos = []
        search_queries = get_coding_search_queries()

        for query in search_queries[:5]:
            video_ids = search_youtube_videos_by_query(self.api_key, query, 10)
            videos = get_video_details_from_youtube(self.api_key, video_ids)
            all_videos.extend(videos)

        # The same video can be returned by several queries.
        unique_videos = remove_duplicate_videos(all_videos)

        save_videos_to_database(unique_videos, self.db_path)

        for video in unique_videos:
            features = extract_all_features_from_video(video)
            save_video_features_to_database(video['id'], features, self.db_path)

        print(f"Found and saved {len(unique_videos)} videos")

    def start_interactive_rating_session(self):
        """Loop over batches of videos until none are left or the user quits."""
        display_rating_session_header()

        while True:
            videos = self._get_videos_for_rating()
            rated_count = get_rated_count_from_database(self.db_path)
            session_message = display_session_type_message(self.model_trained, rated_count)

            print(f"\n{session_message}")

            if not has_videos_to_rate(videos):
                print("No more videos to rate!")
                break

            for video in videos:
                display_video_information_for_rating(video)

                response = get_user_rating_response()

                if not should_continue_rating_session(response):
                    return

                process_user_rating_for_video(
                    video, response, self._save_rating, get_user_notes_for_rating
                )
                # NOTE(review): the flattened source does not show whether this
                # call ran per rating or per batch; per rating is assumed here.
                self._try_train_model()

    def _save_rating(self, video_id, liked, notes):
        """Persist one rating in this app's database."""
        save_video_rating_to_database(video_id, liked, notes, self.db_path)

    def _get_videos_for_rating(self):
        """Return model-ranked videos when a trained model exists, else the top unrated ones."""
        # Compare against None explicitly: truth-testing a scikit-learn
        # ensemble calls its __len__, which needs the fitted ``estimators_``.
        if self.model_trained and self.model is not None:
            video_features = get_unrated_videos_with_features_from_database(self.db_path)
            return predict_video_preferences_with_model(self.model, video_features)
        return get_unrated_videos_from_database(10, self.db_path)

    def _try_train_model(self):
        """Create the model on first use and attempt training until it succeeds once."""
        if self.model_trained:
            return

        # ``not self.model`` would raise AttributeError on an unfitted ensemble
        # (its __len__ reads ``estimators_``), so test identity instead.
        if self.model is None:
            self.model = create_recommendation_model()

        training_data = get_training_data_from_database(self.db_path)
        success = train_model_on_user_preferences(self.model, training_data)
        if success:
            self.model_trained = True
|
||||
|
||||
def main():
    """Entry point: read the API key from the environment and run the full workflow.

    Prints a hint and returns without doing anything else when
    ``YOUTUBE_API_KEY`` is missing or empty.
    """
    youtube_key = os.getenv('YOUTUBE_API_KEY')

    if youtube_key:
        finder = VideoInspirationFinderApp(youtube_key)
        finder.search_and_save_coding_videos()
        finder.start_interactive_rating_session()
        return

    print("Error: YOUTUBE_API_KEY not found in environment variables")
    print("Please create a .env file with your YouTube API key")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
23
setup.sh
Executable file
23
setup.sh
Executable file
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
# Bootstrap script: creates a virtual environment if needed, installs the
# dependencies and then starts main.py.

# Stop at the first failing step instead of launching the app half-installed.
set -e

# Work relative to this script so venv/ and main.py are found from any cwd.
cd "$(dirname "$0")"

echo "🔧 Setting up Video Inspiration Finder..."

# Pick an interpreter for creating the venv; many systems only ship python3.
if command -v python3 >/dev/null 2>&1; then
    PYTHON_BIN=python3
elif command -v python >/dev/null 2>&1; then
    PYTHON_BIN=python
else
    echo "Error: no python interpreter found on PATH" >&2
    exit 1
fi

# Create virtual environment if it doesn't exist
if [ ! -d "venv" ]; then
    echo "📦 Creating virtual environment..."
    "$PYTHON_BIN" -m venv venv
fi

# Activate virtual environment
echo "🔄 Activating virtual environment..."
source venv/bin/activate

# Install dependencies
echo "📚 Installing dependencies..."
pip install requests pandas scikit-learn numpy python-dotenv

echo "✅ Setup complete!"
echo "🚀 Running Video Inspiration Finder..."

# Run the main script
python main.py
|
||||
1
src/__init__.py
Normal file
1
src/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Video Inspiration Finder package
|
||||
1
src/database/__init__.py
Normal file
1
src/database/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Database operations package
|
||||
56
src/database/manager.py
Normal file
56
src/database/manager.py
Normal file
@ -0,0 +1,56 @@
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from typing import List, Dict
|
||||
|
||||
def setup_database_tables(db_path: str):
    """Create the ``videos``, ``preferences`` and ``video_features`` tables if missing.

    Every statement uses ``CREATE TABLE IF NOT EXISTS``, so calling this
    repeatedly on the same file is harmless.

    Args:
        db_path: Path of the SQLite database file.
    """
    table_statements = (
        '''
        CREATE TABLE IF NOT EXISTS videos (
            id TEXT PRIMARY KEY,
            title TEXT,
            description TEXT,
            view_count INTEGER,
            like_count INTEGER,
            comment_count INTEGER,
            duration TEXT,
            published_at TEXT,
            channel_name TEXT,
            thumbnail_url TEXT,
            tags TEXT,
            category_id INTEGER,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
        )
        ''',
        '''
        CREATE TABLE IF NOT EXISTS preferences (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            video_id TEXT,
            liked BOOLEAN,
            notes TEXT,
            created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
            FOREIGN KEY (video_id) REFERENCES videos (id)
        )
        ''',
        '''
        CREATE TABLE IF NOT EXISTS video_features (
            video_id TEXT PRIMARY KEY,
            title_length INTEGER,
            description_length INTEGER,
            view_like_ratio REAL,
            engagement_score REAL,
            title_sentiment REAL,
            has_tutorial_keywords BOOLEAN,
            has_time_constraint BOOLEAN,
            has_beginner_keywords BOOLEAN,
            has_ai_keywords BOOLEAN,
            has_challenge_keywords BOOLEAN,
            FOREIGN KEY (video_id) REFERENCES videos (id)
        )
        ''',
    )

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for statement in table_statements:
            cursor.execute(statement)
        conn.commit()
    finally:
        # Close the connection even when a statement fails.
        conn.close()
|
||||
46
src/database/preference_operations.py
Normal file
46
src/database/preference_operations.py
Normal file
@ -0,0 +1,46 @@
|
||||
import sqlite3
|
||||
import pandas as pd
|
||||
|
||||
def save_video_rating_to_database(video_id: str, liked: bool, notes: str, db_path: str):
    """Insert one rating row into the ``preferences`` table.

    Args:
        video_id: Identifier of the rated video.
        liked: Whether the user liked the video.
        notes: Free-text comment entered by the user.
        db_path: Path of the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(
            'INSERT INTO preferences (video_id, liked, notes) VALUES (?, ?, ?)',
            (video_id, liked, notes),
        )
        conn.commit()
    finally:
        # Release the connection even if the insert raises.
        conn.close()
|
||||
|
||||
def get_training_data_from_database(db_path: str) -> pd.DataFrame:
    """Load the features of every rated video together with its ``liked`` label.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        One row per (features, rating) pair: all ``video_features`` columns
        followed by ``liked``.
    """
    query = '''
        SELECT vf.*, p.liked
        FROM video_features vf
        JOIN preferences p ON vf.video_id = p.video_id
    '''
    conn = sqlite3.connect(db_path)
    try:
        return pd.read_sql_query(query, conn)
    finally:
        # Close the connection even when the query fails.
        conn.close()
|
||||
|
||||
def get_unrated_videos_with_features_from_database(db_path: str) -> pd.DataFrame:
    """Load videos that have features but no rating, most viewed first.

    Args:
        db_path: Path of the SQLite database file.

    Returns:
        All ``videos`` columns followed by all ``video_features`` columns for
        every video without a row in ``preferences``.
    """
    query = '''
        SELECT v.*, vf.*
        FROM videos v
        JOIN video_features vf ON v.id = vf.video_id
        LEFT JOIN preferences p ON v.id = p.video_id
        WHERE p.video_id IS NULL
        ORDER BY v.view_count DESC
    '''
    conn = sqlite3.connect(db_path)
    try:
        return pd.read_sql_query(query, conn)
    finally:
        # Close the connection even when the query fails.
        conn.close()
|
||||
|
||||
def get_rated_count_from_database(db_path: str) -> int:
    """Return the number of rows stored in the ``preferences`` table.

    Args:
        db_path: Path of the SQLite database file.
    """
    conn = sqlite3.connect(db_path)
    try:
        (count,) = conn.execute("SELECT COUNT(*) FROM preferences").fetchone()
        return count
    finally:
        # Close the connection even when the query fails.
        conn.close()
|
||||
58
src/database/video_operations.py
Normal file
58
src/database/video_operations.py
Normal file
@ -0,0 +1,58 @@
|
||||
import sqlite3
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Tuple
|
||||
|
||||
def save_videos_to_database(videos: List[Dict], db_path: str):
    """Insert or replace the given videos in the ``videos`` table.

    Args:
        videos: Dicts with the keys ``id``, ``title``, ``description``,
            ``view_count``, ``like_count``, ``comment_count``, ``duration``,
            ``published_at``, ``channel_name``, ``thumbnail_url``, ``tags``
            and ``category_id``. Extra keys are ignored.
        db_path: Path of the SQLite database file.
    """
    # Name the columns so the statement does not depend on the table's
    # physical column order.
    video_keys = (
        'id', 'title', 'description',
        'view_count', 'like_count', 'comment_count',
        'duration', 'published_at', 'channel_name',
        'thumbnail_url', 'tags', 'category_id',
    )
    column_list = ', '.join(video_keys + ('created_at',))
    placeholders = ', '.join('?' for _ in range(len(video_keys) + 1))
    statement = f'INSERT OR REPLACE INTO videos ({column_list}) VALUES ({placeholders})'

    rows = [
        tuple(video[key] for key in video_keys) + (datetime.now().isoformat(),)
        for video in videos
    ]

    conn = sqlite3.connect(db_path)
    try:
        conn.executemany(statement, rows)
        conn.commit()
    finally:
        # Close the connection even when an insert fails.
        conn.close()
|
||||
|
||||
def save_video_features_to_database(video_id: str, features: Tuple, db_path: str):
    """Insert or replace the feature row of one video.

    Args:
        video_id: Identifier of the video the features belong to.
        features: Ten values in ``video_features`` column order:
            title_length, description_length, view_like_ratio,
            engagement_score, title_sentiment, has_tutorial_keywords,
            has_time_constraint, has_beginner_keywords, has_ai_keywords,
            has_challenge_keywords. Any sequence is accepted.
        db_path: Path of the SQLite database file.
    """
    statement = '''
        INSERT OR REPLACE INTO video_features (
            video_id, title_length, description_length, view_like_ratio,
            engagement_score, title_sentiment, has_tutorial_keywords,
            has_time_constraint, has_beginner_keywords, has_ai_keywords,
            has_challenge_keywords
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    '''
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(statement, (video_id, *features))
        conn.commit()
    finally:
        # Close the connection even when the insert fails.
        conn.close()
|
||||
|
||||
def get_unrated_videos_from_database(limit: int, db_path: str) -> List[Dict]:
    """Return up to ``limit`` videos without a rating, most viewed first.

    Args:
        limit: Maximum number of videos to return.
        db_path: Path of the SQLite database file.

    Returns:
        Dicts with the keys ``id``, ``title``, ``channel_name``,
        ``view_count`` and ``url``.
    """
    # Select the needed columns by name instead of indexing into ``v.*``.
    query = '''
        SELECT v.id, v.title, v.channel_name, v.view_count
        FROM videos v
        LEFT JOIN preferences p ON v.id = p.video_id
        WHERE p.video_id IS NULL
        ORDER BY v.view_count DESC
        LIMIT ?
    '''
    conn = sqlite3.connect(db_path)
    try:
        rows = conn.execute(query, (limit,)).fetchall()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    return [
        {
            'id': video_id,
            'title': title,
            'channel_name': channel_name,
            'view_count': view_count,
            'url': f"https://www.youtube.com/watch?v={video_id}",
        }
        for video_id, title, channel_name, view_count in rows
    ]
|
||||
1
src/ml/__init__.py
Normal file
1
src/ml/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Machine learning operations package
|
||||
42
src/ml/feature_extraction.py
Normal file
42
src/ml/feature_extraction.py
Normal file
@ -0,0 +1,42 @@
|
||||
from typing import Dict, Tuple
|
||||
|
||||
def calculate_basic_video_metrics(video: Dict) -> Tuple:
    """Compute length and engagement figures for one video.

    Returns:
        ``(title_length, description_length, view_like_ratio,
        engagement_score)``. Both ratios divide by the view count, using 1
        as the divisor when the count is below 1.
    """
    title_chars = len(video['title'])
    description_chars = len(video['description'])

    likes = video['like_count']
    views = max(video['view_count'], 1)
    likes_per_view = likes / views

    interactions = likes + video['comment_count']
    interactions_per_view = interactions / views

    return (title_chars, description_chars, likes_per_view, interactions_per_view)
|
||||
|
||||
def detect_keyword_features_in_video(title: str, description: str) -> Tuple:
    """Flag which keyword groups appear in a video's title and description.

    Matching is case-insensitive. Tutorial, beginner and AI keywords are
    looked up in both texts; time-constraint and challenge keywords only in
    the title.

    Returns:
        ``(has_tutorial, has_time_constraint, has_beginner, has_ai,
        has_challenge)`` as booleans.
    """
    import re  # local import: this is the only function that needs it

    title = title.lower()
    description = description.lower()

    tutorial_keywords = ['tutorial', 'learn', 'course', 'guide', 'how to']
    time_keywords = ['24 hours', '1 day', '1 hour', 'minutes', 'seconds', 'crash course']
    beginner_keywords = ['beginner', 'start', 'basics', 'introduction', 'getting started']
    ai_phrases = ['artificial intelligence', 'machine learning', 'neural network']
    challenge_keywords = ['challenge', 'build', 'create', 'project', 'coding']

    def mentions(text, keywords):
        return any(kw in text for kw in keywords)

    def mentions_ai(text):
        # "ai" must be a whole word; as a plain substring it also matches
        # "train", "explain", "again", ...
        return re.search(r'\bai\b', text) is not None or mentions(text, ai_phrases)

    has_tutorial = mentions(title, tutorial_keywords) or mentions(description, tutorial_keywords)
    has_time_constraint = mentions(title, time_keywords)
    has_beginner = mentions(title, beginner_keywords) or mentions(description, beginner_keywords)
    has_ai = mentions_ai(title) or mentions_ai(description)
    has_challenge = mentions(title, challenge_keywords)

    return (has_tutorial, has_time_constraint, has_beginner, has_ai, has_challenge)
|
||||
|
||||
def calculate_title_sentiment_score(title: str) -> float:
    """Score a title as (positive words found) minus (negative words found).

    Each listed word counts at most once and is matched case-insensitively
    as a substring of the title.

    Returns:
        The difference as a float, matching the annotated return type.
    """
    positive_words = ['amazing', 'best', 'awesome', 'great', 'perfect', 'love', 'incredible']
    negative_words = ['hard', 'difficult', 'impossible', 'failed', 'broke', 'wrong']

    lowered = title.lower()
    positive_count = sum(1 for word in positive_words if word in lowered)
    negative_count = sum(1 for word in negative_words if word in lowered)

    return float(positive_count - negative_count)
|
||||
|
||||
def extract_all_features_from_video(video: Dict) -> Tuple:
    """Build the full feature tuple for one video.

    Args:
        video: Dict with at least ``title``, ``description``, ``view_count``,
            ``like_count`` and ``comment_count``.

    Returns:
        Ten values ordered as: title_length, description_length,
        view_like_ratio, engagement_score, title_sentiment, has_tutorial,
        has_time_constraint, has_beginner, has_ai, has_challenge.
    """
    title = video['title'].lower()
    description = video['description'].lower()

    basic_metrics = calculate_basic_video_metrics(video)
    keyword_features = detect_keyword_features_in_video(title, description)
    sentiment_score = calculate_title_sentiment_score(title)

    # The sentiment score goes between the metrics and the keyword flags.
    # Consumers that bind this tuple positionally to columns listing
    # title_sentiment fifth would otherwise store every later value one
    # column off.
    return basic_metrics + (sentiment_score,) + keyword_features
|
||||
23
src/ml/model_training.py
Normal file
23
src/ml/model_training.py
Normal file
@ -0,0 +1,23 @@
|
||||
from sklearn.ensemble import RandomForestClassifier
|
||||
import pandas as pd
|
||||
|
||||
def create_recommendation_model():
    """Build the untrained classifier used for recommendations.

    Returns:
        A ``RandomForestClassifier`` with 100 estimators and
        ``random_state=42``.
    """
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    return forest
|
||||
|
||||
def train_model_on_user_preferences(model, training_data: pd.DataFrame) -> bool:
    """Fit ``model`` on the rated videos when there is enough usable data.

    Args:
        model: Object exposing ``fit(X, y)``.
        training_data: Frame holding the ten feature columns and ``liked``.

    Returns:
        True when the model was fitted. False when fewer than 10 rows are
        available or every row carries the same label.
    """
    if len(training_data) < 10:
        print("Need at least 10 rated videos to train model")
        return False

    feature_columns = [
        'title_length', 'description_length', 'view_like_ratio', 'engagement_score',
        'title_sentiment', 'has_tutorial_keywords', 'has_time_constraint',
        'has_beginner_keywords', 'has_ai_keywords', 'has_challenge_keywords'
    ]

    X = training_data[feature_columns]
    y = training_data['liked']

    # A classifier fitted on a single class returns one probability column,
    # which breaks callers that read the "liked" column of predict_proba.
    if y.nunique() < 2:
        print("Need both liked and disliked videos to train model")
        return False

    model.fit(X, y)
    print(f"Model trained on {len(training_data)} rated videos")
    return True
|
||||
33
src/ml/predictions.py
Normal file
33
src/ml/predictions.py
Normal file
@ -0,0 +1,33 @@
|
||||
from typing import List, Dict
|
||||
import pandas as pd
|
||||
|
||||
def predict_video_preferences_with_model(model, video_features: pd.DataFrame) -> List[Dict]:
    """Rank videos by predicted like probability and return the top ten.

    Args:
        model: Fitted classifier exposing ``predict_proba`` and, optionally,
            ``classes_``.
        video_features: Frame with the ten feature columns plus ``id``,
            ``title``, ``channel_name`` and ``view_count``.

    Returns:
        Up to ten dicts with ``id``, ``title``, ``channel_name``,
        ``view_count``, ``url`` and ``like_probability``, highest
        probability first. An empty frame yields an empty list.
    """
    if video_features.empty:
        return []

    feature_columns = [
        'title_length', 'description_length', 'view_like_ratio', 'engagement_score',
        'title_sentiment', 'has_tutorial_keywords', 'has_time_constraint',
        'has_beginner_keywords', 'has_ai_keywords', 'has_challenge_keywords'
    ]

    X = video_features[feature_columns]
    class_probabilities = model.predict_proba(X)

    # Locate the "liked" column through classes_ instead of assuming index 1:
    # a model fitted on one class exposes a single probability column.
    classes = list(getattr(model, 'classes_', (0, 1)))
    liked_positions = [pos for pos, label in enumerate(classes) if label == 1]
    if liked_positions:
        probabilities = class_probabilities[:, liked_positions[0]]
    else:
        # The model never saw a liked video, so nothing is predicted as liked.
        probabilities = [0.0] * len(video_features)

    video_features_copy = video_features.copy()
    video_features_copy['like_probability'] = probabilities

    top_videos = video_features_copy.nlargest(10, 'like_probability')

    recommendations = []
    for _, row in top_videos.iterrows():
        recommendations.append({
            'id': row['id'],
            'title': row['title'],
            'channel_name': row['channel_name'],
            'view_count': row['view_count'],
            'url': f"https://www.youtube.com/watch?v={row['id']}",
            'like_probability': row['like_probability']
        })

    return recommendations
|
||||
1
src/rating/__init__.py
Normal file
1
src/rating/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# Rating system operations package
|
||||
20
src/rating/display.py
Normal file
20
src/rating/display.py
Normal file
@ -0,0 +1,20 @@
|
||||
from typing import Dict
|
||||
|
||||
def display_video_information_for_rating(video: Dict):
    """Print a framed summary (title, channel, views, URL) of one video.

    Args:
        video: Dict with ``title``, ``channel_name``, ``view_count`` and
            ``url``; the view count is printed with thousands separators.
    """
    divider = '=' * 50

    print(f"\n{divider}")
    print(f"Title: {video['title']}")
    print(f"Channel: {video['channel_name']}")
    print(f"Views: {video['view_count']:,}")
    print(f"URL: {video['url']}")
    print(f"{divider}")
|
||||
|
||||
def display_rating_session_header():
    """Print the session banner followed by the list of accepted answers."""
    banner_lines = (
        "🎯 Video Inspiration Finder - Interactive Session",
        "Rate videos with 'y' (like), 'n' (dislike), 'q' (quit)",
    )
    for line in banner_lines:
        print(line)
|
||||
|
||||
def display_session_type_message(is_ml_ready: bool, rated_count: int) -> str:
    """Return the heading shown above the next batch of videos.

    Args:
        is_ml_ready: Whether recommendations come from a trained model.
        rated_count: Number of ratings stored so far.

    Returns:
        The ML heading when ``is_ml_ready`` is true; otherwise a heading
        stating how many more ratings are needed to reach 10.
    """
    if is_ml_ready:
        return "📊 ML Recommendations based on your preferences:"

    # Clamp at zero: with 10 or more stored ratings and no trained model yet,
    # the raw difference would be negative.
    remaining_needed = max(0, 10 - rated_count)
    return f"📹 Unrated videos (need {remaining_needed} more to train ML):"
|
||||
17
src/rating/session.py
Normal file
17
src/rating/session.py
Normal file
@ -0,0 +1,17 @@
|
||||
from typing import List, Dict
|
||||
|
||||
def process_user_rating_for_video(video: Dict, response: str, save_rating_func, get_notes_func):
    """Record a like or dislike for ``video`` based on the user's answer.

    Args:
        video: Dict containing at least ``id``.
        response: ``'y'`` for like, ``'n'`` for dislike; any other value is
            ignored.
        save_rating_func: Callable invoked as ``(video_id, liked, notes)``.
        get_notes_func: Callable invoked as ``(liked)`` that returns notes.
    """
    if response == 'y':
        liked, symbol = True, '👍'
    elif response == 'n':
        liked, symbol = False, '👎'
    else:
        return

    notes = get_notes_func(liked)
    save_rating_func(video['id'], liked, notes)
    print(f"Rated video {video['id']}: {symbol}")
|
||||
|
||||
def should_continue_rating_session(response: str) -> bool:
    """Return False only when the answer is the quit command ``'q'``."""
    wants_to_quit = response == 'q'
    return not wants_to_quit
|
||||
|
||||
def has_videos_to_rate(videos: List[Dict]) -> bool:
    """Return True when the batch contains at least one video."""
    batch_size = len(videos)
    return batch_size >= 1
|
||||
12
src/rating/user_input.py
Normal file
12
src/rating/user_input.py
Normal file
@ -0,0 +1,12 @@
|
||||
def get_user_rating_response() -> str:
    """Prompt until the user answers ``y``, ``n`` or ``q``.

    Answers are stripped and lower-cased before validation. When standard
    input is closed (end of file), the session is treated as ended and
    ``'q'`` is returned instead of letting ``EOFError`` propagate.

    Returns:
        One of ``'y'``, ``'n'`` or ``'q'``.
    """
    valid_responses = ('y', 'n', 'q')

    while True:
        try:
            response = input("Rate this video (y/n/q): ").strip().lower()
        except EOFError:
            # No more input can arrive, so asking again would never succeed.
            print()
            return 'q'

        if response in valid_responses:
            return response

        print("Please enter 'y', 'n', or 'q'")
|
||||
|
||||
def get_user_notes_for_rating(liked: bool) -> str:
    """Ask for an optional comment explaining the rating.

    Args:
        liked: Selects the wording of the prompt.

    Returns:
        The stripped answer, or an empty string when standard input is
        closed (the notes are optional, so end of file is not an error).
    """
    if liked:
        prompt = "Why did you like it? (optional): "
    else:
        prompt = "Why didn't you like it? (optional): "

    try:
        return input(prompt).strip()
    except EOFError:
        return ""
|
||||
1
src/youtube/__init__.py
Normal file
1
src/youtube/__init__.py
Normal file
@ -0,0 +1 @@
|
||||
# YouTube API operations package
|
||||
67
src/youtube/details.py
Normal file
67
src/youtube/details.py
Normal file
@ -0,0 +1,67 @@
|
||||
import requests
|
||||
import json
|
||||
from typing import List, Dict
|
||||
|
||||
def get_video_details_from_youtube(api_key: str, video_ids: List[str]) -> List[Dict]:
    """Fetch details for the given video IDs and keep the relevant ones.

    Args:
        api_key: YouTube Data API key.
        video_ids: IDs to look up; an empty list short-circuits to ``[]``.

    Returns:
        Parsed video dicts that pass ``is_relevant_coding_video``. Request
        or decoding failures are reported on stdout and yield ``[]``.
    """
    if not video_ids:
        return []

    details_url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        'key': api_key,
        'id': ','.join(video_ids),
        'part': 'snippet,statistics,contentDetails'
    }

    try:
        # The timeout keeps a stalled connection from hanging the app.
        response = requests.get(details_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error getting video details: {e}")
        return []

    videos = []
    for item in data.get('items', []):
        try:
            video = parse_youtube_video_response(item)
        except (KeyError, TypeError, ValueError) as e:
            # One malformed entry should not discard the rest of the batch.
            print(f"Error getting video details: {e}")
            continue

        if is_relevant_coding_video(video):
            videos.append(video)

    return videos
|
||||
|
||||
def parse_youtube_video_response(item: Dict) -> Dict:
    """Flatten one ``videos.list`` item into the dict layout used by the app.

    Counters missing from the response default to 0, a missing duration
    defaults to an empty string, and the thumbnail falls back from ``high``
    to ``medium`` to ``default`` (empty string if none exist).

    Args:
        item: One element of the API response's ``items`` list; ``id`` and
            ``snippet`` (with title, description, publishedAt and
            channelTitle) are required.

    Returns:
        Dict with id, title, description, view_count, like_count,
        comment_count, duration, published_at, channel_name, thumbnail_url,
        tags (JSON-encoded list), category_id and url.
    """
    snippet = item['snippet']
    # Some videos hide their statistics, so the part may be absent.
    statistics = item.get('statistics', {})
    content_details = item.get('contentDetails', {})

    thumbnails = snippet.get('thumbnails', {})
    thumbnail_url = ''
    for size in ('high', 'medium', 'default'):
        if size in thumbnails:
            thumbnail_url = thumbnails[size].get('url', '')
            break

    return {
        'id': item['id'],
        'title': snippet['title'],
        'description': snippet['description'],
        'view_count': int(statistics.get('viewCount', 0)),
        'like_count': int(statistics.get('likeCount', 0)),
        'comment_count': int(statistics.get('commentCount', 0)),
        'duration': content_details.get('duration', ''),
        'published_at': snippet['publishedAt'],
        'channel_name': snippet['channelTitle'],
        'thumbnail_url': thumbnail_url,
        'tags': json.dumps(snippet.get('tags', [])),
        'category_id': int(snippet.get('categoryId', 0)),
        'url': f"https://www.youtube.com/watch?v={item['id']}"
    }
|
||||
|
||||
def is_relevant_coding_video(video: Dict, min_view_count: int = 100000) -> bool:
    """Decide whether a video is popular enough and about programming.

    Args:
        video: Dict with ``title``, ``description`` and ``view_count``.
        min_view_count: Minimum number of views required; defaults to the
            previously hard-coded 100000.

    Returns:
        True when the view count reaches the threshold and the title or
        description mentions a programming keyword (case-insensitive).
    """
    import re  # local import: this is the only function that needs it

    if video['view_count'] < min_view_count:
        return False

    # A newline separator keeps multi-word keywords from matching across the
    # title/description boundary.
    text = f"{video['title']}\n{video['description']}".lower()

    programming_keywords = [
        'coding', 'programming', 'javascript', 'python', 'react', 'web development',
        'tutorial', 'learn', 'build', 'create', 'website', 'algorithm'
    ]
    if any(keyword in text for keyword in programming_keywords):
        return True

    # "ai" and "app" must be whole words: as substrings they match ordinary
    # words such as "again", "email" or "happy", which defeats the filter.
    return re.search(r'\b(?:ai|apps?)\b', text) is not None
|
||||
43
src/youtube/search.py
Normal file
43
src/youtube/search.py
Normal file
@ -0,0 +1,43 @@
|
||||
import requests
|
||||
from typing import List, Dict
|
||||
|
||||
def search_youtube_videos_by_query(api_key: str, query: str, max_results: int) -> List[str]:
    """Search YouTube for videos matching ``query`` and return their IDs.

    The search is restricted to video results in category 28 published after
    2020-01-01 and ordered by view count.

    Args:
        api_key: YouTube Data API key.
        query: Free-text search terms.
        max_results: Value sent as the API's ``maxResults`` parameter.

    Returns:
        Video ID strings. Request or decoding failures are reported on
        stdout and yield ``[]``.
    """
    search_url = "https://www.googleapis.com/youtube/v3/search"
    params = {
        'key': api_key,
        'q': query,
        'part': 'snippet',
        'type': 'video',
        'order': 'viewCount',
        'maxResults': max_results,
        'videoCategoryId': '28',
        'publishedAfter': '2020-01-01T00:00:00Z'
    }

    try:
        # The timeout keeps a stalled connection from hanging the app.
        response = requests.get(search_url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        print(f"Error searching videos: {e}")
        return []

    video_ids = []
    for item in data.get('items', []):
        item_id = item.get('id')
        # Skip entries without a video ID rather than dropping every result.
        if isinstance(item_id, dict) and 'videoId' in item_id:
            video_ids.append(item_id['videoId'])

    return video_ids
|
||||
|
||||
def get_coding_search_queries() -> List[str]:
    """Return a fresh list of the ten search phrases used to find coding videos."""
    phrases = (
        "coding tutorial millions views",
        "programming challenge viral",
        "I built app hours",
        "learn programming beginner",
        "coding project from scratch",
        "AI coding tutorial",
        "web development crash course",
        "javascript tutorial millions",
        "python tutorial viral",
        "coding in 24 hours",
    )
    return list(phrases)
|
||||
12
src/youtube/utils.py
Normal file
12
src/youtube/utils.py
Normal file
@ -0,0 +1,12 @@
|
||||
from typing import List, Dict
|
||||
|
||||
def remove_duplicate_videos(videos: List[Dict]) -> List[Dict]:
    """Drop videos whose ``id`` was already seen, keeping the first occurrence.

    The order of the surviving videos matches their order in ``videos``.
    """
    first_by_id = {}
    for entry in videos:
        # setdefault stores only the first video seen for each id; dicts keep
        # insertion order.
        first_by_id.setdefault(entry['id'], entry)
    return list(first_by_id.values())
|
||||
Loading…
x
Reference in New Issue
Block a user