diff --git a/README.md b/README.md new file mode 100644 index 0000000..b5b4abf --- /dev/null +++ b/README.md @@ -0,0 +1,92 @@ +# Video Inspiration Finder 🎯 + +AI-powered YouTube video recommendation system that learns your preferences to suggest coding videos you'll love. + +## Features + +- 🔍 **Smart Search**: Finds viral coding videos from YouTube +- 🤖 **Machine Learning**: RandomForest model learns your preferences +- 📊 **Web Dashboard**: YouTube-like interface showing personalized recommendations +- 🔒 **Privacy First**: All data stored locally in SQLite +- ⚡ **Fast Setup**: One command installation and execution + +## Quick Start + +### Option 1: Command Line Interface +```bash +./setup.sh +``` +This will: +1. Create virtual environment +2. Install dependencies +3. Search for videos +4. Start interactive rating session + +### Option 2: Web Dashboard +```bash +./setup.sh # First time setup +python run_dashboard.py +``` +Opens a YouTube-like dashboard at `http://localhost:5001` + +## How It Works + +1. **Search Phase**: Queries YouTube API for trending coding videos +2. **Rating Phase**: You rate videos as like/dislike with optional notes +3. **Learning Phase**: After 10+ ratings, ML model activates +4. **Recommendation Phase**: AI suggests videos based on your preferences + +## Project Structure + +``` +src/ +├── database/ # SQLite operations +├── youtube/ # YouTube API integration +├── ml/ # Machine learning pipeline +└── rating/ # Interactive rating system + +main.py # CLI application +dashboard_api.py # Web API server +templates/ # Dashboard HTML/CSS/JS +``` + +## ML Pipeline + +- **Features**: 11 extracted features (keywords, engagement, sentiment) +- **Model**: RandomForest with 100 trees +- **Training**: Incremental learning after each rating +- **Prediction**: Confidence scores for video recommendations + +## Requirements + +- Python 3.7+ +- YouTube Data API v3 key +- SQLite (included with Python) + +## Setup + +1. 
"""Flask server for the Video Inspiration dashboard page and its JSON API."""
import os

from flask import Flask, jsonify, render_template
from flask_cors import CORS
from dotenv import load_dotenv

from src.database.manager import setup_database_tables
from src.database.preference_operations import get_training_data_from_database, get_unrated_videos_with_features_from_database, get_rated_count_from_database
from src.database.video_operations import get_unrated_videos_from_database
from src.ml.model_training import create_recommendation_model, train_model_on_user_preferences
from src.ml.predictions import predict_video_preferences_with_model

load_dotenv()

app = Flask(__name__)
CORS(app)

# Number of stored ratings required before a model is trained.
MIN_RATINGS_FOR_TRAINING = 10
# Maximum number of videos returned to the dashboard per request.
DASHBOARD_VIDEO_LIMIT = 12
# Probability attached to videos while no trained model is available.
DEFAULT_LIKE_PROBABILITY = 0.5


class DashboardAPI:
    """Produce the video list shown on the dashboard.

    Uses the trained recommendation model when one is available and falls
    back to plain unrated videos with a neutral probability otherwise.
    """

    def __init__(self):
        self.db_path = "video_inspiration.db"
        self.model = None
        self.model_trained = False
        setup_database_tables(self.db_path)
        self._initialize_model()

    def _initialize_model(self):
        """Train the model when at least MIN_RATINGS_FOR_TRAINING ratings exist.

        ``model_trained`` is set only when the training helper returns a
        truthy result.
        """
        rated_count = get_rated_count_from_database(self.db_path)
        if rated_count < MIN_RATINGS_FOR_TRAINING:
            return

        self.model = create_recommendation_model()
        training_data = get_training_data_from_database(self.db_path)
        if train_model_on_user_preferences(self.model, training_data):
            self.model_trained = True

    def get_recommendations(self):
        """Return up to DASHBOARD_VIDEO_LIMIT videos for the dashboard.

        Returns:
            Model predictions when a trained model exists; otherwise unrated
            videos, each given ``like_probability`` of
            DEFAULT_LIKE_PROBABILITY.
        """
        if not self.model_trained:
            # The rating count is read from the database on every request, so
            # ratings written to the same file after start-up can cross the
            # threshold; re-check instead of staying untrained until restart.
            self._initialize_model()

        if self.model_trained and self.model:
            video_features = get_unrated_videos_with_features_from_database(self.db_path)
            recommendations = predict_video_preferences_with_model(self.model, video_features)
            return recommendations[:DASHBOARD_VIDEO_LIMIT]

        fallback_videos = get_unrated_videos_from_database(DASHBOARD_VIDEO_LIMIT, self.db_path)
        for video in fallback_videos:
            video['like_probability'] = DEFAULT_LIKE_PROBABILITY
        return fallback_videos


dashboard_api = DashboardAPI()


@app.route('/')
def dashboard():
    """Render the dashboard page template."""
    return render_template('dashboard.html')


@app.route('/api/recommendations')
def get_recommendations():
    """Return the current recommendations as JSON.

    On success the payload carries ``videos``, ``model_trained`` and
    ``total_ratings``. Any failure is logged and reported as HTTP 500 with
    ``success: False`` and the error text.
    """
    try:
        recommendations = dashboard_api.get_recommendations()

        formatted_recommendations = [
            {
                'id': video['id'],
                'title': video['title'],
                'channel_name': video['channel_name'],
                'view_count': video['view_count'],
                'url': video['url'],
                'thumbnail': f"https://img.youtube.com/vi/{video['id']}/hqdefault.jpg",
                'confidence': round(video.get('like_probability', DEFAULT_LIKE_PROBABILITY) * 100),
                'views_formatted': format_view_count(video['view_count']),
            }
            for video in recommendations
        ]

        return jsonify({
            'success': True,
            'videos': formatted_recommendations,
            'model_trained': dashboard_api.model_trained,
            'total_ratings': get_rated_count_from_database(dashboard_api.db_path)
        })

    except Exception as e:
        # Top-level API boundary: keep the JSON error contract for the page,
        # but record the traceback so failures are not silently lost.
        app.logger.exception("Failed to build recommendations")
        return jsonify({
            'success': False,
            'error': str(e)
        }), 500


def format_view_count(count):
    """Format a view count for display, e.g. ``"1.5K views"``.

    Args:
        count: Number of views; ``None`` is treated as 0.

    Returns:
        ``"<n> views"`` below 1,000, ``"<x.x>K views"`` below roughly one
        million, and ``"<x.x>M views"`` above that.
    """
    count = int(count or 0)
    # Switch to millions at 999,950: from there one-decimal rounding in
    # thousands would display "1000.0K views".
    if count >= 999_950:
        return f"{count / 1_000_000:.1f}M views"
    if count >= 1000:
        return f"{count / 1000:.1f}K views"
    return f"{count} views"


if __name__ == '__main__':
    # Debug mode enables the interactive Werkzeug debugger, which allows code
    # execution from the browser; keep it off unless explicitly requested.
    debug_enabled = os.getenv('FLASK_DEBUG', '').strip().lower() in ('1', 'true', 'yes', 'on')
    app.run(debug=debug_enabled, port=5001)
"""Launcher that optionally runs the CLI app, then starts the dashboard server."""
import sqlite3
import subprocess
import sys
import time
from contextlib import closing
from pathlib import Path

# SQLite file checked for existing videos before the dashboard is launched.
DB_PATH = "video_inspiration.db"
# Port announced to the user; dashboard_api.py starts its server on 5001.
DASHBOARD_PORT = 5001


def check_database_exists(db_path=DB_PATH):
    """Return True when the database file at *db_path* exists."""
    return Path(db_path).exists()


def check_has_videos(db_path=DB_PATH):
    """Return True when the ``videos`` table holds at least one row.

    A missing file, a missing table or an unreadable database all count as
    "no videos". The existence check comes first because connecting to a
    missing path would create an empty database file.
    """
    if not check_database_exists(db_path):
        return False

    try:
        # closing() guarantees the connection is released even if the
        # query raises.
        with closing(sqlite3.connect(db_path)) as conn:
            row = conn.execute("SELECT COUNT(*) FROM videos").fetchone()
    except sqlite3.Error:
        return False
    return row[0] > 0


def main():
    """Offer to populate an empty database, then run the dashboard server."""
    print("š Video Inspiration Dashboard")
    print("=" * 40)

    # Check if database and videos exist
    if not check_has_videos():
        print("ā ļø No videos found in database!")
        print("\nOptions:")
        print("1. Run main application first to search and rate videos")
        print("2. Continue with empty dashboard (demo mode)")

        choice = input("\nEnter choice (1/2): ").strip()

        if choice == "1":
            print("\nš Running main application first...")
            try:
                subprocess.run([sys.executable, "main.py"], check=True)
            except KeyboardInterrupt:
                # Ctrl+C in the CLI app means "move on", not "quit".
                print("\nš Switching to dashboard...")
                time.sleep(1)
            except (subprocess.CalledProcessError, OSError) as e:
                print(f"Error running main app: {e}")
                return

    # Start dashboard
    print("\nš Starting dashboard server...")
    print(f"š± Dashboard will be available at: http://localhost:{DASHBOARD_PORT}")
    print("š Press Ctrl+C to stop the server")
    print("-" * 40)

    try:
        subprocess.run([sys.executable, "dashboard_api.py"], check=True)
    except KeyboardInterrupt:
        print("\nš Dashboard stopped!")
    except (subprocess.CalledProcessError, OSError) as e:
        print(f"Error starting dashboard: {e}")


if __name__ == "__main__":
    main()
"""Search YouTube for an extra batch of coding videos and store them locally."""
import os

from dotenv import load_dotenv

from src.database.manager import setup_database_tables
from src.database.video_operations import save_videos_to_database, save_video_features_to_database
from src.youtube.search import search_youtube_videos_by_query, get_coding_search_queries
from src.youtube.details import get_video_details_from_youtube
from src.youtube.utils import remove_duplicate_videos
from src.ml.feature_extraction import extract_all_features_from_video

load_dotenv()

DEFAULT_DB_PATH = "video_inspiration.db"
DEFAULT_RESULTS_PER_QUERY = 10

# Use different/additional search queries to find new videos
ADDITIONAL_QUERIES = (
    "react tutorial 2024 millions views",
    "python projects for beginners viral",
    "full stack web development course",
    "machine learning crash course",
    "javascript frameworks comparison",
    "coding interview preparation",
    "docker tutorial for developers",
    "git and github tutorial",
    "database design tutorial",
    "API development tutorial",
)


def search_more_videos(queries=None, results_per_query=DEFAULT_RESULTS_PER_QUERY,
                       db_path=DEFAULT_DB_PATH):
    """Search YouTube for each query and save the de-duplicated results.

    Called with no arguments it behaves as before: the built-in query list,
    10 results per query and ``video_inspiration.db``.

    Args:
        queries: Iterable of search strings; ``None`` uses ADDITIONAL_QUERIES.
        results_per_query: Count passed to the search helper for each query.
        db_path: SQLite file receiving the videos and their features.

    Prints an error and returns early when YOUTUBE_API_KEY is not set.
    """
    api_key = os.getenv('YOUTUBE_API_KEY')
    if not api_key:
        print("Error: YOUTUBE_API_KEY not found in environment variables")
        return

    if queries is None:
        queries = ADDITIONAL_QUERIES

    setup_database_tables(db_path)

    print("š Searching for more coding videos...")

    all_videos = []

    for query in queries:
        print(f" Searching: {query}")
        video_ids = search_youtube_videos_by_query(api_key, query, results_per_query)
        if not video_ids:
            # Nothing to look up for this query; skip the details request.
            continue
        all_videos.extend(get_video_details_from_youtube(api_key, video_ids))

    unique_videos = remove_duplicate_videos(all_videos)

    if unique_videos:
        save_videos_to_database(unique_videos, db_path)

        # Features are extracted and stored per video, keyed by its id.
        for video in unique_videos:
            features = extract_all_features_from_video(video)
            save_video_features_to_database(video['id'], features, db_path)

        print(f"ā Found and saved {len(unique_videos)} new videos!")
    else:
        print("ā No new videos found.")


if __name__ == "__main__":
    search_more_videos()
pip install requests pandas scikit-learn numpy python-dotenv flask flask-cors

echo "ā Setup complete!"

DB_FILE="video_inspiration.db"

# Print the integer result of a COUNT(*) query against $DB_FILE.
# Prints 0 when the file is missing, the sqlite3 command fails, or the
# output is not a plain non-negative integer, so callers can always use
# the value in numeric tests.
count_rows() {
    local result=""
    if [ -f "$DB_FILE" ]; then
        result=$(sqlite3 "$DB_FILE" "$1" 2>/dev/null) || result=""
    fi
    case "$result" in
        ''|*[!0-9]*) echo "0" ;;
        *) echo "$result" ;;
    esac
}

# Function to check if database has videos
check_videos() {
    count_rows "SELECT COUNT(*) FROM videos;"
}

# Function to check unrated videos count
check_unrated_videos() {
    count_rows "SELECT COUNT(*) FROM videos v LEFT JOIN preferences p ON v.id = p.video_id WHERE p.video_id IS NULL;"
}

# Run the first search; fall back to the standalone search script when
# main.py exits with a non-zero status for --search-only.
search_initial_videos() {
    python main.py --search-only 2>/dev/null || python search_more_videos.py
}

# Announce the dashboard URL and run the server in the foreground.
start_dashboard_server() {
    echo ""
    echo "š± Dashboard will be available at: http://localhost:5001"
    echo "š Press Ctrl+C to stop the server"
    echo "----------------------------------------"
    python dashboard_api.py
}

# Check current state
video_count=$(check_videos)
unrated_count=$(check_unrated_videos)

echo ""
echo "š Current Status:"
echo " Total videos: $video_count"
echo " Unrated videos: $unrated_count"
echo ""

# Main menu
echo "Choose what you want to do:"
echo "1. š Launch Dashboard (recommended)"
echo "2. š± Interactive CLI Rating Session"
echo "3. š Search for More Videos"
echo "4. š ļø Full Setup (Search + Rate + Dashboard)"
echo ""
read -p "Enter choice (1-4): " choice

case $choice in
    1)
        echo ""
        echo "š Launching Dashboard..."
        if [ "$unrated_count" -eq 0 ] && [ "$video_count" -gt 0 ]; then
            echo "ā ļø All videos are rated. Searching for more videos first..."
            python search_more_videos.py
        elif [ "$video_count" -eq 0 ]; then
            echo "ā ļø No videos found. Searching for videos first..."
            search_initial_videos
        fi
        start_dashboard_server
        ;;
    2)
        echo ""
        echo "š± Starting Interactive Rating Session..."
        python main.py
        ;;
    3)
        echo ""
        echo "š Searching for more videos..."
        python search_more_videos.py
        echo ""
        echo "ā Search complete! You can now:"
        echo " ⢠Run './setup.sh' again and choose option 1 for Dashboard"
        echo " ⢠Run 'python dashboard_api.py' directly"
        ;;
    4)
        echo ""
        echo "š ļø Running Full Setup..."
        echo "š Step 1: Searching for videos..."
        search_initial_videos
        echo ""
        echo "š± Step 2: Starting rating session..."
        echo "š” Tip: Rate at least 10 videos to activate AI recommendations"
        echo " (You can press 'q' anytime to skip to dashboard)"
        # Leaving the rating session early must not prevent the dashboard
        # step that this option advertises.
        python main.py || true
        echo ""
        echo "Step 3: Launching dashboard..."
        start_dashboard_server
        ;;
    *)
        echo "ā Invalid choice. Please run './setup.sh' again."
        exit 1
        ;;
esac
+ + +