# moodle-scraper/main.py

# update_study_material.py

import os
import shutil
import tempfile
from moodle_downloader import MoodleDownloader
from course_content_extractor import CourseContentExtractor
from dotenv import load_dotenv
import logging


def main():
    """Download Moodle course files and extract them into the study-material folder.

    Configuration is read from the environment after ``load_dotenv()`` runs:

    * ``STUDY_MATERIAL_ROOT_DIR`` -- existing directory handed to the extractor
      as ``root_dir``; its final path component is used as the study-program
      name stored on every course under the ``'StudyProgram'`` key.
    * ``MOODLE_USERNAME`` / ``MOODLE_PASSWORD`` -- credentials passed to
      ``MoodleDownloader``.

    If any of these is missing or invalid, a message is printed, an error is
    logged, and the function returns early without downloading anything.
    Downloads go to a temporary directory that is removed when the function
    leaves the ``with`` block.

    Returns:
        None.
    """
    # Root logger: DEBUG level, timestamped records, one stream handler.
    console_handler = logging.StreamHandler()
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[console_handler],
    )

    # Pull .env values into the process environment before reading settings.
    load_dotenv()

    root_dir = os.getenv('STUDY_MATERIAL_ROOT_DIR')
    if not root_dir:
        print("Please set the STUDY_MATERIAL_ROOT_DIR environment variable.")
        logging.error("STUDY_MATERIAL_ROOT_DIR environment variable not set.")
        return

    if not os.path.isdir(root_dir):
        print(f"The specified STUDY_MATERIAL_ROOT_DIR does not exist or is not a directory: {root_dir}")
        logging.error(f"Invalid STUDY_MATERIAL_ROOT_DIR: {root_dir}")
        return

    # The last component of the normalized root path names the study program
    # (normpath strips any trailing separator so basename is never empty).
    normalized_root = os.path.normpath(root_dir)
    study_program = os.path.basename(normalized_root)
    logging.info(f"Using root_dir as the study_program: {study_program}")

    with tempfile.TemporaryDirectory() as download_dir:
        logging.info(f"Using temporary download directory: {download_dir}")

        moodle_user = os.getenv('MOODLE_USERNAME')
        moodle_pass = os.getenv('MOODLE_PASSWORD')
        if not (moodle_user and moodle_pass):
            print("Please set your Moodle credentials in environment variables.")
            logging.error("Moodle credentials not set in environment variables.")
            return

        scraper = MoodleDownloader(
            moodle_user,
            moodle_pass,
            download_dir=download_dir,
            headless=True,
        )
        try:
            scraper.login()
            scraper.get_courses()
            scraper.download_all_courses()
        finally:
            # Always release the downloader, even when a step above raised.
            scraper.close()

        # Tag every course with the study program it belongs to.
        for entry in scraper.courses:
            entry['StudyProgram'] = study_program

        # Extraction must happen inside the `with` block, while the
        # downloaded files still exist.
        CourseContentExtractor(
            download_dir=download_dir,
            root_dir=root_dir,
        ).extract_contents(scraper.courses)

    # Leaving the `with` block above has removed the temporary directory.
    logging.info("Temporary download directory has been cleaned up.")
    print("Study materials have been updated successfully.")

# Run the update only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
Init Repo 2025-01-25 16:20:03 +01:00			`# update_study_material.py`

			`import os`
			`import shutil`
			`import tempfile`
			`from moodle_downloader import MoodleDownloader`
			`from course_content_extractor import CourseContentExtractor`
			`from dotenv import load_dotenv`
			`import logging`


			`def main():`
			`# Configure logging`
			`logging.basicConfig(`
			`level=logging.DEBUG, # Changed from INFO to DEBUG for detailed logs`
			`format='%(asctime)s - %(levelname)s - %(message)s',`
			`handlers=[`
			`logging.StreamHandler()`
			`]`
			`)`

			`# Load environment variables`
			`load_dotenv()`
			`root_dir = os.getenv('STUDY_MATERIAL_ROOT_DIR')`
			`if not root_dir:`
			`print("Please set the STUDY_MATERIAL_ROOT_DIR environment variable.")`
			`logging.error("STUDY_MATERIAL_ROOT_DIR environment variable not set.")`
			`return`

			`# Check if root_dir exists and is a directory`
			`if not os.path.isdir(root_dir):`
			`print(f"The specified STUDY_MATERIAL_ROOT_DIR does not exist or is not a directory: {root_dir}")`
			`logging.error(f"Invalid STUDY_MATERIAL_ROOT_DIR: {root_dir}")`
			`return`

			`# Treat root_dir as the study_program folder`
			`study_program = os.path.basename(os.path.normpath(root_dir))`
			`logging.info(f"Using root_dir as the study_program: {study_program}")`

			`# Use system temporary directory for downloads`
			`with tempfile.TemporaryDirectory() as download_dir:`
			`logging.info(f"Using temporary download directory: {download_dir}")`

			`# Load credentials from environment variables`
			`username = os.getenv('MOODLE_USERNAME')`
			`password = os.getenv('MOODLE_PASSWORD')`

			`if not username or not password:`
			`print("Please set your Moodle credentials in environment variables.")`
			`logging.error("Moodle credentials not set in environment variables.")`
			`return`

			`# Initialize downloader`
			`downloader = MoodleDownloader(username, password, download_dir=download_dir, headless=True)`
			`try:`
			`downloader.login()`
			`downloader.get_courses()`
			`downloader.download_all_courses()`
			`finally:`
			`downloader.close()`

			`# Assign study_program to each course`
			`for course in downloader.courses:`
			`course['StudyProgram'] = study_program`

			`# Initialize extractor`
			`extractor = CourseContentExtractor(download_dir=download_dir, root_dir=root_dir)`
			`extractor.extract_contents(downloader.courses)`

			`# Temporary directory is automatically cleaned up here`
			`logging.info("Temporary download directory has been cleaned up.")`
			`print("Study materials have been updated successfully.")`


			`if __name__ == "__main__":`
			`main()`