moodle-scraper/main.py

77 lines
2.6 KiB
Python
Raw Permalink Normal View History

2025-01-25 16:20:03 +01:00
# update_study_material.py
import os
import shutil
import tempfile
from moodle_downloader import MoodleDownloader
from course_content_extractor import CourseContentExtractor
from dotenv import load_dotenv
import logging
def main():
    """Refresh the local study material from Moodle.

    Workflow:
      1. Configure DEBUG-level logging through a stream handler.
      2. Call ``load_dotenv()`` and read ``STUDY_MATERIAL_ROOT_DIR``; the
         last path component of that directory is used as the study
         program name.
      3. Inside a temporary download directory, read ``MOODLE_USERNAME``
         and ``MOODLE_PASSWORD``, log in, list the courses and download
         them all. The downloader is always closed, even on failure.
      4. Tag every course with the study program and hand the courses to
         the content extractor.

    Returns ``None`` in every case. When a required setting is missing or
    the root directory is invalid, a hint is printed, an error is logged
    and the function returns early without downloading anything.
    """
    # Detailed (DEBUG) output is wanted here rather than INFO.
    logging.basicConfig(
        level=logging.DEBUG,
        format='%(asctime)s - %(levelname)s - %(message)s',
        handlers=[logging.StreamHandler()],
    )

    load_dotenv()

    # --- Destination directory -------------------------------------------
    material_root = os.environ.get('STUDY_MATERIAL_ROOT_DIR')
    if not material_root:
        print("Please set the STUDY_MATERIAL_ROOT_DIR environment variable.")
        logging.error("STUDY_MATERIAL_ROOT_DIR environment variable not set.")
        return

    if not os.path.isdir(material_root):
        print(f"The specified STUDY_MATERIAL_ROOT_DIR does not exist or is not a directory: {material_root}")
        logging.error(f"Invalid STUDY_MATERIAL_ROOT_DIR: {material_root}")
        return

    # The root directory itself stands for the study program; normalising
    # first means a trailing separator does not yield an empty name.
    normalized_root = os.path.normpath(material_root)
    program_name = os.path.basename(normalized_root)
    logging.info(f"Using root_dir as the study_program: {program_name}")

    # --- Download and extract --------------------------------------------
    with tempfile.TemporaryDirectory() as scratch_dir:
        logging.info(f"Using temporary download directory: {scratch_dir}")

        moodle_user = os.environ.get('MOODLE_USERNAME')
        moodle_pass = os.environ.get('MOODLE_PASSWORD')
        if not (moodle_user and moodle_pass):
            print("Please set your Moodle credentials in environment variables.")
            logging.error("Moodle credentials not set in environment variables.")
            return

        scraper = MoodleDownloader(
            moodle_user,
            moodle_pass,
            download_dir=scratch_dir,
            headless=True,
        )
        try:
            scraper.login()
            scraper.get_courses()
            scraper.download_all_courses()
        finally:
            # Release the downloader whether or not the download succeeded.
            scraper.close()

        # Every downloaded course belongs to the same study program.
        for course_entry in scraper.courses:
            course_entry['StudyProgram'] = program_name

        CourseContentExtractor(
            download_dir=scratch_dir,
            root_dir=material_root,
        ).extract_contents(scraper.courses)

    # Leaving the ``with`` block above removed the temporary directory.
    logging.info("Temporary download directory has been cleaned up.")
    print("Study materials have been updated successfully.")
# Run the update only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()