"""Entry point for the Moodle downloader (src/backend/main.py).

Reads the Moodle credentials from the ``MOODLE_USERNAME`` and
``MOODLE_PASSWORD`` environment variables, logs in, collects the user's
courses and downloads all of them.  The browser is closed even when one of
the steps fails.
"""

import logging
import os
import sys


def main():
    """Run the login -> list courses -> download-all workflow once.

    Raises:
        SystemExit: with status 1 when either credential variable is unset
            or empty.
    """
    # Configure logging
    logging.basicConfig(
        filename='moodle_downloader.log',
        level=logging.INFO,
        format='%(asctime)s - %(levelname)s - %(message)s'
    )

    # Get credentials from environment variables
    username = os.getenv('MOODLE_USERNAME')
    password = os.getenv('MOODLE_PASSWORD')

    if not username or not password:
        print("Please set the MOODLE_USERNAME and MOODLE_PASSWORD environment variables.")
        # sys.exit() is always available; the bare exit() helper is injected
        # by the ``site`` module and is absent under ``python -S``.
        sys.exit(1)

    # Imported only after the credential check so that a misconfigured run
    # reports the missing variables instead of first loading the browser
    # automation stack.
    from moodle_downloader import MoodleDownloader

    # Create an instance of MoodleDownloader
    downloader = MoodleDownloader(username, password, headless=True)

    try:
        # Login to Moodle
        downloader.login()

        # Retrieve courses
        downloader.get_courses()

        # Download all courses
        downloader.download_all_courses()
    finally:
        # Close the browser
        downloader.close()


# Guarded so importing this module does not start a browser session.
if __name__ == "__main__":
    main()
"""Selenium-based downloader for Moodle course archives.

(src/backend/moodle_downloader.py)
"""

import logging
import os


class MoodleDownloader:
    """Log in to Moodle with a Chrome browser and download course archives.

    Intended call order: ``login()``, ``get_courses()``,
    ``download_all_courses()`` and finally ``close()``.

    Selenium, webdriver-manager and requests are imported inside the methods
    that use them, so the class can be imported and constructed without the
    browser automation stack being installed.
    """

    # (connect, read) timeouts in seconds for the archive request; without a
    # timeout a stalled connection would block the run forever.
    DOWNLOAD_TIMEOUT = (10, 300)

    def __init__(self, username, password, download_dir=None, headless=False):
        """Store credentials and settings; no browser is started here.

        Args:
            username: Moodle login name.
            password: Moodle password.
            download_dir: Directory for the downloaded archives.  Defaults to
                ``<current working directory>/downloads``.
            headless: Run Chrome without a visible window when true.
        """
        self.username = username
        self.password = password
        self.download_dir = download_dir or os.path.join(os.getcwd(), 'downloads')
        self.headless = headless
        self.driver = None
        # List of {'CourseName': str, 'URL': str} dicts filled by get_courses().
        self.courses = []
        self.LOGIN_URL = 'https://moodle.fhgr.ch/login/index.php'
        self.MY_COURSES_URL = 'https://moodle.fhgr.ch/my/courses.php'
        self.DOWNLOAD_URL = 'https://moodle.fhgr.ch/course/downloadcontent.php'

    def setup_driver(self):
        """Create the Chrome WebDriver and store it in ``self.driver``."""
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options
        from selenium.webdriver.chrome.service import Service as ChromeService
        from webdriver_manager.chrome import ChromeDriverManager

        # Set up Chrome options
        chrome_options = Options()
        if self.headless:
            chrome_options.add_argument('--headless')  # Headless mode
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-gpu')  # Optional
        chrome_options.add_argument('--window-size=1920,1080')

        # Preferences for downloads
        prefs = {
            "download.default_directory": self.download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }
        chrome_options.add_experimental_option("prefs", prefs)

        # Initialize WebDriver
        logging.info("Initializing the WebDriver.")
        self.driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )

    def login(self):
        """Open the login page, pass the optional WAYF step and sign in.

        Raises:
            Exception: any Selenium error (including ``TimeoutException``) is
                logged and re-raised unchanged.
        """
        from selenium.common.exceptions import TimeoutException
        from selenium.webdriver.common.by import By
        from selenium.webdriver.common.keys import Keys
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.ui import WebDriverWait

        # Reuse an existing browser instead of leaking it on a repeated login.
        if self.driver is None:
            self.setup_driver()
        driver = self.driver
        try:
            # Open Moodle login page
            logging.info(f"Opening Moodle login page: {self.LOGIN_URL}")
            driver.get(self.LOGIN_URL)

            # Wait until the page is loaded
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )
            logging.info("Moodle login page loaded.")

            # Check for 'wayf_submit_button' and click if present
            try:
                logging.info("Checking for 'wayf_submit_button'.")
                wayf_button = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.ID, 'wayf_submit_button'))
                )
                wayf_button.click()
                logging.info("'wayf_submit_button' found and clicked.")

                # Wait for redirection to login page
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.ID, 'username'))
                )
            except TimeoutException:
                logging.info("'wayf_submit_button' not found. Continuing with login.")

            # Wait for username and password fields
            logging.info("Waiting for the username field.")
            username_field = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, 'username'))
            )

            logging.info("Waiting for the password field.")
            password_field = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, 'password'))
            )

            # Enter login credentials
            logging.info("Entering login credentials.")
            username_field.send_keys(self.username)
            password_field.send_keys(self.password)

            # Remember where the form lives.  After the WAYF redirect the
            # browser is no longer on LOGIN_URL, so comparing against
            # LOGIN_URL would succeed immediately even for a failed login.
            form_url = driver.current_url

            # Submit the form
            logging.info("Submitting the login form.")
            password_field.send_keys(Keys.RETURN)

            # Wait for login to complete
            logging.info("Waiting for login to complete.")
            WebDriverWait(driver, 30).until(EC.url_changes(form_url))
            logging.info("Login successful.")
        except Exception:
            logging.error("An error occurred during login.", exc_info=True)
            raise

    def get_courses(self):
        """Collect name and URL of every course on the 'My Courses' page.

        Replaces ``self.courses`` with the freshly collected list, so calling
        the method again does not duplicate entries.

        Raises:
            Exception: any Selenium error while loading the page is logged
                and re-raised unchanged.
        """
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.ui import WebDriverWait

        driver = self.driver
        try:
            # Navigate to "My Courses" page
            logging.info(f"Navigating to 'My Courses' page: {self.MY_COURSES_URL}")
            driver.get(self.MY_COURSES_URL)

            # Wait until the page is loaded
            logging.info("Waiting for the 'My Courses' page to load.")
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'a.aalink.coursename'))
            )
            logging.info("'My Courses' page loaded.")

            # Collect all courses from the page
            logging.info("Collecting all courses from 'My Courses' page.")
            course_elements = driver.find_elements(By.CSS_SELECTOR, 'a.aalink.coursename')

            logging.info(f"{len(course_elements)} courses found.")

            found = []
            for coursename_element in course_elements:
                try:
                    course_name = coursename_element.text.strip()
                    course_url = coursename_element.get_attribute('href')
                except Exception as e:
                    # Best effort: one unreadable link must not lose the rest.
                    logging.warning(f"Error extracting course: {e}")
                    continue
                found.append({'CourseName': course_name, 'URL': course_url})
                logging.info(f"Course found: {course_name} - {course_url}")

            self.courses = found

            if not self.courses:
                logging.warning("No courses found. Check the HTML structure of the 'My Courses' page.")
                print("No courses found. Check the HTML structure of the 'My Courses' page.")
        except Exception:
            logging.error("An error occurred while retrieving courses.", exc_info=True)
            raise

    def download_all_courses(self):
        """Download the content archive of every course in ``self.courses``.

        Archives are saved as ``course_<N>.zip`` in ``self.download_dir``.
        A failure for one course is logged and the loop moves on to the next
        course.  Returns ``None``; does nothing when no courses are known.
        """
        if not self.courses:
            logging.warning("No courses to download.")
            return

        import requests
        from selenium.webdriver.common.by import By
        from selenium.webdriver.support import expected_conditions as EC
        from selenium.webdriver.support.ui import WebDriverWait

        driver = self.driver

        # Ensure the download directory exists
        os.makedirs(self.download_dir, exist_ok=True)

        course_counter = 1

        for course in self.courses:
            course_name = course['CourseName']
            course_url = course['URL']
            logging.info(f"Processing course: {course_name} - {course_url}")

            try:
                # Page load and wait are inside the try so that one slow or
                # broken course page does not abort the remaining courses.
                driver.get(course_url)

                # Wait until the course page is loaded
                WebDriverWait(driver, 20).until(
                    EC.presence_of_element_located((By.TAG_NAME, 'body'))
                )

                logging.info("Searching for 'Download course content' link.")
                download_link = driver.find_element(By.CSS_SELECTOR, 'a[data-downloadcourse="1"]')
                download_page_url = download_link.get_attribute('href')
                logging.info(f"Download page URL: {download_page_url}")

                # Open the download page
                driver.get(download_page_url)

                # Wait until the page is loaded
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.NAME, 'sesskey'))
                )

                # Extract 'sesskey' and 'contextid'
                sesskey = driver.find_element(By.NAME, 'sesskey').get_attribute('value')
                contextid = driver.find_element(By.NAME, 'contextid').get_attribute('value')

                # The sesskey is a session-bound token; keep it out of the log.
                logging.info(f"contextid: {contextid}")

                # Extract cookies from the Selenium session
                logging.info("Extracting cookies from the Selenium session.")
                cookies = {c['name']: c['value'] for c in driver.get_cookies()}

                # Prepare the HTTP POST request
                post_data = {
                    'sesskey': sesskey,
                    'contextid': contextid,
                    'download': 1
                }
                headers = {
                    'User-Agent': 'Mozilla/5.0',
                    'Referer': download_page_url
                }

                # Send the POST request with cookies
                logging.info(f"Sending HTTP POST request for course '{course_name}'.")
                with requests.Session() as session:
                    session.cookies.update(cookies)
                    # The response is a context manager so the streamed
                    # connection is released even when saving fails.
                    with session.post(
                        self.DOWNLOAD_URL,
                        data=post_data,
                        headers=headers,
                        stream=True,
                        timeout=self.DOWNLOAD_TIMEOUT,
                    ) as response:
                        response.raise_for_status()

                        # Generate filename as course_N.zip
                        filename = f"course_{course_counter}.zip"
                        course_counter += 1
                        filepath = os.path.join(self.download_dir, filename)

                        self._save_response(response, filepath)

                logging.info(f"Course '{course_name}' downloaded and saved as '{filename}'.")
                print(f"Course '{course_name}' downloaded.")
            except Exception as e:
                logging.error(f"Error downloading course '{course_name}': {e}", exc_info=True)
                continue

    @staticmethod
    def _save_response(response, filepath):
        """Stream *response* to *filepath*, replacing the target only on success.

        The body is written to ``<filepath>.part`` first and then moved over
        the target, so an interrupted download never leaves a truncated
        archive and never destroys a previously downloaded one.
        """
        part_path = filepath + '.part'
        try:
            with open(part_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
            # Overwrites an existing file in one step.
            os.replace(part_path, filepath)
        except BaseException:
            try:
                os.remove(part_path)
            except FileNotFoundError:
                pass
            raise

    def close(self):
        """Quit the browser if one is running; safe to call repeatedly."""
        if self.driver:
            logging.info("Closing the browser.")
            try:
                self.driver.quit()
            finally:
                # Drop the reference so a later login() starts a fresh browser.
                self.driver = None