Moodle Downloader and implementation test (written entirely by OpenAI o1, with guidance from me)

main
Oliver Schütz 2024-10-25 22:22:19 +02:00
parent 517511ddb4
commit f21d288959
2 changed files with 275 additions and 0 deletions

34
src/backend/main.py Normal file
View File

@ -0,0 +1,34 @@
import logging
from moodle_downloader import MoodleDownloader
import os
# Entry script: log in to Moodle with credentials taken from the environment,
# collect the user's courses and download each one, always closing the browser.

# Configure logging: INFO and above is written to a log file in the
# current working directory.
logging.basicConfig(
    filename='moodle_downloader.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Get credentials from environment variables
USERNAME = os.getenv('MOODLE_USERNAME')
PASSWORD = os.getenv('MOODLE_PASSWORD')
if not USERNAME or not PASSWORD:
    print("Please set the MOODLE_USERNAME and MOODLE_PASSWORD environment variables.")
    # Raise SystemExit directly instead of calling exit(): the exit()/quit()
    # helpers are injected by the `site` module for interactive use and are
    # not available when Python runs with -S or in some frozen builds.
    raise SystemExit(1)

# Create an instance of MoodleDownloader
downloader = MoodleDownloader(USERNAME, PASSWORD, headless=True)
try:
    # Login to Moodle
    downloader.login()
    # Retrieve courses
    downloader.get_courses()
    # Download all courses
    downloader.download_all_courses()
finally:
    # Close the browser even if one of the steps above raised
    downloader.close()

View File

@ -0,0 +1,241 @@
import os
import time
import logging
import requests
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
class MoodleDownloader:
    """Log in to a Moodle site with Selenium and download each course as a ZIP.

    Intended call order: ``login()`` -> ``get_courses()`` ->
    ``download_all_courses()`` -> ``close()``.

    Args:
        username: Value typed into the login form's ``username`` field.
        password: Value typed into the login form's ``password`` field.
        download_dir: Directory that receives the ZIP files. Defaults to a
            ``downloads`` folder inside the current working directory.
        headless: When true, Chrome is started with ``--headless``.
        base_url: Root URL of the Moodle site. The login, "My courses" and
            download endpoints are derived from it.
        request_timeout: ``timeout`` (seconds) passed to the HTTP POST that
            fetches a course archive, so a stalled server cannot block the
            run forever.
    """

    DEFAULT_BASE_URL = 'https://moodle.fhgr.ch'

    def __init__(self, username, password, download_dir=None, headless=False,
                 base_url=DEFAULT_BASE_URL, request_timeout=300):
        self.username = username
        self.password = password
        self.download_dir = download_dir or os.path.join(os.getcwd(), 'downloads')
        self.headless = headless
        self.request_timeout = request_timeout
        self.driver = None
        # List of {'CourseName': str, 'URL': str} dicts filled by get_courses().
        self.courses = []
        root = base_url.rstrip('/')
        self.LOGIN_URL = f'{root}/login/index.php'
        self.MY_COURSES_URL = f'{root}/my/courses.php'
        self.DOWNLOAD_URL = f'{root}/course/downloadcontent.php'

    def setup_driver(self):
        """Start a Chrome WebDriver configured for unattended downloads.

        Any driver left over from an earlier call is quit first so repeated
        calls do not leak browser processes.
        """
        self.close()

        # Set up Chrome options
        chrome_options = Options()
        if self.headless:
            chrome_options.add_argument('--headless')  # Headless mode
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-gpu')  # Optional
        chrome_options.add_argument('--window-size=1920,1080')

        # Preferences for downloads
        prefs = {
            "download.default_directory": self.download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }
        chrome_options.add_experimental_option("prefs", prefs)

        # Initialize WebDriver
        logging.info("Initializing the WebDriver.")
        self.driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )

    def login(self):
        """Open the login page, submit the credentials and wait for a redirect.

        Raises:
            Exception: Whatever Selenium raised (for example
                ``TimeoutException``); it is logged and re-raised unchanged.
        """
        self.setup_driver()
        driver = self.driver
        try:
            # Open Moodle login page
            logging.info(f"Opening Moodle login page: {self.LOGIN_URL}")
            driver.get(self.LOGIN_URL)
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )
            logging.info("Moodle login page loaded.")

            self._click_wayf_button_if_present(driver)

            # Wait for username and password fields
            logging.info("Waiting for the username field.")
            username_field = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, 'username'))
            )
            logging.info("Waiting for the password field.")
            password_field = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, 'password'))
            )

            # Enter login credentials
            logging.info("Entering login credentials.")
            username_field.send_keys(self.username)
            password_field.send_keys(self.password)

            # Submit the form
            logging.info("Submitting the login form.")
            password_field.send_keys(Keys.RETURN)

            # Success is inferred from the browser leaving the login URL.
            logging.info("Waiting for login to complete.")
            WebDriverWait(driver, 30).until(EC.url_changes(self.LOGIN_URL))
            logging.info("Login successful.")
        except Exception:
            logging.error("An error occurred during login.", exc_info=True)
            raise  # bare raise keeps the original traceback intact

    def _click_wayf_button_if_present(self, driver):
        """Click the optional 'wayf_submit_button' if it shows up within 5 s."""
        logging.info("Checking for 'wayf_submit_button'.")
        try:
            wayf_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.ID, 'wayf_submit_button'))
            )
        except TimeoutException:
            logging.info("'wayf_submit_button' not found. Continuing with login.")
            return

        wayf_button.click()
        logging.info("'wayf_submit_button' found and clicked.")
        try:
            # Wait for redirection to login page
            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.ID, 'username'))
            )
        except TimeoutException:
            # The button WAS clicked; only the follow-up wait ran out. The
            # caller keeps waiting for the form, so this is not fatal here.
            logging.info("Login form not present yet after clicking "
                         "'wayf_submit_button'. Continuing to wait.")

    def _require_driver(self):
        """Return the active WebDriver or raise if no browser was started."""
        if self.driver is None:
            raise RuntimeError(
                "No browser session: call login() (or setup_driver()) first."
            )
        return self.driver

    def get_courses(self):
        """Read course names and URLs from the "My courses" page.

        ``self.courses`` is replaced (not extended) with one
        ``{'CourseName': ..., 'URL': ...}`` dict per course link found, so
        calling this method twice does not duplicate entries.

        Raises:
            RuntimeError: If no browser session exists yet.
            Exception: Any Selenium error; it is logged and re-raised.
        """
        driver = self._require_driver()
        try:
            # Navigate to "My Courses" page
            logging.info(f"Navigating to 'My Courses' page: {self.MY_COURSES_URL}")
            driver.get(self.MY_COURSES_URL)

            logging.info("Waiting for the 'My Courses' page to load.")
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'a.aalink.coursename'))
            )
            logging.info("'My Courses' page loaded.")

            # Collect all courses from the page
            logging.info("Collecting all courses from 'My Courses' page.")
            course_elements = driver.find_elements(By.CSS_SELECTOR, 'a.aalink.coursename')
            logging.info(f"{len(course_elements)} courses found.")

            found = []
            for coursename_element in course_elements:
                try:
                    course_name = coursename_element.text.strip()
                    course_url = coursename_element.get_attribute('href')
                except Exception as e:
                    # One unreadable link must not discard the other courses.
                    logging.warning(f"Error extracting course: {e}")
                    continue
                found.append({'CourseName': course_name, 'URL': course_url})
                logging.info(f"Course found: {course_name} - {course_url}")
            self.courses = found

            if not self.courses:
                logging.warning("No courses found. Check the HTML structure of the 'My Courses' page.")
                print("No courses found. Check the HTML structure of the 'My Courses' page.")
        except Exception:
            logging.error("An error occurred while retrieving courses.", exc_info=True)
            raise

    def download_all_courses(self):
        """Download every course in ``self.courses`` as ``course_N.zip``.

        A failure in one course (page load, missing link, HTTP error, write
        error) is logged and the loop moves on to the next course. ``N``
        only advances after a course was saved successfully. Returns
        ``None``; does nothing but log a warning when there are no courses.

        Raises:
            RuntimeError: If there are courses but no browser session.
        """
        if not self.courses:
            logging.warning("No courses to download.")
            return

        driver = self._require_driver()

        # Ensure the download directory exists
        os.makedirs(self.download_dir, exist_ok=True)

        course_counter = 1
        for course in self.courses:
            course_name = course['CourseName']
            course_url = course['URL']
            logging.info(f"Processing course: {course_name} - {course_url}")

            # Generate filename as course_N.zip
            filename = f"course_{course_counter}.zip"
            filepath = os.path.join(self.download_dir, filename)
            try:
                self._download_course(driver, course_name, course_url, filepath)
            except Exception as e:
                logging.error(f"Error downloading course '{course_name}': {e}", exc_info=True)
                continue

            course_counter += 1
            logging.info(f"Course '{course_name}' downloaded and saved as '{filename}'.")
            print(f"Course '{course_name}' downloaded.")

    def _download_course(self, driver, course_name, course_url, filepath):
        """Fetch one course's content archive and store it at *filepath*."""
        driver.get(course_url)
        # Wait until the course page is loaded
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.TAG_NAME, 'body'))
        )

        logging.info("Searching for 'Download course content' link.")
        download_link = driver.find_element(By.CSS_SELECTOR, 'a[data-downloadcourse="1"]')
        download_page_url = download_link.get_attribute('href')
        logging.info(f"Download page URL: {download_page_url}")

        # Open the download page and wait for its form
        driver.get(download_page_url)
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, 'sesskey'))
        )

        # Extract 'sesskey' and 'contextid' from the form
        sesskey = driver.find_element(By.NAME, 'sesskey').get_attribute('value')
        contextid = driver.find_element(By.NAME, 'contextid').get_attribute('value')
        # The sesskey is a per-session token, so its value is kept out of the log.
        logging.info(f"sesskey obtained, contextid: {contextid}")

        # Reuse the browser's cookies for a plain HTTP request
        logging.info("Extracting cookies from the Selenium session.")
        cookies = {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}

        post_data = {
            'sesskey': sesskey,
            'contextid': contextid,
            'download': 1,
        }
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Referer': download_page_url,
        }

        logging.info(f"Sending HTTP POST request for course '{course_name}'.")
        # Stream into a temporary file and rename on success, so an aborted
        # transfer never leaves a truncated ZIP under the final name.
        partial_path = filepath + '.part'
        try:
            with requests.Session() as session:
                session.cookies.update(cookies)
                with session.post(self.DOWNLOAD_URL, data=post_data, headers=headers,
                                  stream=True, timeout=self.request_timeout) as response:
                    response.raise_for_status()
                    with open(partial_path, 'wb') as f:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)
            # os.replace overwrites an existing file of the same name.
            os.replace(partial_path, filepath)
        finally:
            if os.path.exists(partial_path):
                os.remove(partial_path)

    def close(self):
        """Quit the browser if one is running; safe to call repeatedly."""
        if self.driver is not None:
            logging.info("Closing the browser.")
            try:
                self.driver.quit()
            finally:
                # Drop the reference so later calls do not quit a dead driver.
                self.driver = None