Moodle Downloader and implementation test (written entirely by OpenAI o1, with guidance from me)
parent
517511ddb4
commit
f21d288959
|
@ -0,0 +1,34 @@
|
||||||
|
"""Log in to Moodle and download every course listed on 'My Courses'.

Credentials are read from the MOODLE_USERNAME and MOODLE_PASSWORD
environment variables; progress is logged to 'moodle_downloader.log'.
"""
import logging
import os
import sys

from moodle_downloader import MoodleDownloader

# Configure logging
logging.basicConfig(
    filename='moodle_downloader.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Get credentials from environment variables
USERNAME = os.getenv('MOODLE_USERNAME')
PASSWORD = os.getenv('MOODLE_PASSWORD')

if not USERNAME or not PASSWORD:
    print("Please set the MOODLE_USERNAME and MOODLE_PASSWORD environment variables.")
    # sys.exit instead of the bare exit(): the latter is injected by the
    # `site` module for interactive use and is missing under `python -S`
    # or in frozen/embedded interpreters.
    sys.exit(1)

# Create an instance of MoodleDownloader
downloader = MoodleDownloader(USERNAME, PASSWORD, headless=True)

try:
    # Login to Moodle
    downloader.login()

    # Retrieve courses
    downloader.get_courses()

    # Download all courses
    downloader.download_all_courses()
finally:
    # Close the browser even when one of the steps above raised
    downloader.close()
|
|
@ -0,0 +1,241 @@
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import logging
|
||||||
|
import requests
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from selenium.webdriver.common.keys import Keys
|
||||||
|
from selenium.webdriver.support.ui import WebDriverWait
|
||||||
|
from selenium.webdriver.support import expected_conditions as EC
|
||||||
|
from selenium.common.exceptions import TimeoutException
|
||||||
|
from selenium.webdriver.chrome.service import Service as ChromeService
|
||||||
|
from webdriver_manager.chrome import ChromeDriverManager
|
||||||
|
|
||||||
|
class MoodleDownloader:
    """Download the content archive of every course on a Moodle site.

    A Selenium-driven Chrome session performs the login and the page
    navigation; the archives themselves are fetched with ``requests``,
    reusing the cookies of that browser session.

    Typical use::

        downloader = MoodleDownloader(username, password, headless=True)
        try:
            downloader.login()
            downloader.get_courses()
            downloader.download_all_courses()
        finally:
            downloader.close()
    """

    # Site used when no ``base_url`` is passed to the constructor.
    DEFAULT_BASE_URL = 'https://moodle.fhgr.ch'
    # (connect, read) timeouts in seconds for the archive request, so a
    # stalled server cannot block the run forever.
    REQUEST_TIMEOUT = (10, 300)
    # Number of bytes written to disk per iteration while streaming.
    CHUNK_SIZE = 8192

    def __init__(self, username, password, download_dir=None, headless=False,
                 base_url=DEFAULT_BASE_URL):
        """Store the configuration; no browser is started here.

        Args:
            username: Login name typed into the 'username' field.
            password: Password typed into the 'password' field.
            download_dir: Directory for the archives. Defaults to a
                'downloads' folder below the current working directory.
            headless: Run Chrome without a visible window when true.
            base_url: Root URL of the Moodle site (a trailing slash is
                ignored).
        """
        self.username = username
        self.password = password
        self.download_dir = download_dir or os.path.join(os.getcwd(), 'downloads')
        self.headless = headless
        self.driver = None
        self.courses = []
        self.base_url = base_url.rstrip('/')
        self.LOGIN_URL = f'{self.base_url}/login/index.php'
        self.MY_COURSES_URL = f'{self.base_url}/my/courses.php'
        self.DOWNLOAD_URL = f'{self.base_url}/course/downloadcontent.php'

    def setup_driver(self):
        """Create the Chrome WebDriver and store it in ``self.driver``."""
        # Set up Chrome options
        chrome_options = Options()
        if self.headless:
            chrome_options.add_argument('--headless')  # Headless mode
        chrome_options.add_argument('--no-sandbox')
        chrome_options.add_argument('--disable-dev-shm-usage')
        chrome_options.add_argument('--disable-gpu')  # Optional
        chrome_options.add_argument('--window-size=1920,1080')

        # Preferences for downloads
        prefs = {
            "download.default_directory": self.download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }
        chrome_options.add_experimental_option("prefs", prefs)

        # Initialize WebDriver
        logging.info("Initializing the WebDriver.")
        self.driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )

    def login(self):
        """Open the login page and sign in with the stored credentials.

        Raises:
            Exception: Whatever Selenium raised (for example
                ``TimeoutException``); it is logged and re-raised.
        """
        # Reuse an existing browser so a repeated login() does not leak one.
        if self.driver is None:
            self.setup_driver()
        driver = self.driver
        try:
            # Open Moodle login page
            logging.info(f"Opening Moodle login page: {self.LOGIN_URL}")
            driver.get(self.LOGIN_URL)

            # Wait until the page is loaded
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )
            logging.info("Moodle login page loaded.")

            # Check for 'wayf_submit_button' and click if present
            # (presumably the SSO organisation picker - confirm).
            try:
                logging.info("Checking for 'wayf_submit_button'.")
                wayf_button = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.ID, 'wayf_submit_button'))
                )
                wayf_button.click()
                logging.info("'wayf_submit_button' found and clicked.")

                # Wait for redirection to login page
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.ID, 'username'))
                )
            except TimeoutException:
                logging.info("'wayf_submit_button' not found. Continuing with login.")

            # Wait for username and password fields
            logging.info("Waiting for the username field.")
            username_field = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, 'username'))
            )

            logging.info("Waiting for the password field.")
            password_field = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, 'password'))
            )

            # Enter login credentials
            logging.info("Entering login credentials.")
            username_field.send_keys(self.username)
            password_field.send_keys(self.password)

            # Remember where the form lives. After a redirect the browser
            # is no longer on LOGIN_URL, so waiting for a change away from
            # LOGIN_URL would succeed immediately even if the login failed.
            form_url = driver.current_url

            # Submit the form
            logging.info("Submitting the login form.")
            password_field.send_keys(Keys.RETURN)

            # Wait for login to complete
            logging.info("Waiting for login to complete.")
            WebDriverWait(driver, 30).until(EC.url_changes(form_url))
            logging.info("Login successful.")
        except Exception:
            logging.error("An error occurred during login.", exc_info=True)
            raise

    def get_courses(self):
        """Collect name and URL of every course on the 'My Courses' page.

        The result replaces ``self.courses`` as a list of dicts with the
        keys 'CourseName' and 'URL'.

        Raises:
            Exception: Whatever Selenium raised while loading the page; it
                is logged and re-raised.
        """
        driver = self.driver
        # Start from scratch so a repeated call does not duplicate entries.
        self.courses = []
        try:
            # Navigate to "My Courses" page
            logging.info(f"Navigating to 'My Courses' page: {self.MY_COURSES_URL}")
            driver.get(self.MY_COURSES_URL)

            # Wait until the page is loaded
            logging.info("Waiting for the 'My Courses' page to load.")
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'a.aalink.coursename'))
            )
            logging.info("'My Courses' page loaded.")

            # Collect all courses from the page
            logging.info("Collecting all courses from 'My Courses' page.")
            course_elements = driver.find_elements(By.CSS_SELECTOR, 'a.aalink.coursename')
            logging.info(f"{len(course_elements)} courses found.")

            for coursename_element in course_elements:
                try:
                    course_name = coursename_element.text.strip()
                    course_url = coursename_element.get_attribute('href')
                    self.courses.append({'CourseName': course_name, 'URL': course_url})
                    logging.info(f"Course found: {course_name} - {course_url}")
                except Exception as e:
                    # One unreadable link must not hide the other courses.
                    logging.warning(f"Error extracting course: {e}")
                    continue

            if not self.courses:
                logging.warning("No courses found. Check the HTML structure of the 'My Courses' page.")
                print("No courses found. Check the HTML structure of the 'My Courses' page.")
        except Exception:
            logging.error("An error occurred while retrieving courses.", exc_info=True)
            raise

    def download_all_courses(self):
        """Download every course in ``self.courses`` into ``download_dir``.

        Archives are named 'course_N.zip', numbered in order of successful
        download. A failure for one course is logged and the remaining
        courses are still processed.
        """
        if not self.courses:
            logging.warning("No courses to download.")
            return

        # Ensure the download directory exists
        os.makedirs(self.download_dir, exist_ok=True)

        course_counter = 1
        for course in self.courses:
            course_name = course['CourseName']
            filename = f"course_{course_counter}.zip"
            filepath = os.path.join(self.download_dir, filename)
            try:
                self._download_course(course, filepath)
            except Exception as e:
                logging.error(f"Error downloading course '{course_name}': {e}", exc_info=True)
                continue

            course_counter += 1
            logging.info(f"Course '{course_name}' downloaded and saved as '{filename}'.")
            print(f"Course '{course_name}' downloaded.")

    def _download_course(self, course, filepath):
        """Fetch the archive of one course and store it at *filepath*."""
        driver = self.driver
        course_name = course['CourseName']
        course_url = course['URL']
        logging.info(f"Processing course: {course_name} - {course_url}")
        driver.get(course_url)

        # Wait until the course page is loaded
        WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.TAG_NAME, 'body'))
        )

        logging.info("Searching for 'Download course content' link.")
        download_link = driver.find_element(By.CSS_SELECTOR, 'a[data-downloadcourse="1"]')
        download_page_url = download_link.get_attribute('href')
        logging.info(f"Download page URL: {download_page_url}")

        # Open the download page
        driver.get(download_page_url)

        # Wait until the page is loaded
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.NAME, 'sesskey'))
        )

        # Extract 'sesskey' and 'contextid'
        sesskey = driver.find_element(By.NAME, 'sesskey').get_attribute('value')
        contextid = driver.find_element(By.NAME, 'contextid').get_attribute('value')
        # The session key is deliberately kept out of the log file.
        logging.info(f"contextid: {contextid}")

        # Extract cookies from the Selenium session
        logging.info("Extracting cookies from the Selenium session.")
        cookies = {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}

        # Prepare the HTTP POST request
        post_data = {
            'sesskey': sesskey,
            'contextid': contextid,
            'download': 1,
        }
        headers = {
            'User-Agent': 'Mozilla/5.0',
            'Referer': download_page_url,
        }

        # Send the POST request with cookies. The streamed body is consumed
        # while both the session and the response are still open.
        logging.info(f"Sending HTTP POST request for course '{course_name}'.")
        with requests.Session() as session:
            session.cookies.update(cookies)
            with session.post(self.DOWNLOAD_URL, data=post_data, headers=headers,
                              stream=True, timeout=self.REQUEST_TIMEOUT) as response:
                response.raise_for_status()
                self._save_response(response, filepath)

    @staticmethod
    def _save_response(response, filepath):
        """Stream *response* to *filepath*, replacing any existing file.

        The data is first written to '<filepath>.part' and moved into
        place only once complete, so an interrupted download neither
        leaves a truncated archive nor destroys a previous one.
        """
        partial_path = filepath + '.part'
        try:
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=MoodleDownloader.CHUNK_SIZE):
                    if chunk:
                        f.write(chunk)
            os.replace(partial_path, filepath)
        except BaseException:
            # Best-effort cleanup; the original error is what matters.
            try:
                os.remove(partial_path)
            except OSError:
                pass
            raise

    def close(self):
        """Quit the browser if one is running; safe to call repeatedly."""
        if self.driver:
            logging.info("Closing the browser.")
            try:
                self.driver.quit()
            finally:
                self.driver = None
|
Loading…
Reference in New Issue