Moodle Downloader and implementation test (written entirely by OpenAI o1, with guidance from me)
parent
517511ddb4
commit
f21d288959
|
@ -0,0 +1,34 @@
|
|||
import logging
|
||||
from moodle_downloader import MoodleDownloader
|
||||
import os
|
||||
|
||||
# Driver script: log in to Moodle with credentials taken from the
# environment, list the user's courses and download each one.

# Configure logging: INFO and above is written to a log file in the
# current working directory.
logging.basicConfig(
    filename='moodle_downloader.log',
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Get credentials from environment variables so they never live in the source.
USERNAME = os.getenv('MOODLE_USERNAME')
PASSWORD = os.getenv('MOODLE_PASSWORD')

if not USERNAME or not PASSWORD:
    print("Please set the MOODLE_USERNAME and MOODLE_PASSWORD environment variables.")
    # Raise SystemExit directly: the bare exit() helper is injected by the
    # `site` module for interactive sessions and is not guaranteed to exist
    # (e.g. under `python -S` or in frozen executables).
    raise SystemExit(1)

# Create an instance of MoodleDownloader (no visible browser window).
downloader = MoodleDownloader(USERNAME, PASSWORD, headless=True)

try:
    # Login to Moodle
    downloader.login()

    # Retrieve courses
    downloader.get_courses()

    # Download all courses
    downloader.download_all_courses()
finally:
    # Always close the browser, even when one of the steps above failed.
    downloader.close()
|
|
@ -0,0 +1,241 @@
|
|||
import os
|
||||
import time
|
||||
import logging
|
||||
import requests
|
||||
from selenium import webdriver
|
||||
from selenium.webdriver.chrome.options import Options
|
||||
from selenium.webdriver.common.by import By
|
||||
from selenium.webdriver.common.keys import Keys
|
||||
from selenium.webdriver.support.ui import WebDriverWait
|
||||
from selenium.webdriver.support import expected_conditions as EC
|
||||
from selenium.common.exceptions import TimeoutException
|
||||
from selenium.webdriver.chrome.service import Service as ChromeService
|
||||
from webdriver_manager.chrome import ChromeDriverManager
|
||||
|
||||
class MoodleDownloader:
    """Log in to a Moodle site with Selenium and download course archives.

    Typical use is ``login()`` -> ``get_courses()`` ->
    ``download_all_courses()`` -> ``close()``.  The instance can also be used
    as a context manager, in which case ``close()`` is called on exit.

    Attributes:
        username / password: credentials typed into the login form.
        download_dir: directory that receives the ``course_N.zip`` files.
        headless: whether Chrome is started with ``--headless``.
        driver: the active Selenium WebDriver, or ``None`` when no browser
            is running.
        courses: list of ``{'CourseName': str, 'URL': str}`` dicts filled by
            ``get_courses()``.
        LOGIN_URL / MY_COURSES_URL / DOWNLOAD_URL: endpoints derived from
            ``base_url``.
        request_timeout: ``(connect, read)`` timeout in seconds passed to
            ``requests`` for the archive download.
    """

    def __init__(self, username, password, download_dir=None, headless=False,
                 base_url='https://moodle.fhgr.ch'):
        """Store the configuration; no browser is started here.

        Args:
            username: Moodle user name.
            password: Moodle password.
            download_dir: target directory; defaults to ``./downloads``
                relative to the current working directory.
            headless: run Chrome without a visible window.
            base_url: root URL of the Moodle installation.  The default keeps
                the originally hard-coded host.
        """
        self.username = username
        self.password = password
        self.download_dir = download_dir or os.path.join(os.getcwd(), 'downloads')
        self.headless = headless
        self.driver = None
        self.courses = []

        # Build every endpoint from one root so the host is defined once.
        root = base_url.rstrip('/')
        self.LOGIN_URL = f'{root}/login/index.php'
        self.MY_COURSES_URL = f'{root}/my/courses.php'
        self.DOWNLOAD_URL = f'{root}/course/downloadcontent.php'

        # Without a timeout a stalled server would block the run forever.
        self.request_timeout = (10, 300)

    def __enter__(self):
        """Return ``self`` so the instance works in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the browser on leaving the ``with`` block; never suppresses."""
        self.close()
        return False

    def _require_driver(self):
        """Return the active WebDriver or raise ``RuntimeError`` if none exists."""
        if self.driver is None:
            raise RuntimeError("No active browser session; call login() first.")
        return self.driver

    def setup_driver(self):
        """Start a Chrome WebDriver configured for unattended downloads.

        Any browser already held in ``self.driver`` is quit first so repeated
        calls do not leave orphaned Chrome processes behind.
        """
        if self.driver is not None:
            self.close()

        # Set up Chrome options
        chrome_options = Options()
        if self.headless:
            chrome_options.add_argument('--headless')  # Headless mode
        for argument in (
            '--no-sandbox',
            '--disable-dev-shm-usage',
            '--disable-gpu',  # Optional
            '--window-size=1920,1080',
        ):
            chrome_options.add_argument(argument)

        # Preferences for downloads
        prefs = {
            "download.default_directory": self.download_dir,
            "download.prompt_for_download": False,
            "download.directory_upgrade": True,
            "safebrowsing.enabled": True,
            "profile.default_content_setting_values.automatic_downloads": 1,
        }
        chrome_options.add_experimental_option("prefs", prefs)

        # Initialize WebDriver
        logging.info("Initializing the WebDriver.")
        self.driver = webdriver.Chrome(
            service=ChromeService(ChromeDriverManager().install()),
            options=chrome_options,
        )

    def login(self):
        """Start a fresh browser and sign in with the stored credentials.

        Raises:
            Exception: any Selenium error (including ``TimeoutException``
                when the page does not leave the login form) is logged and
                re-raised unchanged.
        """
        self.setup_driver()
        driver = self.driver
        try:
            # Open Moodle login page
            logging.info(f"Opening Moodle login page: {self.LOGIN_URL}")
            driver.get(self.LOGIN_URL)

            # Wait until the page is loaded
            WebDriverWait(driver, 15).until(
                EC.presence_of_element_located((By.TAG_NAME, 'body'))
            )
            logging.info("Moodle login page loaded.")

            # Check for 'wayf_submit_button' and click if present
            try:
                logging.info("Checking for 'wayf_submit_button'.")
                wayf_button = WebDriverWait(driver, 5).until(
                    EC.element_to_be_clickable((By.ID, 'wayf_submit_button'))
                )
                wayf_button.click()
                logging.info("'wayf_submit_button' found and clicked.")

                # Wait for redirection to login page
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.ID, 'username'))
                )
            except TimeoutException:
                logging.info("'wayf_submit_button' not found. Continuing with login.")

            # Wait for username and password fields
            logging.info("Waiting for the username field.")
            username_field = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, 'username'))
            )

            logging.info("Waiting for the password field.")
            password_field = WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.ID, 'password'))
            )

            # Enter login credentials
            logging.info("Entering login credentials.")
            username_field.send_keys(self.username)
            password_field.send_keys(self.password)

            # The form may be served from a URL other than LOGIN_URL (e.g.
            # after the optional 'wayf' step).  Comparing against LOGIN_URL
            # would then succeed immediately, so remember the URL that is
            # actually showing the form and wait for *that* to change.
            login_form_url = driver.current_url

            # Submit the form
            logging.info("Submitting the login form.")
            password_field.send_keys(Keys.RETURN)

            # Wait for login to complete
            logging.info("Waiting for login to complete.")
            WebDriverWait(driver, 30).until(EC.url_changes(login_form_url))
            logging.info("Login successful.")
        except Exception:
            logging.exception("An error occurred during login.")
            raise

    def get_courses(self):
        """Collect course names and URLs from the 'My Courses' page.

        ``self.courses`` is replaced (not extended) on success, so calling
        this method repeatedly does not create duplicate entries.

        Raises:
            RuntimeError: if no browser session exists.
            Exception: any Selenium error is logged and re-raised unchanged.
        """
        driver = self._require_driver()
        try:
            # Navigate to "My Courses" page
            logging.info(f"Navigating to 'My Courses' page: {self.MY_COURSES_URL}")
            driver.get(self.MY_COURSES_URL)

            # Wait until the page is loaded
            logging.info("Waiting for the 'My Courses' page to load.")
            WebDriverWait(driver, 20).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, 'a.aalink.coursename'))
            )
            logging.info("'My Courses' page loaded.")

            # Collect all courses from the page
            logging.info("Collecting all courses from 'My Courses' page.")
            course_elements = driver.find_elements(By.CSS_SELECTOR, 'a.aalink.coursename')
            logging.info(f"{len(course_elements)} courses found.")

            found = []
            for coursename_element in course_elements:
                try:
                    course_name = coursename_element.text.strip()
                    course_url = coursename_element.get_attribute('href')
                except Exception as e:
                    logging.warning(f"Error extracting course: {e}")
                    continue
                if not course_url:
                    # A link without href cannot be opened later; skip it now.
                    logging.warning(f"Error extracting course: no URL for '{course_name}'")
                    continue
                found.append({'CourseName': course_name, 'URL': course_url})
                logging.info(f"Course found: {course_name} - {course_url}")
            self.courses = found

            if not self.courses:
                logging.warning("No courses found. Check the HTML structure of the 'My Courses' page.")
                print("No courses found. Check the HTML structure of the 'My Courses' page.")
        except Exception:
            logging.exception("An error occurred while retrieving courses.")
            raise

    @staticmethod
    def _save_chunks(chunks, filepath):
        """Write an iterable of byte chunks to ``filepath`` atomically.

        Data goes to ``filepath + '.part'`` first and is moved into place only
        after every chunk was written, so a failed transfer never leaves a
        truncated archive or destroys a previously downloaded one.
        """
        partial_path = filepath + '.part'
        try:
            with open(partial_path, 'wb') as f:
                for chunk in chunks:
                    if chunk:
                        f.write(chunk)
            # os.replace overwrites an existing target.
            os.replace(partial_path, filepath)
        except BaseException:
            try:
                os.remove(partial_path)
            except OSError:
                pass
            raise

    def download_all_courses(self):
        """Download every course in ``self.courses`` as ``course_N.zip``.

        For each course the browser opens the course page, follows the
        'Download course content' link to read ``sesskey`` and ``contextid``,
        and the archive is then fetched with ``requests`` using the browser's
        cookies.  A failure in one course is logged and the loop continues
        with the next one.  ``N`` counts successful server responses, so it
        is not necessarily the position of the course in ``self.courses``.

        Raises:
            RuntimeError: if courses are present but no browser session exists.
        """
        if not self.courses:
            logging.warning("No courses to download.")
            return

        driver = self._require_driver()

        # Ensure the download directory exists
        os.makedirs(self.download_dir, exist_ok=True)

        course_counter = 1
        for course in self.courses:
            course_name = course['CourseName']
            course_url = course['URL']
            logging.info(f"Processing course: {course_name} - {course_url}")

            try:
                # Page navigation is inside the try so that one unreachable
                # course does not abort the remaining downloads.
                driver.get(course_url)

                # Wait until the course page is loaded
                WebDriverWait(driver, 20).until(
                    EC.presence_of_element_located((By.TAG_NAME, 'body'))
                )

                logging.info("Searching for 'Download course content' link.")
                download_link = driver.find_element(By.CSS_SELECTOR, 'a[data-downloadcourse="1"]')
                download_page_url = download_link.get_attribute('href')
                logging.info(f"Download page URL: {download_page_url}")

                # Open the download page
                driver.get(download_page_url)

                # Wait until the page is loaded
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.NAME, 'sesskey'))
                )

                # Extract 'sesskey' and 'contextid'
                sesskey = driver.find_element(By.NAME, 'sesskey').get_attribute('value')
                contextid = driver.find_element(By.NAME, 'contextid').get_attribute('value')
                # sesskey is a session token, so it is deliberately kept out
                # of the log file.
                logging.info(f"contextid: {contextid}")

                # Extract cookies from the Selenium session
                logging.info("Extracting cookies from the Selenium session.")
                cookies = {cookie['name']: cookie['value'] for cookie in driver.get_cookies()}

                # Prepare the HTTP POST request
                post_data = {
                    'sesskey': sesskey,
                    'contextid': contextid,
                    'download': 1,
                }
                headers = {
                    'User-Agent': 'Mozilla/5.0',
                    'Referer': download_page_url,
                }

                # Send the POST request with cookies
                logging.info(f"Sending HTTP POST request for course '{course_name}'.")
                with requests.Session() as session:
                    session.cookies.update(cookies)
                    with session.post(
                        self.DOWNLOAD_URL,
                        data=post_data,
                        headers=headers,
                        stream=True,
                        timeout=self.request_timeout,
                    ) as response:
                        response.raise_for_status()

                        # Generate filename as course_N.zip
                        filename = f"course_{course_counter}.zip"
                        course_counter += 1
                        filepath = os.path.join(self.download_dir, filename)

                        # Overwrites an existing file only once the new
                        # archive has been received completely.
                        self._save_chunks(response.iter_content(chunk_size=8192), filepath)

                logging.info(f"Course '{course_name}' downloaded and saved as '{filename}'.")
                print(f"Course '{course_name}' downloaded.")
            except Exception as e:
                logging.error(f"Error downloading course '{course_name}': {e}", exc_info=True)
                continue

    def close(self):
        """Quit the browser if one is running; safe to call more than once."""
        if self.driver is None:
            return
        logging.info("Closing the browser.")
        try:
            self.driver.quit()
        finally:
            # Drop the reference even if quit() fails, so later calls are no-ops.
            self.driver = None
|
Loading…
Reference in New Issue