Python notebook

This commit is contained in:
DotNaos 2025-06-09 08:51:45 +02:00
parent 381b44bd69
commit ecadf0606c
2 changed files with 320 additions and 27 deletions

View File

@ -1,8 +1,295 @@
{
"cells": [],
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'\\nTeilaufgabe 1:\\n> Zerlegen Sie zeilenweise die Datei syslog in die folgenden Bestandteile:\\n- Datum\\n- Uhrzeit\\n- Rechnername\\n- Prozessname\\n- Numerische Prozess-ID (ist nicht in allen Zeilen vorhanden)\\n- Nachricht\\n> Wandeln Sie dabei Datum und Uhrzeit in ein datetime-Objekt um. Nehmen Sie das aktuelle Jahr für die Bildung des Objekts.\\n> Die Ausgabe ist eine Liste mit einem dict pro Zeile aus der Quelldatei.\\nTeilaufgabe 2:\\n> Laden Sie die Datei heidi.md.\\n> Ersetzen Sie den Namen Heidi unter Verwendung des re-Moduls mit einem anderen Mädchennamen. https://de.statista.com/statistik/daten/studie/286369/umfrage/beliebteste-vornamen-fuer-maedchen-in-der-deutschen-schweiz/ liefert Ihnen eine Auswahl.\\n> Speichern Sie den geänderten Text mit dem Dateinamen ab, den Sie ausgewählt haben. Vergessen Sie dabei nicht auf die Dateiendung .md.\\n'"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"from datetime import datetime\n",
"\n",
"\"\"\"\n",
"Teilaufgabe 1:\n",
"> Zerlegen Sie zeilenweise die Datei syslog in die folgenden Bestandteile:\n",
"- Datum\n",
"- Uhrzeit\n",
"- Rechnername\n",
"- Prozessname\n",
"- Numerische Prozess-ID (ist nicht in allen Zeilen vorhanden)\n",
"- Nachricht\n",
"> Wandeln Sie dabei Datum und Uhrzeit in ein datetime-Objekt um. Nehmen Sie das aktuelle Jahr für die Bildung des Objekts.\n",
"> Die Ausgabe ist eine Liste mit einem dict pro Zeile aus der Quelldatei.\n",
"Teilaufgabe 2:\n",
"> Laden Sie die Datei heidi.md.\n",
"> Ersetzen Sie den Namen Heidi unter Verwendung des re-Moduls mit einem anderen Mädchennamen. https://de.statista.com/statistik/daten/studie/286369/umfrage/beliebteste-vornamen-fuer-maedchen-in-der-deutschen-schweiz/ liefert Ihnen eine Auswahl.\n",
"> Speichern Sie den geänderten Text mit dem Dateinamen ab, den Sie ausgewählt haben. Vergessen Sie dabei nicht auf die Dateiendung .md.\n",
"\"\"\""
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"def aufgabe_1() -> list[dict]:\n",
" with open(\"syslog.txt\", \"r\", encoding=\"utf-8\") as f:\n",
" log = f.read()\n",
"\n",
" pattern = (\n",
" r\"^(?P<date>\\w{3}\\s+\\d{1,2})\\s\"\n",
" r\"(?P<time>\\d{2}:\\d{2}:\\d{2})\\s\"\n",
" r\"(?P<computer>[\\w.-]+)\\s\"\n",
" r\"(?P<process>[\\w.-]+)\"\n",
" r\"(?:\\[(?P<process_id>\\d+)\\])?:\\s\"\n",
" r\"(?P<message>.*)$\"\n",
" )\n",
"\n",
" items: list[dict] = []\n",
" for match in re.finditer(pattern, log, re.MULTILINE):\n",
" m = match.groupdict()\n",
" dt_str = f\"{m['date']} {datetime.now().year} {m['time']}\"\n",
" dt = datetime.strptime(dt_str, \"%b %d %Y %H:%M:%S\")\n",
"\n",
" items.append(\n",
" {\n",
" \"timestamp\": dt,\n",
" \"computer\": m[\"computer\"],\n",
" \"process\": m[\"process\"],\n",
" \"process_id\": m[\"process_id\"],\n",
" \"message\": m[\"message\"].strip(),\n",
" }\n",
" )\n",
"\n",
" return items"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"def aufgabe_2():\n",
" with open(\"heidi.md\", \"r\", encoding=\"utf-8\") as f:\n",
" text = f.read()\n",
"\n",
" pattern = r\"\\bHeidi\"\n",
" new_name = \"Mia\"\n",
" text = re.sub(pattern, new_name, text)\n",
"\n",
" output_filename = f\"{new_name.lower()}.md\"\n",
" with open(output_filename, \"w\", encoding=\"utf-8\") as f:\n",
" f.write(text)\n"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>timestamp</th>\n",
" <th>computer</th>\n",
" <th>process</th>\n",
" <th>process_id</th>\n",
" <th>message</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2025-04-17 06:44:53</td>\n",
" <td>bs1-lab</td>\n",
" <td>kernel</td>\n",
" <td>None</td>\n",
" <td>Linux version 5.10.0-8-amd64 (debian-kernel@li...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2025-04-17 06:44:53</td>\n",
" <td>bs1-lab</td>\n",
" <td>kernel</td>\n",
" <td>None</td>\n",
" <td>Command line: BOOT_IMAGE=/boot/vmlinuz-5.10.0-...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>2025-04-17 06:44:53</td>\n",
" <td>bs1-lab</td>\n",
" <td>kernel</td>\n",
" <td>None</td>\n",
" <td>x86/fpu: Supporting XSAVE feature 0x001: 'x87 ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2025-04-17 06:44:53</td>\n",
" <td>bs1-lab</td>\n",
" <td>kernel</td>\n",
" <td>None</td>\n",
" <td>x86/fpu: Supporting XSAVE feature 0x002: 'SSE ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2025-04-17 06:44:53</td>\n",
" <td>bs1-lab</td>\n",
" <td>kernel</td>\n",
" <td>None</td>\n",
" <td>x86/fpu: Supporting XSAVE feature 0x004: 'AVX ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>954</th>\n",
" <td>2025-04-17 06:48:49</td>\n",
" <td>bs1-lab</td>\n",
" <td>sudo</td>\n",
" <td>779</td>\n",
" <td>tux : TTY=pts/0 ; PWD=/home/tux ; USER=root ; ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>955</th>\n",
" <td>2025-04-17 06:48:49</td>\n",
" <td>bs1-lab</td>\n",
" <td>sudo</td>\n",
" <td>779</td>\n",
" <td>pam_unix(sudo:session): session opened for use...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>956</th>\n",
" <td>2025-04-17 06:48:50</td>\n",
" <td>bs1-lab</td>\n",
" <td>sudo</td>\n",
" <td>779</td>\n",
" <td>pam_unix(sudo:session): session closed for use...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>957</th>\n",
" <td>2025-04-17 06:48:59</td>\n",
" <td>bs1-lab</td>\n",
" <td>sudo</td>\n",
" <td>783</td>\n",
" <td>tux : TTY=pts/0 ; PWD=/home/tux ; USER=root ; ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>958</th>\n",
" <td>2025-04-17 06:48:59</td>\n",
" <td>bs1-lab</td>\n",
" <td>sudo</td>\n",
" <td>783</td>\n",
" <td>pam_unix(sudo:session): session opened for use...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>959 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" timestamp computer process process_id \\\n",
"0 2025-04-17 06:44:53 bs1-lab kernel None \n",
"1 2025-04-17 06:44:53 bs1-lab kernel None \n",
"2 2025-04-17 06:44:53 bs1-lab kernel None \n",
"3 2025-04-17 06:44:53 bs1-lab kernel None \n",
"4 2025-04-17 06:44:53 bs1-lab kernel None \n",
".. ... ... ... ... \n",
"954 2025-04-17 06:48:49 bs1-lab sudo 779 \n",
"955 2025-04-17 06:48:49 bs1-lab sudo 779 \n",
"956 2025-04-17 06:48:50 bs1-lab sudo 779 \n",
"957 2025-04-17 06:48:59 bs1-lab sudo 783 \n",
"958 2025-04-17 06:48:59 bs1-lab sudo 783 \n",
"\n",
" message \n",
"0 Linux version 5.10.0-8-amd64 (debian-kernel@li... \n",
"1 Command line: BOOT_IMAGE=/boot/vmlinuz-5.10.0-... \n",
"2 x86/fpu: Supporting XSAVE feature 0x001: 'x87 ... \n",
"3 x86/fpu: Supporting XSAVE feature 0x002: 'SSE ... \n",
"4 x86/fpu: Supporting XSAVE feature 0x004: 'AVX ... \n",
".. ... \n",
"954 tux : TTY=pts/0 ; PWD=/home/tux ; USER=root ; ... \n",
"955 pam_unix(sudo:session): session opened for use... \n",
"956 pam_unix(sudo:session): session closed for use... \n",
"957 tux : TTY=pts/0 ; PWD=/home/tux ; USER=root ; ... \n",
"958 pam_unix(sudo:session): session opened for use... \n",
"\n",
"[959 rows x 5 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.DataFrame(aufgabe_1())\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"aufgabe_2()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv (3.13.3)",
"language": "python",
"name": "python3"
},
"language_info": {
"name": "python"
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.13.3"
}
},
"nbformat": 4,

View File

@ -1,5 +1,5 @@
from datetime import datetime
import re
from datetime import datetime
"""
Teilaufgabe 1:
@ -20,46 +20,52 @@ Teilaufgabe 2:
def aufgabe_1(path: str = "part-3/regex/syslog.txt") -> list[dict]:
    """Parse a syslog file into one dict per recognised line.

    A recognised line looks like
    ``Apr 17 06:44:53 host process[123]: message``; the ``[123]`` process
    id is optional. Lines that do not match this shape are skipped.

    Args:
        path: Location of the syslog file (read as UTF-8). Defaults to the
            path this exercise has always used.

    Returns:
        A list of dicts with the keys ``timestamp`` (naive ``datetime``),
        ``computer``, ``process``, ``process_id`` (digit string, or
        ``None`` when the line carries no id) and ``message`` (stripped).

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a matched date/time is not a valid calendar value
            (for example an unknown month abbreviation).
    """
    pattern = re.compile(
        r"^(?P<date>\w{3}\s+\d{1,2})\s"
        r"(?P<time>\d{2}:\d{2}:\d{2})\s"
        r"(?P<computer>[\w.-]+)\s"
        r"(?P<process>[\w.-]+)"
        r"(?:\[(?P<process_id>\d+)\])?:\s"
        r"(?P<message>.*)$"
    )

    # Syslog lines carry no year; the task says to use the current one.
    # Read it once so every entry of a run gets the same year.
    year = datetime.now().year

    items: list[dict] = []
    with open(path, "r", encoding="utf-8") as f:
        # Matching line by line keeps ``\s`` from spanning a newline, so a
        # line with an empty message cannot absorb the line after it.
        for line in f:
            match = pattern.match(line)
            if match is None:
                continue

            m = match.groupdict()
            dt = datetime.strptime(
                f"{m['date']} {year} {m['time']}", "%b %d %Y %H:%M:%S"
            )
            items.append(
                {
                    "timestamp": dt,
                    "computer": m["computer"],
                    "process": m["process"],
                    "process_id": m["process_id"],
                    "message": m["message"].strip(),
                }
            )

    return items


def aufgabe_2(
    new_name: str = "Mia",
    source_path: str = "part-3/regex/heidi.md",
    output_dir: str = "part-3/regex",
) -> None:
    """Replace the name "Heidi" in a text file and save the result.

    The changed text is written to ``<output_dir>/<new_name lowercased>.md``.

    Args:
        new_name: Name that replaces every "Heidi". Defaults to "Mia".
        source_path: UTF-8 text file to read.
        output_dir: Directory the renamed copy is written to.

    Raises:
        OSError: If the source cannot be read or the output cannot be
            written.
    """
    with open(source_path, "r", encoding="utf-8") as f:
        text = f.read()

    # Only a leading word boundary: inflected forms such as "Heidis" are
    # renamed too, while "Heidi" inside a longer word is left alone.
    pattern = r"\bHeidi"
    # A callable replacement inserts new_name literally, so backslashes or
    # group references in the name are never read as a regex template.
    text = re.sub(pattern, lambda _match: new_name, text)

    output_filename = f"{output_dir}/{new_name.lower()}.md"
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(text)


if __name__ == "__main__":
    # Script entry point: show the parsed syslog entries on stdout.
    parsed_entries = aufgabe_1()
    print(parsed_entries)
    # Teilaufgabe 2 is switched off here; call aufgabe_2() to write the
    # renamed copy of heidi.md.