Less mockup data.

master
Giò Diani 2024-11-23 16:29:44 +01:00
parent a173d34fa6
commit 971da55e45
15 changed files with 138 additions and 72 deletions

View File

@ -0,0 +1,19 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Adds a nullable JSON column `subtitle_data` to the `episodes` table.
     */
    public function up(): void
    {
        Schema::table('episodes', static function (Blueprint $table): void {
            $table->json('subtitle_data')->nullable();
        });
    }

    /**
     * Reverse the migrations.
     *
     * Drops the `subtitle_data` column again so that the migration can be
     * rolled back cleanly.
     */
    public function down(): void
    {
        Schema::table('episodes', static function (Blueprint $table): void {
            $table->dropColumn('subtitle_data');
        });
    }
};

File diff suppressed because one or more lines are too long

View File

@ -29,9 +29,8 @@
</span>Transkript</label>
</div>
<div class="control">
<input class="visually-hidden" id="tablist_panel_terms" name="tablist" type="radio"
/><label for="tablist_panel_terms"><span
class="visually-hidden">Zeige Tabinhalt
<input class="visually-hidden" id="tablist_panel_terms" name="tablist" type="radio" /><label
for="tablist_panel_terms"><span class="visually-hidden">Zeige Tabinhalt
</span>Dokumentbegriffe</label>
</div>
</fieldset>
@ -53,18 +52,19 @@
Transkript (Inhalt)
</h2>
<div class="inner">
<ol id="transcript">
@foreach($subtitles['cues'] as $cue)
<li>
<div>
<a title="Zu Zeitpunkt {{ $cue['start']}} springen." data-start="{{ $cue['start']}}">{{ gmdate('H:i:s', $cue['start'])}}</a>
</div>
<div>
{!! $cue['text'] !!}
</div>
</li>
@endforeach
</ol>
<ol id="transcript">
@foreach($subtitles['cues'] as $cue)
<li>
<div>
<a title="Zu Zeitpunkt {{ $cue['start']}} springen." data-start="{{ $cue['start']}}">{{
gmdate('H:i:s', $cue['start'])}}</a>
</div>
<div>
{!! $cue['text'] !!}
</div>
</li>
@endforeach
</ol>
</div>
</div>
<div class="panel" id="tablist_panel_terms_panel" hidden>
@ -87,17 +87,28 @@
<div id="tracks">
<div class="track">
<div class="track-ctrl">
<h2>Anzahl Worte</h2>
<p></p>
<h2>Hauptfarben</h2>
<p>
Hauptfarbe pro Frame.
</p>
</div>
<div class="track-viz"></div>
<div class="track-viz">
{!! $dom_color !!}
</div>
</div>
<div class="track">
<div class="track-ctrl">
<h2>Anzahl Worte</h2>
<p>Total Worte: {{ array_slice($subdata['word_count'], -1)[0] }}</p>
</div>
<div class="track-viz" id="words-count-track"></div>
</div>
<div class="track">
<div class="track-ctrl">
<h2>Sentiment</h2>
<p></p>
</div>
<div class="track-viz"></div>
<div class="track-viz" id="sentiment-track"></div>
</div>
<div class="track">
<div class="track-ctrl">
@ -113,15 +124,6 @@
</ul>
</div>
</div>
<div class="track">
<div class="track-ctrl">
<h2>Dominant Colors</h2>
<input type="number" min="1" max="10" value="4" id="topic-track-segment-ctrl">
</div>
<div class="track-viz">
{!! $dom_color !!}
</div>
</div>
</div>
</div>
</main>

View File

@ -26,10 +26,11 @@ Route::get('/detail/{id}', function(int $id) {
$mediacomposition = json_decode($ep->mediacomposition, 1);
$durationSteps = $mediacomposition['chapterList'][0]['duration'] / 1000 / 10;
$subdata = json_decode($ep->subtitle_data, 1);
$parser = new Podlove\Webvtt\Parser();
$subtitles = $parser->parse($subtitles);
//dump($subtitles);
return view('detail', ['title' => $title, 'subtitles' => $subtitles, 'mediacomposition' => $mediacomposition, 'durationSteps' => $durationSteps, 'dom_color' => $ep->viz_data]);
return view('detail', ['title' => $title, 'subtitles' => $subtitles, 'mediacomposition' => $mediacomposition, 'durationSteps' => $durationSteps, 'dom_color' => $ep->viz_data, 'subdata' => $subdata]);
});

View File

@ -4,14 +4,16 @@ con = sqlite3.connect("/home/gio/Code/VANA/database.sqlite")
cur = con.cursor()
def get_subtitle(id):
    """Fetch the ``subtitles`` column of one episode.

    Args:
        id: Value matched against ``episodes.id``.

    Returns:
        Whatever ``fetchone()`` yields for the query: the first matching row
        (a one-element tuple with the default row factory) or ``None`` when no
        row matches.
    """
    # Filter on the id column; a bare "WHERE ?" does not select a single episode.
    row = cur.execute("SELECT subtitles FROM episodes WHERE id = ?", [id])
    # Read the row before closing: fetching after con.close() raises.
    subtitles = row.fetchone()
    # NOTE(review): this closes the shared module-level connection, so no
    # further query can be run through `cur` in the same process.
    con.close()
    return subtitles
def get_mediacomposition(id):
    """Fetch the ``mediacomposition`` column of one episode.

    Args:
        id: Value matched against ``episodes.id``.

    Returns:
        Whatever ``fetchone()`` yields for the query: the first matching row
        (a one-element tuple with the default row factory) or ``None`` when no
        row matches.
    """
    row = cur.execute("SELECT mediacomposition FROM episodes WHERE id = ?", [id])
    # Read the row before closing: fetching after con.close() raises.
    mediacomp = row.fetchone()
    # NOTE(review): this closes the shared module-level connection, so no
    # further query can be run through `cur` in the same process.
    con.close()
    return mediacomp
def get_podcast_url(id):
row = cur.execute("SELECT json_extract(mediacomposition, '$.chapterList[0].podcastHdUrl') FROM episodes WHERE id = ?", [id])
@ -20,7 +22,12 @@ def get_podcast_url(id):
return podcast_url
def save_data_viz(id, data):
    """Store ``data`` in the ``viz_data`` column of one episode.

    Args:
        id: Value matched against ``episodes.id``.
        data: Value written to ``episodes.viz_data``.
    """
    # Run the UPDATE exactly once; its cursor result is not needed.
    cur.execute("UPDATE episodes SET viz_data = ? WHERE id = ?", [data, id])
    con.commit()
    # NOTE(review): this closes the shared module-level connection, so no
    # further query can be run through `cur` in the same process.
    con.close()
def save_subtitle_data(id, data):
    """Write ``data`` into the ``subtitle_data`` column of one episode.

    Args:
        id: Value matched against ``episodes.id``.
        data: Value written to ``episodes.subtitle_data``.

    The change is committed and the module-level connection is closed
    afterwards.
    """
    statement = "UPDATE episodes SET subtitle_data = ? WHERE id = ?"
    params = [data, id]
    cur.execute(statement, params)
    con.commit()
    con.close()

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python3
import argparse
import json
from pathlib import Path
import polars as pl
from database import queries
def count_words(ep):
    """Build the cumulative word count of an episode's normalized subtitles.

    Reads ``data/<ep>/normalized_vtt.csv`` below the directory four levels
    above this file and walks its ``sentences`` and ``end`` columns row by row.

    Args:
        ep: Episode identifier used as the folder name below ``data/``.

    Returns:
        A JSON string with two parallel lists: ``time`` (the rounded ``end``
        value of each row) and ``word_count`` (the running total of words up
        to and including that row). Both lists start with a leading ``0``.
    """
    csv_path = Path(__file__).parents[4] / "data" / str(ep) / "normalized_vtt.csv"
    df = pl.read_csv(csv_path)

    time = [0]
    word_count = [0]
    for sentence, end in zip(df["sentences"], df["end"]):
        time.append(round(end))
        # split() without a separator ignores leading, trailing and repeated
        # whitespace, so empty strings are never counted as words. A missing
        # sentence contributes zero words.
        words = sentence.split() if sentence else []
        word_count.append(word_count[-1] + len(words))

    return json.dumps({"time": time, "word_count": word_count})
# CLI
parser = argparse.ArgumentParser(
    prog="Count Words", description="Counts the words in the subtitle"
)
# The episode is needed both to locate the CSV file and to select the database
# row, so a missing value is reported as a usage error instead of failing later.
parser.add_argument(
    "--episode",
    "-ep",
    required=True,
    help="Episode whose normalized subtitles are counted.",
)
args = parser.parse_args()

# Compute the word counts and store the resulting JSON for the episode.
queries.save_subtitle_data(args.episode, count_words(args.episode))

View File

@ -1,10 +1,12 @@
import re
import textwrap
from pathlib import Path
import polars as pl
import spacy
import timecode as tc
import webvtt
from database import queries
class NormalizeVtt:
@ -13,7 +15,10 @@ class NormalizeVtt:
self.nlp = spacy.load("de_core_news_sm")
# Der Timecode der Untertitel wird angepasst, dass jeder Block einem Satz entspricht.
def sentencize(self, vtt):
def sentencize(self, ep):
vtt = queries.get_subtitle(ep)[0]
captions = webvtt.from_string(textwrap.dedent(vtt).strip())
sentences = []
@ -79,4 +84,7 @@ class NormalizeVtt:
text = text + " " + token.text
d = {"sentences": sentences, "start": times_start, "end": times_end}
df = pl.DataFrame(data=d)
return df.write_csv()
data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
return df.write_csv(data_folder)

View File

@ -3,7 +3,6 @@
import argparse
import normalize_vtt as nv
from database import queries
normalizer = nv.NormalizeVtt()
@ -19,5 +18,5 @@ args = parser.parse_args()
match args.action:
case "normalize":
normalizer.sentencize(vtt=queries.get_subtitle(args.episode)[0])
normalizer.sentencize(args.episode)
print("Normalized.")

View File

@ -1,6 +1,7 @@
# video_colors.py
Python script to extract the dominant colors of an image and generate an SVG containing them.
The script expects a folder with all the extracted frames in ../../data/<id_episode|int>/
The script expects a folder with all the extracted frames in `../../data/<id_episode|int>/`
```bash
python src/dominant_colors.py -ep <int>
```

View File

@ -1,6 +1,6 @@
# video_processing.py
Python script to download episodes and extract the frames.
The extracted frames are saved to ../../data/<id_episode|int>/
The extracted frames are saved to `../../data/<id_episode|int>/`
```bash
python src/video_processing.py -a <"download"|"extract_frames"> -ep <int>
```

View File

@ -1,4 +0,0 @@
from pathlib import Path
data_folder = str(Path(__file__).parents[3])
print(data_folder)

Binary file not shown.