Less mockup data.

master
Giò Diani 2024-11-23 16:29:44 +01:00
parent a173d34fa6
commit 971da55e45
15 changed files with 138 additions and 72 deletions

View File

@ -0,0 +1,19 @@
<?php
use Illuminate\Database\Migrations\Migration;
use Illuminate\Database\Schema\Blueprint;
use Illuminate\Support\Facades\Schema;
/**
 * Adds the nullable JSON column `subtitle_data` to the `episodes` table.
 *
 * The original migration only defined up(), which made it irreversible:
 * rolling back would leave the column in place. down() now removes it again.
 */
return new class extends Migration
{
    /**
     * Run the migrations.
     */
    public function up(): void
    {
        Schema::table('episodes', static function (Blueprint $table): void {
            // Nullable so that existing episode rows stay valid without a value.
            $table->json('subtitle_data')->nullable();
        });
    }

    /**
     * Reverse the migrations.
     */
    public function down(): void
    {
        Schema::table('episodes', static function (Blueprint $table): void {
            $table->dropColumn('subtitle_data');
        });
    }
};

File diff suppressed because one or more lines are too long

View File

@ -29,9 +29,8 @@
</span>Transkript</label> </span>Transkript</label>
</div> </div>
<div class="control"> <div class="control">
<input class="visually-hidden" id="tablist_panel_terms" name="tablist" type="radio" <input class="visually-hidden" id="tablist_panel_terms" name="tablist" type="radio" /><label
/><label for="tablist_panel_terms"><span for="tablist_panel_terms"><span class="visually-hidden">Zeige Tabinhalt
class="visually-hidden">Zeige Tabinhalt
</span>Dokumentbegriffe</label> </span>Dokumentbegriffe</label>
</div> </div>
</fieldset> </fieldset>
@ -57,7 +56,8 @@
@foreach($subtitles['cues'] as $cue) @foreach($subtitles['cues'] as $cue)
<li> <li>
<div> <div>
<a title="Zu Zeitpunkt {{ $cue['start']}} springen." data-start="{{ $cue['start']}}">{{ gmdate('H:i:s', $cue['start'])}}</a> <a title="Zu Zeitpunkt {{ $cue['start']}} springen." data-start="{{ $cue['start']}}">{{
gmdate('H:i:s', $cue['start'])}}</a>
</div> </div>
<div> <div>
{!! $cue['text'] !!} {!! $cue['text'] !!}
@ -87,17 +87,28 @@
<div id="tracks"> <div id="tracks">
<div class="track"> <div class="track">
<div class="track-ctrl"> <div class="track-ctrl">
<h2>Anzahl Worte</h2> <h2>Hauptfarben</h2>
<p></p> <p>
Hauptfarbe pro Frame.
</p>
</div> </div>
<div class="track-viz"></div> <div class="track-viz">
{!! $dom_color !!}
</div>
</div>
<div class="track">
<div class="track-ctrl">
<h2>Anzahl Worte</h2>
<p>Total Worte: {{ array_slice($subdata['word_count'], -1)[0] }}</p>
</div>
<div class="track-viz" id="words-count-track"></div>
</div> </div>
<div class="track"> <div class="track">
<div class="track-ctrl"> <div class="track-ctrl">
<h2>Sentiment</h2> <h2>Sentiment</h2>
<p></p> <p></p>
</div> </div>
<div class="track-viz"></div> <div class="track-viz" id="sentiment-track"></div>
</div> </div>
<div class="track"> <div class="track">
<div class="track-ctrl"> <div class="track-ctrl">
@ -113,15 +124,6 @@
</ul> </ul>
</div> </div>
</div> </div>
<div class="track">
<div class="track-ctrl">
<h2>Dominant Colors</h2>
<input type="number" min="1" max="10" value="4" id="topic-track-segment-ctrl">
</div>
<div class="track-viz">
{!! $dom_color !!}
</div>
</div>
</div> </div>
</div> </div>
</main> </main>

View File

@ -26,10 +26,11 @@ Route::get('/detail/{id}', function(int $id) {
$mediacomposition = json_decode($ep->mediacomposition, 1); $mediacomposition = json_decode($ep->mediacomposition, 1);
$durationSteps = $mediacomposition['chapterList'][0]['duration'] / 1000 / 10; $durationSteps = $mediacomposition['chapterList'][0]['duration'] / 1000 / 10;
$subdata = json_decode($ep->subtitle_data, 1);
$parser = new Podlove\Webvtt\Parser(); $parser = new Podlove\Webvtt\Parser();
$subtitles = $parser->parse($subtitles); $subtitles = $parser->parse($subtitles);
//dump($subtitles);
return view('detail', ['title' => $title, 'subtitles' => $subtitles, 'mediacomposition' => $mediacomposition, 'durationSteps' => $durationSteps, 'dom_color' => $ep->viz_data]); return view('detail', ['title' => $title, 'subtitles' => $subtitles, 'mediacomposition' => $mediacomposition, 'durationSteps' => $durationSteps, 'dom_color' => $ep->viz_data, 'subdata' => $subdata]);
}); });

View File

@ -4,14 +4,16 @@ con = sqlite3.connect("/home/gio/Code/VANA/database.sqlite")
cur = con.cursor() cur = con.cursor()
def get_subtitle(id): def get_subtitle(id):
row = cur.execute("SELECT subtitles FROM episodes WHERE ?", [id]) row = cur.execute("SELECT subtitles FROM episodes WHERE id = ?", [id])
subtitles = row.fetchone()
con.close() con.close()
return row.fetchone() return subtitles
def get_mediacomposition(id): def get_mediacomposition(id):
row = cur.execute("SELECT mediacomposition FROM episodes WHERE id = ?", [id]) row = cur.execute("SELECT mediacomposition FROM episodes WHERE id = ?", [id])
mediacomp = row.fetchone()
con.close() con.close()
return row.fetchone() return mediacomp
def get_podcast_url(id): def get_podcast_url(id):
row = cur.execute("SELECT json_extract(mediacomposition, '$.chapterList[0].podcastHdUrl') FROM episodes WHERE id = ?", [id]) row = cur.execute("SELECT json_extract(mediacomposition, '$.chapterList[0].podcastHdUrl') FROM episodes WHERE id = ?", [id])
@ -20,7 +22,12 @@ def get_podcast_url(id):
return podcast_url return podcast_url
def save_data_viz(id, data): def save_data_viz(id, data):
row = cur.execute("UPDATE episodes SET viz_data = ? WHERE id = ?", [data, id]) cur.execute("UPDATE episodes SET viz_data = ? WHERE id = ?", [data, id])
con.commit()
con.close()
def save_subtitle_data(id, data):
cur.execute("UPDATE episodes SET subtitle_data = ? WHERE id = ?", [data, id])
con.commit() con.commit()
con.close() con.close()

View File

@ -0,0 +1,33 @@
#!/usr/bin/env python3
import argparse
import json
from pathlib import Path
import polars as pl
from database import queries
def count_words(ep):
    """Build the cumulative word count of an episode's normalized subtitles.

    Reads ``data/<ep>/normalized_vtt.csv`` (resolved relative to
    ``Path(__file__).parents[4]``) and uses its ``sentences`` and ``end``
    columns.

    Args:
        ep: Episode id used as the folder name below ``data/``.

    Returns:
        A JSON string with two parallel lists, ``time`` and ``word_count``.
        Both start with ``0``; each further entry holds the rounded ``end``
        value of a row and the running total of words up to and including
        that row.
    """
    csv_path = Path(__file__).parents[4] / "data" / str(ep) / "normalized_vtt.csv"
    df = pl.read_csv(csv_path)

    time = [0]
    word_count = [0]
    for sentence, end in zip(df["sentences"], df["end"]):
        time.append(round(end))
        # split() without an argument collapses runs of whitespace and ignores
        # leading/trailing blanks; split(" ") counted every empty fragment as a
        # word and inflated the total. A null cell counts as zero words.
        words_in_sentence = len((sentence or "").split())
        word_count.append(word_count[-1] + words_in_sentence)

    return json.dumps({"time": time, "word_count": word_count})
# CLI: count the words of one episode and store the result in the database.
parser = argparse.ArgumentParser(
    prog="Count Words", description="Counts the words in the subtitle"
)
# The episode id is mandatory: without it count_words() would receive None and
# fail with a TypeError instead of a usage message.
parser.add_argument(
    "--episode",
    "-ep",
    required=True,
    help="Id of the episode whose normalized subtitles are counted.",
)
args = parser.parse_args()

queries.save_subtitle_data(args.episode, count_words(args.episode))

View File

@ -1,10 +1,12 @@
import re import re
import textwrap import textwrap
from pathlib import Path
import polars as pl import polars as pl
import spacy import spacy
import timecode as tc import timecode as tc
import webvtt import webvtt
from database import queries
class NormalizeVtt: class NormalizeVtt:
@ -13,7 +15,10 @@ class NormalizeVtt:
self.nlp = spacy.load("de_core_news_sm") self.nlp = spacy.load("de_core_news_sm")
# Der Timecode der Untertitel wird angepasst, dass jeder Block einem Satz entspricht. # Der Timecode der Untertitel wird angepasst, dass jeder Block einem Satz entspricht.
def sentencize(self, vtt): def sentencize(self, ep):
vtt = queries.get_subtitle(ep)[0]
captions = webvtt.from_string(textwrap.dedent(vtt).strip()) captions = webvtt.from_string(textwrap.dedent(vtt).strip())
sentences = [] sentences = []
@ -79,4 +84,7 @@ class NormalizeVtt:
text = text + " " + token.text text = text + " " + token.text
d = {"sentences": sentences, "start": times_start, "end": times_end} d = {"sentences": sentences, "start": times_start, "end": times_end}
df = pl.DataFrame(data=d) df = pl.DataFrame(data=d)
return df.write_csv()
data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
return df.write_csv(data_folder)

View File

@ -3,7 +3,6 @@
import argparse import argparse
import normalize_vtt as nv import normalize_vtt as nv
from database import queries
normalizer = nv.NormalizeVtt() normalizer = nv.NormalizeVtt()
@ -19,5 +18,5 @@ args = parser.parse_args()
match args.action: match args.action:
case "normalize": case "normalize":
normalizer.sentencize(vtt=queries.get_subtitle(args.episode)[0]) normalizer.sentencize(args.episode)
print("Normalized.") print("Normalized.")

View File

@ -1,6 +1,7 @@
# video_colors.py # video_colors.py
Python script to extract the dominant colors of an image and generate a SVG containing them. Python script to extract the dominant colors of an image and generate a SVG containing them.
The script expects a folder with all the extracted frames in ../../data/<id_episode|int>/ The script expects a folder with all the extracted frames in `../../data/<id_episode|int>/`
```bash ```bash
python src/dominant_colors.py -ep <int> python src/dominant_colors.py -ep <int>
``` ```

View File

@ -1,6 +1,6 @@
# video_processing.py # video_processing.py
Python script to download episodes and extract the frames. Python script to download episodes and extract the frames.
The extracted frames are saved to ../../data/<id_episode|int>/ The extracted frames are saved to `../../data/<id_episode|int>/`
```bash ```bash
python src/video_processing.py -a <"download"|"extract_frames"> -ep <int> python src/video_processing.py -a <"download"|"extract_frames"> -ep <int>
``` ```

View File

@ -1,4 +0,0 @@
from pathlib import Path
data_folder = str(Path(__file__).parents[3])
print(data_folder)

Binary file not shown.