Less mockup data.
parent a173d34fa6
commit 971da55e45
@@ -0,0 +1,19 @@
+<?php
+
+use Illuminate\Database\Migrations\Migration;
+use Illuminate\Database\Schema\Blueprint;
+use Illuminate\Support\Facades\Schema;
+
+return new class extends Migration
+{
+    /**
+     * Run the migrations.
+     */
+    public function up(): void
+    {
+        Schema::table('episodes', function (Blueprint $table) {
+            $table->json('subtitle_data')->nullable();
+        });
+    }
+
+};
File diff suppressed because one or more lines are too long
@@ -29,9 +29,8 @@
 </span>Transkript</label>
 </div>
 <div class="control">
-<input class="visually-hidden" id="tablist_panel_terms" name="tablist" type="radio"
-/><label for="tablist_panel_terms"><span
-class="visually-hidden">Zeige Tabinhalt
+<input class="visually-hidden" id="tablist_panel_terms" name="tablist" type="radio" /><label
+for="tablist_panel_terms"><span class="visually-hidden">Zeige Tabinhalt
 </span>Dokumentbegriffe</label>
 </div>
 </fieldset>
@@ -53,18 +52,19 @@
 Transkript (Inhalt)
 </h2>
 <div class="inner">
 <ol id="transcript">
 @foreach($subtitles['cues'] as $cue)
 <li>
 <div>
-<a title="Zu Zeitpunkt {{ $cue['start']}} springen." data-start="{{ $cue['start']}}">{{ gmdate('H:i:s', $cue['start'])}}</a>
+<a title="Zu Zeitpunkt {{ $cue['start']}} springen." data-start="{{ $cue['start']}}">{{
+gmdate('H:i:s', $cue['start'])}}</a>
 </div>
 <div>
 {!! $cue['text'] !!}
 </div>
 </li>
 @endforeach
 </ol>
 </div>
 </div>
 <div class="panel" id="tablist_panel_terms_panel" hidden>
@@ -87,17 +87,28 @@
 <div id="tracks">
 <div class="track">
 <div class="track-ctrl">
-<h2>Anzahl Worte</h2>
-<p></p>
+<h2>Hauptfarben</h2>
+<p>
+Hauptfarbe pro Frame.
+</p>
 </div>
-<div class="track-viz"></div>
+<div class="track-viz">
+{!! $dom_color !!}
+</div>
+</div>
+<div class="track">
+<div class="track-ctrl">
+<h2>Anzahl Worte</h2>
+<p>Total Worte: {{ array_slice($subdata['word_count'], -1)[0] }}</p>
+</div>
+<div class="track-viz" id="words-count-track"></div>
 </div>
 <div class="track">
 <div class="track-ctrl">
 <h2>Sentiment</h2>
 <p></p>
 </div>
-<div class="track-viz"></div>
+<div class="track-viz" id="sentiment-track"></div>
 </div>
 <div class="track">
 <div class="track-ctrl">
@@ -113,15 +124,6 @@
 </ul>
 </div>
 </div>
-<div class="track">
-<div class="track-ctrl">
-<h2>Dominant Colors</h2>
-<input type="number" min="1" max="10" value="4" id="topic-track-segment-ctrl">
-</div>
-<div class="track-viz">
-{!! $dom_color !!}
-</div>
-</div>
 </div>
 </div>
 </main>
@@ -26,10 +26,11 @@ Route::get('/detail/{id}', function(int $id) {
 $mediacomposition = json_decode($ep->mediacomposition, 1);
 $durationSteps = $mediacomposition['chapterList'][0]['duration'] / 1000 / 10;
 
+$subdata = json_decode($ep->subtitle_data, 1);
+
 $parser = new Podlove\Webvtt\Parser();
 $subtitles = $parser->parse($subtitles);
-//dump($subtitles);
 
-return view('detail', ['title' => $title, 'subtitles' => $subtitles, 'mediacomposition' => $mediacomposition, 'durationSteps' => $durationSteps, 'dom_color' => $ep->viz_data]);
+return view('detail', ['title' => $title, 'subtitles' => $subtitles, 'mediacomposition' => $mediacomposition, 'durationSteps' => $durationSteps, 'dom_color' => $ep->viz_data, 'subdata' => $subdata]);
 
 });
Binary file not shown.
Binary file not shown.
@@ -4,14 +4,16 @@ con = sqlite3.connect("/home/gio/Code/VANA/database.sqlite")
 cur = con.cursor()
 
 def get_subtitle(id):
-    row = cur.execute("SELECT subtitles FROM episodes WHERE ?", [id])
+    row = cur.execute("SELECT subtitles FROM episodes WHERE id = ?", [id])
+    subtitles = row.fetchone()
     con.close()
-    return row.fetchone()
+    return subtitles
 
 def get_mediacomposition(id):
     row = cur.execute("SELECT mediacomposition FROM episodes WHERE id = ?", [id])
+    mediacomp = row.fetchone()
     con.close()
-    return row.fetchone()
+    return mediacomp
 
 def get_podcast_url(id):
     row = cur.execute("SELECT json_extract(mediacomposition, '$.chapterList[0].podcastHdUrl') FROM episodes WHERE id = ?", [id])

@@ -20,7 +22,12 @@ def get_podcast_url(id):
     return podcast_url
 
 def save_data_viz(id, data):
-    row = cur.execute("UPDATE episodes SET viz_data = ? WHERE id = ?", [data, id])
+    cur.execute("UPDATE episodes SET viz_data = ? WHERE id = ?", [data, id])
+    con.commit()
+    con.close()
+
+def save_subtitle_data(id, data):
+    cur.execute("UPDATE episodes SET subtitle_data = ? WHERE id = ?", [data, id])
     con.commit()
     con.close()
 
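Side note on the queries module above (not part of the commit): every helper closes the shared module-level connection, so only one call succeeds per process. A minimal sketch of the same write with a short-lived, per-call connection, assuming the schema and database path shown in the hunk header:

```python
import sqlite3

DB_PATH = "/home/gio/Code/VANA/database.sqlite"  # path taken from the diff above


def save_subtitle_data(id, data):
    # One connection per call; the with-block commits on success,
    # then the connection is closed explicitly.
    con = sqlite3.connect(DB_PATH)
    with con:
        con.execute("UPDATE episodes SET subtitle_data = ? WHERE id = ?", [data, id])
    con.close()
```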
Binary file not shown.
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+import argparse
+import json
+from pathlib import Path
+
+import polars as pl
+from database import queries
+
+
+def count_words(ep):
+    data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
+    df = pl.read_csv(data_folder)
+
+    time = [0]
+    word_count = [0]
+
+    for i, row in enumerate(df["sentences"]):
+        time.append(round(df["end"][i]))
+        word_count.append(word_count[i] + len(df["sentences"][i].split(" ")))
+
+    return json.dumps({"time" : time, "word_count" : word_count })
+
+
+# CLI
+parser = argparse.ArgumentParser(
+    prog="Count Words", description="Counts the words in the subtitle"
+)
+
+parser.add_argument("--episode", "-ep")
+args = parser.parse_args()
+
+queries.save_subtitle_data(args.episode, count_words(args.episode))
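For orientation (not part of the commit): count_words() stores cumulative totals, which is why the Blade template above reads the last element of word_count as the overall total. A small sketch with made-up values showing how the stored JSON reads back:

```python
import json

# Example values only; real data comes from the episodes.subtitle_data column.
subtitle_data = '{"time": [0, 4, 9], "word_count": [0, 7, 15]}'

subdata = json.loads(subtitle_data)
total_words = subdata["word_count"][-1]  # same value the view reads via array_slice(..., -1)[0]
print(total_words)  # 15
```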
@@ -1,10 +1,12 @@
 import re
 import textwrap
+from pathlib import Path
 
 import polars as pl
 import spacy
 import timecode as tc
 import webvtt
+from database import queries
 
 
 class NormalizeVtt:

@@ -13,7 +15,10 @@ class NormalizeVtt:
         self.nlp = spacy.load("de_core_news_sm")
 
     # Der Timecode der Untertitel wird angepasst, dass jeder Block einem Satz entspricht.
-    def sentencize(self, vtt):
+    def sentencize(self, ep):
+
+        vtt = queries.get_subtitle(ep)[0]
+
         captions = webvtt.from_string(textwrap.dedent(vtt).strip())
 
         sentences = []

@@ -79,4 +84,7 @@ class NormalizeVtt:
                 text = text + " " + token.text
         d = {"sentences": sentences, "start": times_start, "end": times_end}
         df = pl.DataFrame(data=d)
-        return df.write_csv()
+
+        data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
+
+        return df.write_csv(data_folder)
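A side note on the output path built in sentencize() (not part of the commit): Path(__file__).parents[4] assumes the module sits four directory levels below the project root that holds data/. The same path assembled with pathlib operators, under that layout assumption:

```python
from pathlib import Path


def normalized_csv_path(ep: str) -> Path:
    root = Path(__file__).parents[4]  # assumed project root containing data/
    return root / "data" / ep / "normalized_vtt.csv"
```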
@@ -3,7 +3,6 @@
 import argparse
 
 import normalize_vtt as nv
-from database import queries
 
 normalizer = nv.NormalizeVtt()
 

@@ -19,5 +18,5 @@ args = parser.parse_args()
 
 match args.action:
     case "normalize":
-        normalizer.sentencize(vtt=queries.get_subtitle(args.episode)[0])
+        normalizer.sentencize(args.episode)
         print("Normalized.")
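To make the interface change above concrete (not part of the commit): callers no longer fetch the raw VTT themselves; they pass the episode id and NormalizeVtt loads the subtitle internally. A minimal before/after sketch with a placeholder episode id:

```python
import normalize_vtt as nv

normalizer = nv.NormalizeVtt()

# Before this commit the caller fetched the raw VTT string itself:
#   normalizer.sentencize(vtt=queries.get_subtitle("42")[0])
# After this commit the caller passes the episode id ("42" is a placeholder):
normalizer.sentencize("42")
```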
@@ -1,6 +1,7 @@
 # video_colors.py
 Python script to extract the dominant colors of an image and generate a SVG containing them.
-The script expects a folder with all the extracted frames in ../../data/<id_episode|int>/
+The script expects a folder with all the extracted frames in `../../data/<id_episode|int>/`
+
 ```bash
 python src/dominant_colors.py -ep <int>
 ```
@@ -1,6 +1,6 @@
 # video_processing.py
 Python script to download episodes and extract the frames.
-The extracted frames are saved to ../../data/<id_episode|int>/
+The extracted frames are saved to `../../data/<id_episode|int>/`
 ```bash
 python src/video_processing.py -a <"download"|"extract_frames"> -ep <int>
 ```
@@ -1,4 +0,0 @@
-from pathlib import Path
-
-data_folder = str(Path(__file__).parents[3])
-print(data_folder)
BIN  database.sqlite
Binary file not shown.