Less mockup data.
parent
a173d34fa6
commit
971da55e45
|
@ -0,0 +1,19 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Adds a nullable JSON `subtitle_data` column to the `episodes` table.
     */
    public function up(): void
    {
        Schema::table('episodes', function (Blueprint $table) {
            $table->json('subtitle_data')->nullable();
        });
    }

    /**
     * Reverse the migrations.
     *
     * Drops the `subtitle_data` column again, so that a rollback followed by
     * a new migrate run does not fail on an already existing column.
     */
    public function down(): void
    {
        Schema::table('episodes', function (Blueprint $table) {
            $table->dropColumn('subtitle_data');
        });
    }
};
|
File diff suppressed because one or more lines are too long
|
@ -29,9 +29,8 @@
|
|||
</span>Transkript</label>
|
||||
</div>
|
||||
<div class="control">
|
||||
<input class="visually-hidden" id="tablist_panel_terms" name="tablist" type="radio"
|
||||
/><label for="tablist_panel_terms"><span
|
||||
class="visually-hidden">Zeige Tabinhalt
|
||||
<input class="visually-hidden" id="tablist_panel_terms" name="tablist" type="radio" /><label
|
||||
for="tablist_panel_terms"><span class="visually-hidden">Zeige Tabinhalt
|
||||
</span>Dokumentbegriffe</label>
|
||||
</div>
|
||||
</fieldset>
|
||||
|
@ -53,18 +52,19 @@
|
|||
Transkript (Inhalt)
|
||||
</h2>
|
||||
<div class="inner">
|
||||
<ol id="transcript">
|
||||
@foreach($subtitles['cues'] as $cue)
|
||||
<li>
|
||||
<div>
|
||||
<a title="Zu Zeitpunkt {{ $cue['start']}} springen." data-start="{{ $cue['start']}}">{{ gmdate('H:i:s', $cue['start'])}}</a>
|
||||
</div>
|
||||
<div>
|
||||
{!! $cue['text'] !!}
|
||||
</div>
|
||||
</li>
|
||||
@endforeach
|
||||
</ol>
|
||||
<ol id="transcript">
|
||||
@foreach($subtitles['cues'] as $cue)
|
||||
<li>
|
||||
<div>
|
||||
<a title="Zu Zeitpunkt {{ $cue['start']}} springen." data-start="{{ $cue['start']}}">{{
|
||||
gmdate('H:i:s', $cue['start'])}}</a>
|
||||
</div>
|
||||
<div>
|
||||
{!! $cue['text'] !!}
|
||||
</div>
|
||||
</li>
|
||||
@endforeach
|
||||
</ol>
|
||||
</div>
|
||||
</div>
|
||||
<div class="panel" id="tablist_panel_terms_panel" hidden>
|
||||
|
@ -87,17 +87,28 @@
|
|||
<div id="tracks">
|
||||
<div class="track">
|
||||
<div class="track-ctrl">
|
||||
<h2>Anzahl Worte</h2>
|
||||
<p></p>
|
||||
<h2>Hauptfarben</h2>
|
||||
<p>
|
||||
Hauptfarbe pro Frame.
|
||||
</p>
|
||||
</div>
|
||||
<div class="track-viz"></div>
|
||||
<div class="track-viz">
|
||||
{!! $dom_color !!}
|
||||
</div>
|
||||
</div>
|
||||
<div class="track">
|
||||
<div class="track-ctrl">
|
||||
<h2>Anzahl Worte</h2>
|
||||
<p>Total Worte: {{ array_slice($subdata['word_count'], -1)[0] }}</p>
|
||||
</div>
|
||||
<div class="track-viz" id="words-count-track"></div>
|
||||
</div>
|
||||
<div class="track">
|
||||
<div class="track-ctrl">
|
||||
<h2>Sentiment</h2>
|
||||
<p></p>
|
||||
</div>
|
||||
<div class="track-viz"></div>
|
||||
<div class="track-viz" id="sentiment-track"></div>
|
||||
</div>
|
||||
<div class="track">
|
||||
<div class="track-ctrl">
|
||||
|
@ -113,15 +124,6 @@
|
|||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<div class="track">
|
||||
<div class="track-ctrl">
|
||||
<h2>Dominant Colors</h2>
|
||||
<input type="number" min="1" max="10" value="4" id="topic-track-segment-ctrl">
|
||||
</div>
|
||||
<div class="track-viz">
|
||||
{!! $dom_color !!}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
</main>
|
||||
|
|
|
@ -26,10 +26,11 @@ Route::get('/detail/{id}', function(int $id) {
|
|||
$mediacomposition = json_decode($ep->mediacomposition, 1);
|
||||
$durationSteps = $mediacomposition['chapterList'][0]['duration'] / 1000 / 10;
|
||||
|
||||
$subdata = json_decode($ep->subtitle_data, 1);
|
||||
|
||||
$parser = new Podlove\Webvtt\Parser();
|
||||
$subtitles = $parser->parse($subtitles);
|
||||
//dump($subtitles);
|
||||
|
||||
return view('detail', ['title' => $title, 'subtitles' => $subtitles, 'mediacomposition' => $mediacomposition, 'durationSteps' => $durationSteps, 'dom_color' => $ep->viz_data]);
|
||||
return view('detail', ['title' => $title, 'subtitles' => $subtitles, 'mediacomposition' => $mediacomposition, 'durationSteps' => $durationSteps, 'dom_color' => $ep->viz_data, 'subdata' => $subdata]);
|
||||
|
||||
});
|
||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -4,14 +4,16 @@ con = sqlite3.connect("/home/gio/Code/VANA/database.sqlite")
|
|||
cur = con.cursor()
|
||||
|
||||
def get_subtitle(id):
    """Fetch the ``subtitles`` column of a single episode.

    Args:
        id: Value matched against ``episodes.id``.

    Returns:
        The row produced by ``fetchone()`` for the one selected column, or
        ``None`` when no episode matches ``id``.
    """
    # ``con``/``cur`` are module-level and shared by every helper in this
    # file, so the connection is intentionally left open: closing it here
    # would make any later query in the same process fail with
    # "Cannot operate on a closed database".
    row = cur.execute("SELECT subtitles FROM episodes WHERE id = ?", [id])
    return row.fetchone()
|
||||
|
||||
def get_mediacomposition(id):
    """Fetch the ``mediacomposition`` column of a single episode.

    Args:
        id: Value matched against ``episodes.id``.

    Returns:
        The row produced by ``fetchone()`` for the one selected column, or
        ``None`` when no episode matches ``id``.
    """
    # The shared module-level connection stays open on purpose; closing it
    # here would break every subsequent query issued through ``cur``.
    row = cur.execute("SELECT mediacomposition FROM episodes WHERE id = ?", [id])
    return row.fetchone()
|
||||
|
||||
def get_podcast_url(id):
|
||||
row = cur.execute("SELECT json_extract(mediacomposition, '$.chapterList[0].podcastHdUrl') FROM episodes WHERE id = ?", [id])
|
||||
|
@ -20,7 +22,12 @@ def get_podcast_url(id):
|
|||
return podcast_url
|
||||
|
||||
def save_data_viz(id, data):
    """Store visualisation data in the ``viz_data`` column of an episode.

    Args:
        id: Value matched against ``episodes.id``.
        data: Value written to ``viz_data``.
    """
    cur.execute("UPDATE episodes SET viz_data = ? WHERE id = ?", [data, id])
    # Commit makes the update durable. The shared module-level connection is
    # not closed afterwards, so further queries in the same process keep
    # working.
    con.commit()
|
||||
|
||||
def save_subtitle_data(id, data):
    """Store derived subtitle data in the ``subtitle_data`` column of an episode.

    Args:
        id: Value matched against ``episodes.id``.
        data: Value written to ``subtitle_data``.
    """
    cur.execute("UPDATE episodes SET subtitle_data = ? WHERE id = ?", [data, id])
    # Commit makes the update durable. The shared module-level connection is
    # not closed afterwards, so further queries in the same process keep
    # working.
    con.commit()
|
||||
|
||||
|
|
Binary file not shown.
|
@ -0,0 +1,33 @@
|
|||
#!/usr/bin/env python3
|
||||
|
||||
import argparse
|
||||
import json
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
from database import queries
|
||||
|
||||
|
||||
def count_words(ep):
    """Build a cumulative word count over the normalized subtitles of an episode.

    Reads ``data/<ep>/normalized_vtt.csv`` (resolved relative to this file)
    and uses its ``sentences`` and ``end`` columns.

    Args:
        ep: Episode identifier; used as the name of the episode's data folder.

    Returns:
        A JSON string with two equally long lists: ``time`` (rounded ``end``
        value of each sentence, preceded by 0) and ``word_count`` (running
        total of words up to and including that sentence, preceded by 0).
    """
    csv_path = Path(__file__).parents[4] / "data" / str(ep) / "normalized_vtt.csv"
    df = pl.read_csv(csv_path)

    time = [0]
    word_count = [0]

    for sentence, end in zip(df["sentences"], df["end"]):
        time.append(round(end))
        # str.split() without an argument ignores leading, trailing and
        # repeated whitespace; split(" ") would count the resulting empty
        # strings as words. A missing sentence contributes zero words.
        words = (sentence or "").split()
        word_count.append(word_count[-1] + len(words))

    return json.dumps({"time": time, "word_count": word_count})
|
||||
|
||||
|
||||
# CLI
parser = argparse.ArgumentParser(
    prog="Count Words", description="Counts the words in the subtitle"
)

# The episode is mandatory: it selects both the data folder that is read and
# the database row that is updated. Without it argparse now reports a usage
# error instead of the script failing later with a TypeError.
parser.add_argument(
    "--episode",
    "-ep",
    required=True,
    help="id of the episode whose normalized subtitles are counted",
)
args = parser.parse_args()

queries.save_subtitle_data(args.episode, count_words(args.episode))
|
|
@ -1,10 +1,12 @@
|
|||
import re
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
|
||||
import polars as pl
|
||||
import spacy
|
||||
import timecode as tc
|
||||
import webvtt
|
||||
from database import queries
|
||||
|
||||
|
||||
class NormalizeVtt:
|
||||
|
@ -13,7 +15,10 @@ class NormalizeVtt:
|
|||
self.nlp = spacy.load("de_core_news_sm")
|
||||
|
||||
# Der Timecode der Untertitel wird angepasst, dass jeder Block einem Satz entspricht.
|
||||
def sentencize(self, vtt):
|
||||
def sentencize(self, ep):
|
||||
|
||||
vtt = queries.get_subtitle(ep)[0]
|
||||
|
||||
captions = webvtt.from_string(textwrap.dedent(vtt).strip())
|
||||
|
||||
sentences = []
|
||||
|
@ -79,4 +84,7 @@ class NormalizeVtt:
|
|||
text = text + " " + token.text
|
||||
d = {"sentences": sentences, "start": times_start, "end": times_end}
|
||||
df = pl.DataFrame(data=d)
|
||||
return df.write_csv()
|
||||
|
||||
data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
|
||||
|
||||
return df.write_csv(data_folder)
|
||||
|
|
|
@ -3,7 +3,6 @@
|
|||
import argparse
|
||||
|
||||
import normalize_vtt as nv
|
||||
from database import queries
|
||||
|
||||
normalizer = nv.NormalizeVtt()
|
||||
|
||||
|
@ -19,5 +18,5 @@ args = parser.parse_args()
|
|||
|
||||
match args.action:
|
||||
case "normalize":
|
||||
normalizer.sentencize(vtt=queries.get_subtitle(args.episode)[0])
|
||||
normalizer.sentencize(args.episode)
|
||||
print("Normalized.")
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
# video_colors.py
|
||||
Python script to extract the dominant colors of an image and generate a SVG containing them.
|
||||
The script expects a folder with all the extracted frames in ../../data/<id_episode|int>/
|
||||
The script expects a folder with all the extracted frames in `../../data/<id_episode|int>/`
|
||||
|
||||
```bash
|
||||
python src/dominant_colors.py -ep <int>
|
||||
```
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
# video_processing.py
|
||||
Python script to download episodes and extract the frames.
|
||||
The extracted frames are saved to ../../data/<id_episode|int>/
|
||||
The extracted frames are saved to `../../data/<id_episode|int>/`
|
||||
```bash
|
||||
python src/video_processing.py -a <"download"|"extract_frames"> -ep <int>
|
||||
```
|
||||
|
|
|
@ -1,4 +0,0 @@
|
|||
from pathlib import Path
|
||||
|
||||
data_folder = str(Path(__file__).parents[3])
|
||||
print(data_folder)
|
BIN
database.sqlite
BIN
database.sqlite
Binary file not shown.
Loading…
Reference in New Issue