improvement topic modelling
parent
021b828fea
commit
f7c0df98b2
|
@ -47,7 +47,7 @@ class Episode extends Api
|
||||||
$subtitles = new Subtitles;
|
$subtitles = new Subtitles;
|
||||||
$subs = $subtitles->getWebVTT('urn:srf:episode:tv:'.$ep['id']);
|
$subs = $subtitles->getWebVTT('urn:srf:episode:tv:'.$ep['id']);
|
||||||
|
|
||||||
$test = EpisodeModel::firstOrCreate(
|
$epModel = EpisodeModel::updateOrCreate(
|
||||||
[
|
[
|
||||||
'urn' => $ep['fullLengthUrn']
|
'urn' => $ep['fullLengthUrn']
|
||||||
],
|
],
|
||||||
|
|
|
@ -18,13 +18,13 @@ class Subtitles extends Api
|
||||||
|
|
||||||
public function getUrl(string $urn): string
|
public function getUrl(string $urn): string
|
||||||
{
|
{
|
||||||
|
|
||||||
$response = Http::withHeaders($this->headers)->withQueryParameters([
|
$response = Http::withHeaders($this->headers)->withQueryParameters([
|
||||||
'episode' => $urn,
|
'episode' => $urn,
|
||||||
])->get($this->endpoint.'subtitles');
|
])->get($this->endpoint.'subtitles');
|
||||||
|
|
||||||
|
if($response->ok() && !empty($response[0]['url'])){
|
||||||
if($response->ok()){
|
return $response[0]['url'];
|
||||||
return $response->json()[0]['url'];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
|
@ -34,9 +34,16 @@ class Subtitles extends Api
|
||||||
public function getWebVTT(string $urn): ?string
|
public function getWebVTT(string $urn): ?string
|
||||||
{
|
{
|
||||||
$url = $this->getUrl($urn);
|
$url = $this->getUrl($urn);
|
||||||
$response = Http::get($url);
|
$response = null;
|
||||||
|
|
||||||
if($response->successful()){
|
if($url){
|
||||||
|
$response = Http::get($url);
|
||||||
|
} else {
|
||||||
|
$altUrn = explode(":", $urn)[4];
|
||||||
|
$response = Http::get("https://subtitles.eai-general.aws.srf.ch/srf/{$altUrn}/episode/de/vod/vod.vtt");
|
||||||
|
}
|
||||||
|
|
||||||
|
if($response && $response->successful()){
|
||||||
return $response->body();
|
return $response->body();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,26 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
use Illuminate\Database\Migrations\Migration;
|
||||||
|
use Illuminate\Database\Schema\Blueprint;
|
||||||
|
use Illuminate\Support\Facades\Schema;
|
||||||
|
|
||||||
|
return new class extends Migration
|
||||||
|
{
|
||||||
|
/**
|
||||||
|
* Run the migrations.
|
||||||
|
*/
|
||||||
|
public function up(): void
|
||||||
|
{
|
||||||
|
Schema::table('episodes', function (Blueprint $table) {
|
||||||
|
$table->json('topics')->nullable();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reverse the migrations.
|
||||||
|
*/
|
||||||
|
public function down(): void
|
||||||
|
{
|
||||||
|
//
|
||||||
|
}
|
||||||
|
};
|
|
@ -47,6 +47,17 @@ input, button, textarea, select {
|
||||||
font: inherit;
|
font: inherit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
input{
|
||||||
|
margin-top: .2em;
|
||||||
|
border-radius: .2em;
|
||||||
|
border: 1px solid #fff;
|
||||||
|
background: var(--c3);
|
||||||
|
color: #fff;
|
||||||
|
padding: .2em .5em;
|
||||||
|
display: block;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
h1,
|
h1,
|
||||||
h2,
|
h2,
|
||||||
h3,
|
h3,
|
||||||
|
@ -294,6 +305,21 @@ main {
|
||||||
background: var(--c3);
|
background: var(--c3);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.track-ctrl h2{
|
||||||
|
font-size: 1.2em;
|
||||||
|
}
|
||||||
|
|
||||||
|
.track-ctrl p {
|
||||||
|
margin-top: 1em;
|
||||||
|
font-size: 1em;
|
||||||
|
line-height: 1.5;
|
||||||
|
}
|
||||||
|
|
||||||
|
.track-ctrl label {
|
||||||
|
margin-top: 1em;
|
||||||
|
display: inline-block;
|
||||||
|
}
|
||||||
|
|
||||||
.track-viz{
|
.track-viz{
|
||||||
background: var(--c3);
|
background: var(--c3);
|
||||||
}
|
}
|
||||||
|
|
|
@ -116,7 +116,10 @@
|
||||||
<div class="track">
|
<div class="track">
|
||||||
<div class="track-ctrl">
|
<div class="track-ctrl">
|
||||||
<h2>Topics</h2>
|
<h2>Topics</h2>
|
||||||
|
<label>
|
||||||
|
Segmente:
|
||||||
<input type="number" min="1" max="10" value="4" id="topic-track-segment-ctrl">
|
<input type="number" min="1" max="10" value="4" id="topic-track-segment-ctrl">
|
||||||
|
</label>
|
||||||
</div>
|
</div>
|
||||||
<div class="track-viz">
|
<div class="track-viz">
|
||||||
<ul id="topic-segement-list" class="segments">
|
<ul id="topic-segement-list" class="segments">
|
||||||
|
|
|
@ -36,4 +36,9 @@ def save_sentiments_f_sub(id, data):
|
||||||
con.commit()
|
con.commit()
|
||||||
con.close()
|
con.close()
|
||||||
|
|
||||||
|
def save_topics(id, data):
|
||||||
|
cur.execute("UPDATE episodes SET topics = ? WHERE id = ?", [data, id])
|
||||||
|
con.commit()
|
||||||
|
con.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
# subtitles_processing
|
||||||
|
Paket zur Aufbereitung der Untertitel.
|
||||||
|
|
||||||
|
## subtitles-processing.py
|
||||||
|
Normalisiert die Untertitel einer Episode. Die Timecodes werden umgespeichert, damit immer ganze Sätze pro Zeile vorhanden sind.
|
||||||
|
|
||||||
|
```bash
|
||||||
|
python src/normalize_subtitles/subtitles-processing.py -a <"normalize"> -ep <int>
|
||||||
|
```
|
||||||
|
|
||||||
|
## count_words.py
|
||||||
|
Zählt die Wortanzahl pro Satz.
|
||||||
|
```bash
|
||||||
|
python src/normalize_subtitles/count_words.py -ep <int>
|
||||||
|
```
|
||||||
|
|
||||||
|
## sentence_sentiment.py
|
||||||
|
Berechnet das Sentiment pro Satz.
|
||||||
|
```bash
|
||||||
|
python src/normalize_subtitles/sentence_sentiment.py -ep <int>
|
||||||
|
```
|
File diff suppressed because it is too large
Load Diff
|
@ -1,8 +1,8 @@
|
||||||
[project]
|
[project]
|
||||||
authors = [{name = "Giò Diani", email = "mail@gionathandiani.name"}]
|
authors = [{name = "Giò Diani", email = "mail@gionathandiani.name"}]
|
||||||
dependencies = ["webvtt-py>=0.5.1,<0.6", "spacy-llm>=0.7.2,<0.8", "germansentiment>=1.1.0,<2"]
|
dependencies = ["webvtt-py>=0.5.1,<0.6", "spacy-llm>=0.7.2,<0.8", "germansentiment>=1.1.0,<2", "bertopic>=0.16.4,<0.17", "gensim>=4.3.3,<5"]
|
||||||
description = "Normalisierung der Untertitel."
|
description = "Normalisierung der Untertitel."
|
||||||
name = "normalize_subtitles"
|
name = "subtitles_processing"
|
||||||
requires-python = ">= 3.11"
|
requires-python = ">= 3.11"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
|
||||||
|
@ -15,7 +15,7 @@ channels = ["conda-forge"]
|
||||||
platforms = ["win-64", "linux-64", "osx-64"]
|
platforms = ["win-64", "linux-64", "osx-64"]
|
||||||
|
|
||||||
[tool.pixi.pypi-dependencies]
|
[tool.pixi.pypi-dependencies]
|
||||||
normalize_subtitles = { path = ".", editable = true }
|
subtitles_processing = { path = ".", editable = true }
|
||||||
database = { path = "../database", editable = true}
|
database = { path = "../database", editable = true}
|
||||||
|
|
||||||
[tool.pixi.tasks]
|
[tool.pixi.tasks]
|
Binary file not shown.
|
@ -85,6 +85,7 @@ class NormalizeVtt:
|
||||||
d = {"sentences": sentences, "start": times_start, "end": times_end}
|
d = {"sentences": sentences, "start": times_start, "end": times_end}
|
||||||
df = pl.DataFrame(data=d)
|
df = pl.DataFrame(data=d)
|
||||||
|
|
||||||
data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
|
DATA_PATH = Path(__file__).parents[4]
|
||||||
|
data_folder = DATA_PATH / 'data' / ep / 'normalized_vtt.csv'
|
||||||
|
|
||||||
return df.write_csv(data_folder)
|
df.write_csv(data_folder)
|
|
@ -10,7 +10,7 @@ from germansentiment import SentimentModel
|
||||||
|
|
||||||
|
|
||||||
def get_sent(ep):
|
def get_sent(ep):
|
||||||
data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
|
data_folder = str(Path(__file__).parents[4]) + "/data/" + str(ep) + '/normalized_vtt.csv'
|
||||||
df = pl.read_csv(data_folder)
|
df = pl.read_csv(data_folder)
|
||||||
|
|
||||||
model = SentimentModel()
|
model = SentimentModel()
|
File diff suppressed because one or more lines are too long
|
@ -1,2 +0,0 @@
|
||||||
# GitHub syntax highlighting
|
|
||||||
pixi.lock linguist-language=YAML linguist-generated=true
|
|
|
@ -1,4 +0,0 @@
|
||||||
|
|
||||||
# pixi environments
|
|
||||||
.pixi
|
|
||||||
*.egg-info
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,23 +0,0 @@
|
||||||
[project]
|
|
||||||
authors = [{name = "Giò Diani", email = "mail@gionathandiani.name"}]
|
|
||||||
dependencies = []
|
|
||||||
description = "Add a short description here"
|
|
||||||
name = "video_deepface"
|
|
||||||
requires-python = ">=3.7,<3.11"
|
|
||||||
version = "0.1.0"
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
build-backend = "hatchling.build"
|
|
||||||
requires = ["hatchling"]
|
|
||||||
|
|
||||||
[tool.pixi.project]
|
|
||||||
channels = ["conda-forge"]
|
|
||||||
platforms = ["linux-64"]
|
|
||||||
|
|
||||||
[tool.pixi.pypi-dependencies]
|
|
||||||
video_deepface = { path = ".", editable = true }
|
|
||||||
|
|
||||||
[tool.pixi.tasks]
|
|
||||||
|
|
||||||
[tool.pixi.dependencies]
|
|
||||||
deepface = ">=0.0.86"
|
|
|
@ -1,10 +0,0 @@
|
||||||
from deepface import DeepFace
|
|
||||||
|
|
||||||
"""
|
|
||||||
objs = DeepFace.analyze(
|
|
||||||
img_path = "/home/gio/Code/VANA/data/24/frame000010.jpg",
|
|
||||||
actions = ['age', 'gender', 'emotion'],
|
|
||||||
)
|
|
||||||
|
|
||||||
print(objs)
|
|
||||||
"""
|
|
|
@ -1,2 +0,0 @@
|
||||||
# GitHub syntax highlighting
|
|
||||||
pixi.lock linguist-language=YAML linguist-generated=true
|
|
|
@ -1,4 +0,0 @@
|
||||||
|
|
||||||
# pixi environments
|
|
||||||
.pixi
|
|
||||||
*.egg-info
|
|
File diff suppressed because it is too large
Load Diff
|
@ -1,20 +0,0 @@
|
||||||
[project]
|
|
||||||
authors = [{name = "Giò Diani", email = "mail@gionathandiani.name"}]
|
|
||||||
dependencies = ["deepface>=0.0.93,<0.0.94"]
|
|
||||||
description = "Add a short description here"
|
|
||||||
name = "video_deepface2"
|
|
||||||
requires-python = ">= 3.6,<3.11"
|
|
||||||
version = "0.1.0"
|
|
||||||
|
|
||||||
[build-system]
|
|
||||||
build-backend = "hatchling.build"
|
|
||||||
requires = ["hatchling"]
|
|
||||||
|
|
||||||
[tool.pixi.project]
|
|
||||||
channels = ["conda-forge"]
|
|
||||||
platforms = ["linux-64"]
|
|
||||||
|
|
||||||
[tool.pixi.pypi-dependencies]
|
|
||||||
video_deepface2 = { path = ".", editable = true }
|
|
||||||
|
|
||||||
[tool.pixi.tasks]
|
|
|
@ -1,9 +0,0 @@
|
||||||
from deepface import DeepFace
|
|
||||||
|
|
||||||
objs = DeepFace.analyze(
|
|
||||||
img_path = "/home/gio/Code/VANA/data/24/frame000305.jpg",
|
|
||||||
actions = ['age', 'gender', 'emotion'],
|
|
||||||
)
|
|
||||||
|
|
||||||
print(objs)
|
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
[
|
[
|
||||||
{
|
{
|
||||||
"name": "Python venv",
|
"name": "Python venv",
|
||||||
"cmd": ["/home/gio/Code/VANA/VANA-python/video_deepface2/.pixi/envs/default/bin/python", "$file"],
|
"cmd": ["/home/gio/Code/VANA/VANA-python/subtitles_processing/.pixi/envs/default/bin/python", "$file"],
|
||||||
"selector": "source.python",
|
"selector": "source.python",
|
||||||
"file_regex": "^\\s*File \"(...*?)\", line ([0-9]*)"
|
"file_regex": "^\\s*File \"(...*?)\", line ([0-9]*)"
|
||||||
}
|
}
|
||||||
|
|
18293
VANA.sublime-workspace
18293
VANA.sublime-workspace
File diff suppressed because one or more lines are too long
BIN
database.sqlite
BIN
database.sqlite
Binary file not shown.
Loading…
Reference in New Issue