improvement topic modelling
parent
021b828fea
commit
f7c0df98b2
|
@ -47,7 +47,7 @@ class Episode extends Api
|
|||
$subtitles = new Subtitles;
|
||||
$subs = $subtitles->getWebVTT('urn:srf:episode:tv:'.$ep['id']);
|
||||
|
||||
$test = EpisodeModel::firstOrCreate(
|
||||
$epModel = EpisodeModel::updateOrCreate(
|
||||
[
|
||||
'urn' => $ep['fullLengthUrn']
|
||||
],
|
||||
|
|
|
@ -18,13 +18,13 @@ class Subtitles extends Api
|
|||
|
||||
public function getUrl(string $urn): string
|
||||
{
|
||||
|
||||
$response = Http::withHeaders($this->headers)->withQueryParameters([
|
||||
'episode' => $urn,
|
||||
])->get($this->endpoint.'subtitles');
|
||||
|
||||
|
||||
if($response->ok()){
|
||||
return $response->json()[0]['url'];
|
||||
if($response->ok() && !empty($response[0]['url'])){
|
||||
return $response[0]['url'];
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -34,9 +34,16 @@ class Subtitles extends Api
|
|||
public function getWebVTT(string $urn): ?string
|
||||
{
|
||||
$url = $this->getUrl($urn);
|
||||
$response = Http::get($url);
|
||||
$response = null;
|
||||
|
||||
if($response->successful()){
|
||||
if($url){
|
||||
$response = Http::get($url);
|
||||
} else {
|
||||
$altUrn = explode(":", $urn)[4];
|
||||
$response = Http::get("https://subtitles.eai-general.aws.srf.ch/srf/{$altUrn}/episode/de/vod/vod.vtt");
|
||||
}
|
||||
|
||||
if($response && $response->successful()){
|
||||
return $response->body();
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
<?php
|
||||
|
||||
use Illuminate\Database\Migrations\Migration;
|
||||
use Illuminate\Database\Schema\Blueprint;
|
||||
use Illuminate\Support\Facades\Schema;
|
||||
|
||||
return new class extends Migration
{
    /**
     * Run the migrations.
     *
     * Adds a nullable JSON `topics` column to the `episodes` table.
     */
    public function up(): void
    {
        Schema::table('episodes', function (Blueprint $table) {
            $table->json('topics')->nullable();
        });
    }

    /**
     * Reverse the migrations.
     *
     * Drops the `topics` column added by up(), so that rolling back restores
     * the previous schema and the migration can be re-run afterwards.
     */
    public function down(): void
    {
        Schema::table('episodes', function (Blueprint $table) {
            $table->dropColumn('topics');
        });
    }
};
|
|
@ -47,6 +47,17 @@ input, button, textarea, select {
|
|||
font: inherit;
|
||||
}
|
||||
|
||||
input{
|
||||
margin-top: .2em;
|
||||
border-radius: .2em;
|
||||
border: 1px solid #fff;
|
||||
background: var(--c3);
|
||||
color: #fff;
|
||||
padding: .2em .5em;
|
||||
display: block;
|
||||
}
|
||||
|
||||
|
||||
h1,
|
||||
h2,
|
||||
h3,
|
||||
|
@ -294,6 +305,21 @@ main {
|
|||
background: var(--c3);
|
||||
}
|
||||
|
||||
.track-ctrl h2{
|
||||
font-size: 1.2em;
|
||||
}
|
||||
|
||||
.track-ctrl p {
|
||||
margin-top: 1em;
|
||||
font-size: 1em;
|
||||
line-height: 1.5;
|
||||
}
|
||||
|
||||
.track-ctrl label {
|
||||
margin-top: 1em;
|
||||
display: inline-block;
|
||||
}
|
||||
|
||||
.track-viz{
|
||||
background: var(--c3);
|
||||
}
|
||||
|
|
|
@ -116,7 +116,10 @@
|
|||
<div class="track">
|
||||
<div class="track-ctrl">
|
||||
<h2>Topics</h2>
|
||||
<label>
|
||||
Segmente:
|
||||
<input type="number" min="1" max="10" value="4" id="topic-track-segment-ctrl">
|
||||
</label>
|
||||
</div>
|
||||
<div class="track-viz">
|
||||
<ul id="topic-segement-list" class="segments">
|
||||
|
|
|
@ -36,4 +36,9 @@ def save_sentiments_f_sub(id, data):
|
|||
con.commit()
|
||||
con.close()
|
||||
|
||||
def save_topics(id, data):
    """Store the topics payload for one episode.

    Writes ``data`` into the ``topics`` column of the ``episodes`` row whose
    ``id`` matches, commits the change and then closes the connection.

    NOTE(review): ``cur`` and ``con`` are defined outside this function; since
    the connection is closed here, it presumably cannot be reused for another
    save in the same process -- confirm against the callers.
    """
    params = [data, id]
    cur.execute("UPDATE episodes SET topics = ? WHERE id = ?", params)
    con.commit()
    con.close()
|
||||
|
||||
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
# subtitles_processing
|
||||
Paket zur Aufbereitung der Untertitel.
|
||||
|
||||
## subtitles-processing.py
|
||||
Normalisiert die Untertitel einer Episode. Die Timecodes werden umgespeichert, damit immer ganze Sätze pro Zeile vorhanden sind.
|
||||
|
||||
```bash
|
||||
python src/normalize_subtitles/subtitles-processing.py -a <"normalize"> -ep <int>
|
||||
```
|
||||
|
||||
## count_words.py
|
||||
Zählt die Wortanzahl pro Satz.
|
||||
```bash
|
||||
python src/normalize_subtitles/count_words.py -ep <int>
|
||||
```
|
||||
|
||||
## sentence_sentiment.py
|
||||
Berechnet das Sentiment pro Satz.
|
||||
```bash
|
||||
python src/normalize_subtitles/sentence_sentiment.py -ep <int>
|
||||
```
|
File diff suppressed because it is too large
Load Diff
|
@ -1,8 +1,8 @@
|
|||
[project]
|
||||
authors = [{name = "Giò Diani", email = "mail@gionathandiani.name"}]
|
||||
dependencies = ["webvtt-py>=0.5.1,<0.6", "spacy-llm>=0.7.2,<0.8", "germansentiment>=1.1.0,<2"]
|
||||
dependencies = ["webvtt-py>=0.5.1,<0.6", "spacy-llm>=0.7.2,<0.8", "germansentiment>=1.1.0,<2", "bertopic>=0.16.4,<0.17", "gensim>=4.3.3,<5"]
|
||||
description = "Normalisierung der Untertitel."
|
||||
name = "normalize_subtitles"
|
||||
name = "subtitles_processing"
|
||||
requires-python = ">= 3.11"
|
||||
version = "0.1.0"
|
||||
|
||||
|
@ -15,7 +15,7 @@ channels = ["conda-forge"]
|
|||
platforms = ["win-64", "linux-64", "osx-64"]
|
||||
|
||||
[tool.pixi.pypi-dependencies]
|
||||
normalize_subtitles = { path = ".", editable = true }
|
||||
subtitles_processing = { path = ".", editable = true }
|
||||
database = { path = "../database", editable = true}
|
||||
|
||||
[tool.pixi.tasks]
|
Binary file not shown.
|
@ -85,6 +85,7 @@ class NormalizeVtt:
|
|||
d = {"sentences": sentences, "start": times_start, "end": times_end}
|
||||
df = pl.DataFrame(data=d)
|
||||
|
||||
data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
|
||||
DATA_PATH = Path(__file__).parents[4]
|
||||
data_folder = DATA_PATH / 'data' / ep / 'normalized_vtt.csv'
|
||||
|
||||
return df.write_csv(data_folder)
|
||||
df.write_csv(data_folder)
|
|
@ -10,7 +10,7 @@ from germansentiment import SentimentModel
|
|||
|
||||
|
||||
def get_sent(ep):
|
||||
data_folder = str(Path(__file__).parents[4]) + "/data/" + ep + '/normalized_vtt.csv'
|
||||
data_folder = str(Path(__file__).parents[4]) + "/data/" + str(ep) + '/normalized_vtt.csv'
|
||||
df = pl.read_csv(data_folder)
|
||||
|
||||
model = SentimentModel()
|
File diff suppressed because one or more lines are too long
|
@ -1,2 +0,0 @@
|
|||
# GitHub syntax highlighting
|
||||
pixi.lock linguist-language=YAML linguist-generated=true
|
|
@ -1,4 +0,0 @@
|
|||
|
||||
# pixi environments
|
||||
.pixi
|
||||
*.egg-info
|
File diff suppressed because it is too large
Load Diff
|
@ -1,23 +0,0 @@
|
|||
[project]
|
||||
authors = [{name = "Giò Diani", email = "mail@gionathandiani.name"}]
|
||||
dependencies = []
|
||||
description = "Add a short description here"
|
||||
name = "video_deepface"
|
||||
requires-python = ">=3.7,<3.11"
|
||||
version = "0.1.0"
|
||||
|
||||
[build-system]
|
||||
build-backend = "hatchling.build"
|
||||
requires = ["hatchling"]
|
||||
|
||||
[tool.pixi.project]
|
||||
channels = ["conda-forge"]
|
||||
platforms = ["linux-64"]
|
||||
|
||||
[tool.pixi.pypi-dependencies]
|
||||
video_deepface = { path = ".", editable = true }
|
||||
|
||||
[tool.pixi.tasks]
|
||||
|
||||
[tool.pixi.dependencies]
|
||||
deepface = ">=0.0.86"
|
|
@ -1,10 +0,0 @@
|
|||
from deepface import DeepFace
|
||||
|
||||
"""
|
||||
objs = DeepFace.analyze(
|
||||
img_path = "/home/gio/Code/VANA/data/24/frame000010.jpg",
|
||||
actions = ['age', 'gender', 'emotion'],
|
||||
)
|
||||
|
||||
print(objs)
|
||||
"""
|
|
@ -1,2 +0,0 @@
|
|||
# GitHub syntax highlighting
|
||||
pixi.lock linguist-language=YAML linguist-generated=true
|
|
@ -1,4 +0,0 @@
|
|||
|
||||
# pixi environments
|
||||
.pixi
|
||||
*.egg-info
|
File diff suppressed because it is too large
Load Diff
|
@ -1,20 +0,0 @@
|
|||
[project]
|
||||
authors = [{name = "Giò Diani", email = "mail@gionathandiani.name"}]
|
||||
dependencies = ["deepface>=0.0.93,<0.0.94"]
|
||||
description = "Add a short description here"
|
||||
name = "video_deepface2"
|
||||
requires-python = ">= 3.6,<3.11"
|
||||
version = "0.1.0"
|
||||
|
||||
[build-system]
|
||||
build-backend = "hatchling.build"
|
||||
requires = ["hatchling"]
|
||||
|
||||
[tool.pixi.project]
|
||||
channels = ["conda-forge"]
|
||||
platforms = ["linux-64"]
|
||||
|
||||
[tool.pixi.pypi-dependencies]
|
||||
video_deepface2 = { path = ".", editable = true }
|
||||
|
||||
[tool.pixi.tasks]
|
|
@ -1,9 +0,0 @@
|
|||
from deepface import DeepFace
|
||||
|
||||
objs = DeepFace.analyze(
|
||||
img_path = "/home/gio/Code/VANA/data/24/frame000305.jpg",
|
||||
actions = ['age', 'gender', 'emotion'],
|
||||
)
|
||||
|
||||
print(objs)
|
||||
|
|
@ -8,7 +8,7 @@
|
|||
[
|
||||
{
|
||||
"name": "Python venv",
|
||||
"cmd": ["/home/gio/Code/VANA/VANA-python/video_deepface2/.pixi/envs/default/bin/python", "$file"],
|
||||
"cmd": ["/home/gio/Code/VANA/VANA-python/subtitles_processing/.pixi/envs/default/bin/python", "$file"],
|
||||
"selector": "source.python",
|
||||
"file_regex": "^\\s*File \"(...*?)\", line ([0-9]*)"
|
||||
}
|
||||
|
|
18203
VANA.sublime-workspace
18203
VANA.sublime-workspace
File diff suppressed because one or more lines are too long
BIN
database.sqlite
BIN
database.sqlite
Binary file not shown.
Loading…
Reference in New Issue