feat: add transcript display to episode page

+ fix transcript parser

closes #411
This commit is contained in:
Guy Martin (Dwev) 2024-04-17 09:13:07 +00:00 committed by Yassine Doghri
parent 88851b0226
commit 4d141fceae
9 changed files with 274 additions and 23 deletions

View File

@ -128,6 +128,9 @@ $routes->group('@(:podcastHandle)', static function ($routes): void {
$routes->get('chapters', 'EpisodeController::chapters/$1/$2', [
'as' => 'episode-chapters',
]);
$routes->get('transcript', 'EpisodeController::transcript/$1/$2', [
'as' => 'episode-transcript',
]);
$routes->options('comments', 'ActivityPubController::preflight');
$routes->get('comments', 'EpisodeController::comments/$1/$2', [
'as' => 'episode-comments',
@ -205,6 +208,9 @@ $routes->get('/p/(:uuid)/activity', 'EpisodePreviewController::activity/$1', [
$routes->get('/p/(:uuid)/chapters', 'EpisodePreviewController::chapters/$1', [
'as' => 'episode-preview-chapters',
]);
$routes->get('/p/(:uuid)/transcript', 'EpisodePreviewController::transcript/$1', [
'as' => 'episode-preview-transcript',
]);
// Other pages
$routes->get('/credits', 'CreditsController', [

View File

@ -167,7 +167,7 @@ class EpisodeController extends BaseController
return $cachedView;
}
public function chapters(): String
public function chapters(): string
{
// Prevent analytics hit when authenticated
if (! auth()->loggedIn()) {
@ -228,6 +228,72 @@ class EpisodeController extends BaseController
return $cachedView;
}
/**
 * Renders the transcript page of the current episode.
 *
 * Anonymous visits register a podcast webpage hit and the rendered view is
 * stored under a cache name built from the podcast id, episode id, locale,
 * unlock state and authentication state. Authenticated visits are rendered
 * without caching.
 *
 * View data: `metatags`, `podcast`, `episode`, plus `transcript` when the
 * episode has one, plus `captions` when the transcript's JSON file decodes
 * to an array.
 *
 * @return string the rendered (or previously cached) page
 */
public function transcript(): string
{
    // Prevent analytics hit when authenticated
    if (! auth()->loggedIn()) {
        $this->registerPodcastWebpageHit($this->episode->podcast_id);
    }

    $cacheName = implode(
        '_',
        array_filter([
            'page',
            "podcast#{$this->podcast->id}",
            "episode#{$this->episode->id}",
            'transcript',
            service('request')
                ->getLocale(),
            is_unlocked($this->podcast->handle) ? 'unlocked' : null,
            auth()
                ->loggedIn() ? 'authenticated' : null,
        ]),
    );

    $cachedView = cache($cacheName);
    if ($cachedView) {
        return $cachedView;
    }

    $data = [
        'metatags' => get_episode_metatags($this->episode),
        'podcast'  => $this->podcast,
        'episode'  => $this->episode,
    ];

    if ($this->episode->transcript !== null) {
        $data['transcript'] = $this->episode->transcript;

        if ($this->episode->transcript->json_key !== null) {
            // get transcript from json file
            /** @var FileManagerInterface $fileManager */
            $fileManager = service('file_manager');
            $transcriptJsonString = (string) $fileManager->getFileContents(
                $this->episode->transcript->json_key
            );

            // Only expose captions when the file decodes to a list the view
            // can iterate: a missing/empty/invalid file decodes to null, and a
            // valid JSON scalar would otherwise break the view's foreach.
            $captions = json_decode($transcriptJsonString, true);
            if (is_array($captions)) {
                $data['captions'] = $captions;
            }
        }
    }

    if (auth()->loggedIn()) {
        helper('form');

        return view('episode/transcript', $data);
    }

    // Only needed for the cache lifetime below, so it is looked up after the
    // authenticated early return.
    $secondsToNextUnpublishedEpisode = (new EpisodeModel())->getSecondsToNextUnpublishedEpisode(
        $this->podcast->id,
    );

    // The page cache is set to a decade so it is deleted manually upon podcast update
    return view('episode/transcript', $data, [
        'cache'      => $secondsToNextUnpublishedEpisode
            ? $secondsToNextUnpublishedEpisode
            : DECADE,
        'cache_name' => $cacheName,
    ]);
}
public function embed(string $theme = 'light-transparent'): string
{
header('Content-Security-Policy: frame-ancestors http://*:* https://*:*');

View File

@ -13,7 +13,6 @@ namespace App\Controllers;
use App\Entities\Episode;
use App\Models\EpisodeModel;
use CodeIgniter\Exceptions\PageNotFoundException;
use CodeIgniter\HTTP\RedirectResponse;
use Modules\Media\FileManagers\FileManagerInterface;
class EpisodePreviewController extends BaseController
@ -45,7 +44,7 @@ class EpisodePreviewController extends BaseController
return $this->{$method}(...$params);
}
public function index(): RedirectResponse | string
public function index(): string
{
helper('form');
@ -55,7 +54,7 @@ class EpisodePreviewController extends BaseController
]);
}
public function activity(): RedirectResponse | string
public function activity(): string
{
helper('form');
@ -65,7 +64,7 @@ class EpisodePreviewController extends BaseController
]);
}
public function chapters(): RedirectResponse | string
public function chapters(): string
{
$data = [
'podcast' => $this->episode->podcast,
@ -84,4 +83,30 @@ class EpisodePreviewController extends BaseController
helper('form');
return view('episode/preview-chapters', $data);
}
/**
 * Renders the transcript tab of the episode preview page.
 *
 * View data: `podcast`, `episode`, plus `transcript` when the episode has
 * one, plus `captions` when the transcript's JSON file decodes to an array.
 *
 * @return string the rendered preview page
 */
public function transcript(): string
{
    $data = [
        'podcast' => $this->episode->podcast,
        'episode' => $this->episode,
    ];

    $transcript = $this->episode->transcript;
    if ($transcript !== null) {
        $data['transcript'] = $transcript;

        if ($transcript->json_key !== null) {
            // get transcript from json file
            /** @var FileManagerInterface $fileManager */
            $fileManager = service('file_manager');
            $transcriptJsonString = (string) $fileManager->getFileContents($transcript->json_key);

            // Only expose captions when the file decodes to a list the view
            // can iterate: a missing/empty/invalid file decodes to null, and a
            // valid JSON scalar would otherwise break the view's foreach.
            $captions = json_decode($transcriptJsonString, true);
            if (is_array($captions)) {
                $data['captions'] = $captions;
            }
        }
    }

    helper('form');

    return view('episode/preview-transcript', $data);
}
}

View File

@ -24,6 +24,7 @@ return [
'comments' => 'Comments',
'activity' => 'Activity',
'chapters' => 'Chapters',
'transcript' => 'Transcript',
'description' => 'Episode description',
'number_of_comments' => '{numberOfComments, plural,
one {# comment}
@ -44,4 +45,6 @@ return [
'publish_edit' => 'Edit publication',
],
'no_chapters' => 'No chapters are available for this episode.',
'download_transcript' => 'Download transcript ({extension})',
'no_transcript' => 'No transcript available for this episode.',
];

View File

@ -144,6 +144,7 @@ class TranscriptParser
break;
case VTT_STATE_BLANK:
$speakercount = 0;
$state = VTT_STATE_TIME;
break;
@ -154,20 +155,8 @@ class TranscriptParser
case VTT_STATE_TEXT:
if (trim($line) === '') {
$sub = new stdClass();
$sub->number = $subNum;
[$startTime, $endTime] = explode(' --> ', $subTime);
$sub->startTime = $this->getSecondsFromVTTTimeString($startTime);
$sub->endTime = $this->getSecondsFromVTTTimeString($endTime);
$sub->text = trim($subText);
if ($subSpeaker !== '') {
$sub->speaker = trim((string) $subSpeaker);
}
$subText = '';
$state = VTT_STATE_TIME;
$subs[] = $sub;
++$subNum;
//$subs[] = $sub;
} elseif ($subText !== '') {
$subText .= PHP_EOL . $line;
} else {
@ -179,18 +168,36 @@ class TranscriptParser
* 2. Who is speaking
* 3. Any styling cues encoded in the VTT (which we dump)
* More information: https://www.w3.org/TR/webvtt1/
*/
$vtt_speaker_pattern = '/^<.*>/';
$removethese = ['<', '>'];
*
* If there is more than one speaker in a cue, we also need
* to handle this, to repeat the start and end times for
* the second cue.
* */
$vtt_speaker_pattern = '/^<.*>/U';
$removethese = ['</v>', '<', '>'];
preg_match($vtt_speaker_pattern, $line, $matches);
if (isset($matches[0])) {
$subVoiceCue = explode(' ', str_replace($removethese, '', $matches[0]));
$subSpeaker = $subVoiceCue[1];
$subVoiceCue = str_replace($removethese, '', $matches[0]);
$subSpeaker = substr($subVoiceCue, strpos($subVoiceCue, ' '));
} else {
$subSpeaker = '';
}
$subText .= preg_replace($vtt_speaker_pattern, '', $line);
$sub = new stdClass();
$sub->number = $subNum;
[$startTime, $endTime] = explode(' --> ', $subTime);
$sub->startTime = $this->getSecondsFromVTTTimeString($startTime);
$sub->endTime = $this->getSecondsFromVTTTimeString($endTime);
$sub->text = trim($subText);
if ($subSpeaker !== '') {
$sub->speaker = trim($subSpeaker);
}
$subText = '';
$subs[] = $sub;
++$subNum;
}
break;
@ -215,6 +222,11 @@ class TranscriptParser
/**
 * Converts a WebVTT timestamp into a number of seconds.
 *
 * Both the full `HH:MM:SS.mmm` form and the short `MM:SS.mmm` form are
 * accepted. The fractional part is optional, the hours field may exceed 23,
 * and anything following the timestamp on the same string is ignored.
 *
 * The value is computed arithmetically rather than through strtotime(), so
 * it does not depend on the server timezone or on daylight-saving changes.
 *
 * @param string $timeString timestamp taken from a cue timing line
 * @return float offset in seconds; 0.0 when no timestamp can be recognised
 */
private function getSecondsFromVTTTimeString(string $timeString): float
{
    // Optional hours, then minutes and seconds, then optional fraction.
    $timestampPattern = '/^\s*(?:(\d+):)?(\d{1,2}):(\d{1,2})(?:\.(\d+))?/';

    if (preg_match($timestampPattern, $timeString, $parts) !== 1) {
        return 0.0;
    }

    // An absent hours group is reported as an empty string, which casts to 0.
    $hours = (int) $parts[1];
    $minutes = (int) $parts[2];
    $seconds = (int) $parts[3];
    // The fraction group is missing from the matches when not present.
    $fraction = $parts[4] ?? '0';

    return (float) ($hours * 3600 + $minutes * 60 + $seconds) + (float) "0.{$fraction}";
}
}

View File

@ -17,6 +17,11 @@ if ($episode->publication_status === 'published') {
'label' => lang('Episode.chapters'),
'labelInfo' => $episode->chapters === null ? 0 : $episode->chapters->chapter_count,
],
[
'uri' => route_to('episode-transcript', esc($podcast->handle), esc($episode->slug)),
'label' => lang('Episode.transcript'),
'labelInfo' => $episode->transcript === null ? '&ndash;' : '✓',
],
];
} else {
$navigationItems = [
@ -35,6 +40,11 @@ if ($episode->publication_status === 'published') {
'label' => lang('Episode.chapters'),
'labelInfo' => $episode->chapters === null ? 0 : $episode->chapters->chapter_count,
],
[
'uri' => route_to('episode-preview-transcript', $episode->preview_id),
'label' => lang('Episode.transcript'),
'labelInfo' => $episode->transcript === null ? '&ndash;' : '✓',
],
];
}

View File

@ -0,0 +1,9 @@
<?php
/**
 * Renders one transcript row: a start-time badge followed by an optional
 * speaker name and the spoken text.
 *
 * Speaker and text originate from an uploaded transcript file, so every
 * value is HTML-escaped before being written to the page.
 *
 * Expected variables: $startTime, $speaker (empty string when unknown), $text.
 */
?>
<article class="flex flex-col items-baseline p-2 sm:flex-row gap-x-2">
    <span class="px-1 text-sm font-semibold rounded bg-subtle"><?= esc($startTime) ?></span>
    <p>
        <?php if ($speaker !== ''): ?>
            <span class="mr-1 font-bold"><?= esc($speaker) ?></span>
        <?php endif; ?>
        <?= esc($text) ?>
    </p>
</article>

View File

@ -0,0 +1,60 @@
<?= $this->extend('episode/_layout-preview') ?>
<?= $this->section('content') ?>
<?php if (isset($captions)) : ?>
<div class="flex flex-col gap-2">
<Button uri="<?= $transcript->file_url ?>" size="small" iconLeft="download" class="self-start" variant="secondary" target="_blank" download="" rel="noopener noreferrer"><?= lang('Episode.download_transcript', [
'extension' => '.' . $transcript->file_extension,
]) ?></Button>
<?php
// Group the captions into display rows first, then render them in order.
// Consecutive captions by the same speaker are merged into one row that keeps
// the start time of its first caption. A caption without a speaker always
// gets its own row and closes the row being merged, so rows are never
// rendered out of chronological order.
$blocks = [];
$openBlock = null; // index in $blocks of the speaker row still accepting captions

foreach ($captions as $caption) {
    $captionText = $caption['text'] ?? '';

    if (! isset($caption['speaker'])) {
        $blocks[] = [
            'startTime' => isset($caption['startTime']) ? format_duration($caption['startTime']) : '',
            'speaker'   => '',
            'text'      => $captionText,
        ];
        $openBlock = null;
        continue;
    }

    if ($openBlock !== null && $blocks[$openBlock]['speaker'] === $caption['speaker']) {
        // concatenate cues with the same speaker
        $blocks[$openBlock]['text'] .= ' ' . $captionText;
        continue;
    }

    $blocks[] = [
        'startTime' => isset($caption['startTime']) ? format_duration($caption['startTime']) : '',
        'speaker'   => $caption['speaker'],
        'text'      => $captionText,
    ];
    $openBlock = array_key_last($blocks);
}

foreach ($blocks as $block) {
    echo view('episode/_partials/transcript', $block);
}
?>
</div>
<?php else : ?>
<div class="text-center"><?= lang('Episode.no_transcript') ?></div>
<?php endif; ?>
<?= $this->endSection() ?>

View File

@ -0,0 +1,60 @@
<?= $this->extend('episode/_layout') ?>
<?= $this->section('content') ?>
<?php if (isset($captions)) : ?>
<div class="flex flex-col gap-2">
<Button uri="<?= $transcript->file_url ?>" size="small" iconLeft="download" class="self-start" variant="secondary" target="_blank" download="" rel="noopener noreferrer"><?= lang('Episode.download_transcript', [
'extension' => '.' . $transcript->file_extension,
]) ?></Button>
<?php
// Group the captions into display rows first, then render them in order.
// Consecutive captions by the same speaker are merged into one row that keeps
// the start time of its first caption. A caption without a speaker always
// gets its own row and closes the row being merged, so rows are never
// rendered out of chronological order.
$blocks = [];
$openBlock = null; // index in $blocks of the speaker row still accepting captions

foreach ($captions as $caption) {
    $captionText = $caption['text'] ?? '';

    if (! isset($caption['speaker'])) {
        $blocks[] = [
            'startTime' => isset($caption['startTime']) ? format_duration($caption['startTime']) : '',
            'speaker'   => '',
            'text'      => $captionText,
        ];
        $openBlock = null;
        continue;
    }

    if ($openBlock !== null && $blocks[$openBlock]['speaker'] === $caption['speaker']) {
        // concatenate cues with the same speaker
        $blocks[$openBlock]['text'] .= ' ' . $captionText;
        continue;
    }

    $blocks[] = [
        'startTime' => isset($caption['startTime']) ? format_duration($caption['startTime']) : '',
        'speaker'   => $caption['speaker'],
        'text'      => $captionText,
    ];
    $openBlock = array_key_last($blocks);
}

foreach ($blocks as $block) {
    echo view('episode/_partials/transcript', $block);
}
?>
</div>
<?php else : ?>
<div class="text-center"><?= lang('Episode.no_transcript') ?></div>
<?php endif; ?>
<?= $this->endSection() ?>