184 lines
5.6 KiB
PHP
184 lines
5.6 KiB
PHP
<?php
|
|
|
|
namespace App\Services;
|
|
|
|
use App\Services\DTO\ParsedCcliSection;
|
|
use App\Services\DTO\ParsedCcliSong;
|
|
use App\Support\CcliLabels;
|
|
use Closure;
|
|
use InvalidArgumentException;
|
|
|
|
/**
 * Turns the raw text of a pasted song page into a {@see ParsedCcliSong}.
 *
 * The text is split into trimmed lines. Everything before the first section
 * label is treated as the header (first non-empty line = title, second =
 * author). Everything from the first label onwards is sorted into sections,
 * while metadata lines (CCLI id, copyright notice) are pulled out on the way.
 * Finally, adjacent sections that share a canonical label but use different
 * raw kind names are merged into one section with a translation.
 */
final class CcliPasteParser
{
    /** Message carried by every InvalidArgumentException thrown by parse(). */
    private const NO_SECTIONS_MESSAGE = 'Keine Sektionen erkannt — bitte vollständige Liedseite einfügen.';

    /**
     * Both detectors are optional predicates that receive one trimmed,
     * non-empty line and return a bool. When omitted, parse() falls back to
     * CcliLabels::isSectionLabel() and CcliLabels::isMetadataLine().
     *
     * @param (Closure(string): bool)|null $sectionDetector  decides whether a line is a section label
     * @param (Closure(string): bool)|null $metadataDetector decides whether a line is a metadata line
     */
    public function __construct(
        private readonly ?Closure $sectionDetector = null,
        private readonly ?Closure $metadataDetector = null,
    ) {
    }

    /**
     * Parses the pasted text.
     *
     * @param string $rawText the pasted page text; any of \r\n, \n or \r is accepted as line break
     *
     * @return ParsedCcliSong the parsed song; sourceUrl is always null
     *
     * @throws InvalidArgumentException when the text is blank, contains no
     *                                  section label, or yields no sections
     */
    public function parse(string $rawText): ParsedCcliSong
    {
        if (trim($rawText) === '') {
            throw new InvalidArgumentException(self::NO_SECTIONS_MESSAGE);
        }

        $lines = array_map(
            static fn (string $line): string => trim($line),
            preg_split('/\r\n|\n|\r/', $rawText) ?: [],
        );

        $isSectionLabel = $this->sectionDetector
            ?? static fn (string $line): bool => CcliLabels::isSectionLabel($line);
        $isMetadataLine = $this->metadataDetector
            ?? static fn (string $line): bool => CcliLabels::isMetadataLine($line);

        $firstSectionIndex = $this->findFirstSectionIndex($lines, $isSectionLabel);
        if ($firstSectionIndex === null) {
            throw new InvalidArgumentException(self::NO_SECTIONS_MESSAGE);
        }

        // Header = the non-empty lines in front of the first section label.
        $headerLines = array_values(array_filter(
            array_slice($lines, 0, $firstSectionIndex),
            static fn (string $line): bool => $line !== '',
        ));

        $title = $headerLines[0] ?? '';
        $author = $headerLines[1] ?? null;
        $ccliId = null;
        $year = null;
        $copyrightText = null;
        $sections = [];
        $current = null;

        foreach (array_slice($lines, $firstSectionIndex) as $line) {
            if ($line === '') {
                continue;
            }

            // Metadata is checked first, so a metadata line never ends up as
            // lyrics or as a section label.
            if ($isMetadataLine($line)) {
                $extractedCcliId = CcliLabels::extractCcliId($line);
                if ($extractedCcliId !== null) {
                    $ccliId = $extractedCcliId;
                }

                if (str_contains($line, '©')) {
                    $copyrightText = $line;

                    // The four digits directly after the © sign are taken as the year.
                    if (preg_match('/©\s*(\d{4})/u', $line, $matches)) {
                        $year = $matches[1];
                    }
                }

                continue;
            }

            if ($isSectionLabel($line)) {
                // A label always closes the section collected so far. Reset
                // $current right away: if the label below turns out to be
                // unparseable, the closed section must not be appended a
                // second time and must not swallow the following lines.
                if ($current !== null) {
                    $sections[] = $current;
                    $current = null;
                }

                $label = CcliLabels::parseLabel($line);
                if ($label === null) {
                    // Detector accepted the line but it cannot be parsed:
                    // lines up to the next valid label belong to no section.
                    continue;
                }

                $current = [
                    'label' => $line,
                    'kind' => CcliLabels::normalizeLabelName($label['kind']),
                    'rawKind' => $label['kind'],
                    'number' => $label['number'],
                    'modifier' => $label['modifier'],
                    'lines' => [],
                ];

                continue;
            }

            // Plain line: belongs to the open section, if there is one.
            if ($current !== null) {
                $current['lines'][] = $line;
            }
        }

        if ($current !== null) {
            $sections[] = $current;
        }

        $parsedSections = $this->mergeTranslatedSections($sections);
        if ($parsedSections === []) {
            throw new InvalidArgumentException(self::NO_SECTIONS_MESSAGE);
        }

        return new ParsedCcliSong(
            title: $title,
            author: $author,
            ccliId: $ccliId,
            year: $year,
            copyrightText: $copyrightText,
            sourceUrl: null,
            sections: $parsedSections,
        );
    }

    /**
     * Finds the first non-empty line that the detector accepts as a section label.
     *
     * @param array<int, string> $lines          trimmed input lines
     * @param Closure            $isSectionLabel predicate taking one line and returning bool
     *
     * @return int|null index into $lines, or null when no line qualifies
     */
    private function findFirstSectionIndex(array $lines, Closure $isSectionLabel): ?int
    {
        foreach ($lines as $index => $line) {
            if ($line !== '' && $isSectionLabel($line)) {
                return $index;
            }
        }

        return null;
    }

    /**
     * Converts the collected raw sections into ParsedCcliSection objects.
     *
     * When a section and its direct successor form a translated pair (see
     * isTranslatedPair()), the successor is consumed: its lines become the
     * linesTranslated of the first section and it produces no object of its own.
     *
     * @param array<int, array{label: string, kind: string, rawKind: string, number: string|null, modifier: string|null, lines: string[]}> $sections
     *
     * @return ParsedCcliSection[]
     */
    private function mergeTranslatedSections(array $sections): array
    {
        $merged = [];
        $total = count($sections);
        $index = 0;

        while ($index < $total) {
            $section = $sections[$index];
            $next = $sections[$index + 1] ?? null;
            $linesTranslated = null;

            if ($next !== null && $this->isTranslatedPair($section, $next)) {
                $linesTranslated = $next['lines'];
                // Skip the translation so it is not emitted as its own section.
                $index++;
            }

            $merged[] = new ParsedCcliSection(
                label: $section['label'],
                kind: $section['kind'],
                number: $section['number'],
                modifier: $section['modifier'],
                lines: $section['lines'],
                linesTranslated: $linesTranslated,
            );

            $index++;
        }

        return $merged;
    }

    /**
     * Tells whether $next is the translation of $section: the raw kind names
     * differ (compared case-insensitively) while the canonical labels match.
     *
     * @param array{kind: string, rawKind: string, number: string|null} $section
     * @param array{kind: string, rawKind: string, number: string|null} $next
     */
    private function isTranslatedPair(array $section, array $next): bool
    {
        if (mb_strtolower($section['rawKind']) === mb_strtolower($next['rawKind'])) {
            return false;
        }

        return $this->canonicalLabel($section) === $this->canonicalLabel($next);
    }

    /**
     * Builds the lower-cased comparison key of a section from its normalized
     * kind and its optional number ("kind number", or just "kind").
     *
     * @param array{kind: string, number: string|null} $section
     */
    private function canonicalLabel(array $section): string
    {
        $label = trim($section['kind'].' '.($section['number'] ?? ''));

        return mb_strtolower(CcliLabels::normalizeLabelName($label));
    }
}
|