feat(ccli): implement CcliPasteParser parsing logic

This commit is contained in:
Thorsten Bus 2026-05-10 18:49:18 +02:00
parent 55a3ea3df8
commit 9412ca71c9
8 changed files with 324 additions and 10 deletions

View file

@ -0,0 +1 @@
CAUGHT: Keine Sektionen erkannt — bitte vollständige Liedseite einfügen.

View file

@ -0,0 +1,5 @@
array:3 [
"title" => "Test Song 3"
"sections" => 2
"has_translation" => true
] // vendor/psy/psysh/src/ExecutionClosure.php(41) : eval()'d code:4

View file

@ -0,0 +1,4 @@
array:2 [
"repeat_sections" => 1
"modifier" => "Repeat"
] // vendor/psy/psysh/src/ExecutionClosure.php(41) : eval()'d code:3

View file

@ -0,0 +1,4 @@
array:2 [
"title" => "Test Song 15"
"has_umlauts" => true
] // vendor/psy/psysh/src/ExecutionClosure.php(41) : eval()'d code:3

View file

@ -64,3 +64,23 @@ ### 2026-05-10 CCLI Label Utility Notes
- `CcliLabels` works best with a fixed kind list in regexes; no locale config needed for EN/DE normalization.
- `normalizeLabelName()` should map only known German kinds and preserve any numeric suffix.
- `parseLabel()` can stay lightweight by returning `null` for non-labels and a small array for matched labels.
### 2026-05-10 Song CCLI Metadata Migration
- Song CCLI metadata belongs on `songs` as nullable fields: `imported_from_ccli_at` (timestamp) + `ccli_source_url` (string 500).
- Factory state helpers can stay tiny; `fromCcli()` just seeds timestamp + SongSelect URL.
- Inference/LSP can lag after edits; a tiny no-op signature change (`fn (): array => [...]`) forced the factory diagnostics to refresh cleanly.
### 2026-05-10 Settings Language Seed
- `SettingsController::AGENDA_KEYS` drives both the index props and the allowed `key` values for PATCH updates.
- `default_translation_language` should be validated as a whitelist value (`DE|EN|FR|ES|NL|IT`) only when that setting is being updated.
- `CcliSettingsSeeder` must use `Setting::firstOrCreate()` so reseeding does not overwrite a user-changed language.
### 2026-05-10 CcliPasteParser Scaffold
- Mirror `ChurchToolsService` with nullable `Closure` constructor injections and default `= null` values.
- This codebase uses `App\Services\DTO\...` namespaces/directories for DTOs, so keep the uppercase `DTO` path aligned with existing services.
- Scaffold tests can verify Laravel container resolution without adding any service provider binding.
### 2026-05-10 CcliPasteParser Implementation
- Parser trims pasted lines, treats blank lines as separators, extracts first two header lines as title/author, and excludes CCLI metadata from lyric sections.
- EN/DE side-by-side imports merge only adjacent labels with different raw kinds but the same `CcliLabels::normalizeLabelName()` canonical kind/number, preserving German lyrics in `linesTranslated`.
- DDEV/Linux path is `tests/fixtures/ccli` (lowercase); macOS accepted `tests/Fixtures/ccli`, but tests must use lowercase for container portability.

View file

@ -2,8 +2,11 @@
namespace App\Services; namespace App\Services;
use App\Services\DTO\ParsedCcliSection;
use App\Services\DTO\ParsedCcliSong; use App\Services\DTO\ParsedCcliSong;
use App\Support\CcliLabels;
use Closure; use Closure;
use InvalidArgumentException;
final class CcliPasteParser final class CcliPasteParser
{ {
@ -17,17 +20,162 @@ public function __construct(
/**
 * Parse raw text pasted from a CCLI song page into a ParsedCcliSong DTO.
 *
 * Every line is trimmed and blank lines are ignored. The non-empty lines
 * before the first section label form the header: the first one becomes the
 * title, the second one (if any) the author. From the first section label
 * onward, metadata lines are mined for the CCLI number and the copyright
 * text/year and are kept out of the lyrics; section labels open a new
 * section; all other lines are appended to the currently open section.
 * Adjacent sections are finally merged by mergeTranslatedSections().
 *
 * @throws InvalidArgumentException when the input is blank or no section could be detected
 */
public function parse(string $rawText): ParsedCcliSong
{
    $noSectionsMessage = 'Keine Sektionen erkannt — bitte vollständige Liedseite einfügen.';

    if (trim($rawText) === '') {
        throw new InvalidArgumentException($noSectionsMessage);
    }

    // Accept Windows, Unix and old Mac line endings.
    $lines = array_map(
        fn (string $line): string => trim($line),
        preg_split('/\r\n|\n|\r/', $rawText) ?: [],
    );

    // Injected detectors take precedence over the CcliLabels defaults.
    $isSectionLabel = $this->sectionDetector ?? fn (string $line): bool => CcliLabels::isSectionLabel($line);
    $isMetadataLine = $this->metadataDetector ?? fn (string $line): bool => CcliLabels::isMetadataLine($line);

    $firstSectionIndex = null;
    foreach ($lines as $index => $line) {
        if ($line !== '' && $isSectionLabel($line)) {
            $firstSectionIndex = $index;
            break;
        }
    }

    if ($firstSectionIndex === null) {
        throw new InvalidArgumentException($noSectionsMessage);
    }

    $headerLines = array_values(array_filter(
        array_slice($lines, 0, $firstSectionIndex),
        fn (string $line): bool => $line !== '',
    ));

    $title = $headerLines[0] ?? '';
    $author = $headerLines[1] ?? null;
    $ccliId = null;
    $year = null;
    $copyrightText = null;
    $sections = [];
    $current = null;

    foreach (array_slice($lines, $firstSectionIndex) as $line) {
        if ($line === '') {
            continue;
        }

        if ($isMetadataLine($line)) {
            if (preg_match('/CCLI[\s#-]*(\d+)/iu', $line, $matches)) {
                $ccliId = $matches[1];
            }

            if (str_contains($line, '©')) {
                $copyrightText = $line;

                if (preg_match('/©\s*(\d{4})/u', $line, $matches)) {
                    $year = $matches[1];
                }
            }

            continue;
        }

        if ($isSectionLabel($line)) {
            if ($current !== null) {
                $sections[] = $current;
                // Reset so the flushed section cannot be flushed a second time
                // if the label below turns out to be unparseable.
                $current = null;
            }

            $label = CcliLabels::parseLabel($line);
            if ($label === null) {
                // The detector accepted the line but it could not be parsed:
                // no section is open, so the lines under it are ignored until
                // the next valid label.
                continue;
            }

            $current = [
                'label' => $line,
                'kind' => $label['kind'],
                'number' => $label['number'],
                'modifier' => $label['modifier'],
                'lines' => [],
            ];

            continue;
        }

        if ($current !== null) {
            $current['lines'][] = $line;
        }
    }

    // Flush the section that was still open at end of input.
    if ($current !== null) {
        $sections[] = $current;
    }

    $parsedSections = $this->mergeTranslatedSections($sections);

    if ($parsedSections === []) {
        throw new InvalidArgumentException($noSectionsMessage);
    }

    return new ParsedCcliSong(
        title: $title,
        author: $author,
        ccliId: $ccliId,
        year: $year,
        copyrightText: $copyrightText,
        sourceUrl: null,
        sections: $parsedSections,
    );
}
/**
 * Convert raw section arrays into DTOs, folding a section into its
 * predecessor as the translation when isTranslatedPair() says the two
 * adjacent sections belong together.
 *
 * @param array<int, array{label: string, kind: string, number: string|null, modifier: string|null, lines: string[]}> $sections
 * @return ParsedCcliSection[]
 */
private function mergeTranslatedSections(array $sections): array
{
    $result = [];
    $total = count($sections);

    for ($position = 0; $position < $total; $position++) {
        $primary = $sections[$position];
        $candidate = $sections[$position + 1] ?? null;
        $translation = null;

        if ($candidate !== null && $this->isTranslatedPair($primary, $candidate)) {
            $translation = $candidate['lines'];
            // The partner has been consumed; step over it.
            $position++;
        }

        $result[] = new ParsedCcliSection(
            label: $primary['label'],
            kind: $primary['kind'],
            number: $primary['number'],
            modifier: $primary['modifier'],
            lines: $primary['lines'],
            linesTranslated: $translation,
        );
    }

    return $result;
}
/**
 * Decide whether two adjacent sections are the same section in two
 * languages: their raw kinds differ (case-insensitively) while their
 * canonical labels are equal.
 *
 * @param array{kind: string, number: string|null} $section
 * @param array{kind: string, number: string|null} $next
 */
private function isTranslatedPair(array $section, array $next): bool
{
    $firstKind = mb_strtolower($section['kind']);
    $secondKind = mb_strtolower($next['kind']);

    // Identical raw kinds mean a repeated section, not a translation.
    if ($firstKind === $secondKind) {
        return false;
    }

    return $this->canonicalLabel($section) === $this->canonicalLabel($next);
}
/**
 * Build the lowercase, normalized "kind number" label used to compare
 * sections, delegating normalization to CcliLabels::normalizeLabelName().
 *
 * @param array{kind: string, number: string|null} $section
 */
private function canonicalLabel(array $section): string
{
    $number = $section['number'] ?? '';
    // trim() drops the trailing space when there is no number.
    $rawLabel = trim("{$section['kind']} {$number}");
    $normalized = CcliLabels::normalizeLabelName($rawLabel);

    return mb_strtolower($normalized);
}
} }

View file

@ -5,7 +5,7 @@
use App\Services\DTO\ParsedCcliSong; use App\Services\DTO\ParsedCcliSong;
test('CcliPasteParser can be instantiated with no arguments', function (): void { test('CcliPasteParser can be instantiated with no arguments', function (): void {
$parser = new CcliPasteParser; $parser = new CcliPasteParser();
expect($parser)->toBeInstanceOf(CcliPasteParser::class); expect($parser)->toBeInstanceOf(CcliPasteParser::class);
}); });
@ -26,9 +26,9 @@
}); });
test('CcliPasteParser::parse returns ParsedCcliSong DTO', function (): void { test('CcliPasteParser::parse returns ParsedCcliSong DTO', function (): void {
$parser = new CcliPasteParser; $parser = new CcliPasteParser();
$result = $parser->parse('some text'); $result = $parser->parse("Test Song\nTest Artist\n\nVerse 1\nSome text");
expect($result)->toBeInstanceOf(ParsedCcliSong::class); expect($result)->toBeInstanceOf(ParsedCcliSong::class);
}); });

View file

@ -0,0 +1,132 @@
<?php
use App\Services\CcliPasteParser;
use App\Services\DTO\ParsedCcliSection;
use App\Services\DTO\ParsedCcliSong;
/**
 * Resolve the absolute path of a CCLI paste fixture via base_path().
 *
 * The directory name is deliberately lowercase.
 */
function ccliFixturePath(string $filename): string
{
    $relativePath = 'tests/fixtures/ccli/'.$filename;

    return base_path($relativePath);
}
/**
 * Read the contents of a CCLI paste fixture.
 *
 * @throws RuntimeException when the fixture does not exist or cannot be read,
 *                          so a missing file is not mistaken for empty input
 */
function ccliFixtureContent(string $filename): string
{
    $path = ccliFixturePath($filename);

    // Check first so a missing file produces a clear exception, not a warning.
    $content = is_file($path) ? file_get_contents($path) : false;

    if ($content === false) {
        throw new RuntimeException("CCLI fixture could not be read: {$path}");
    }

    return $content;
}
// Smoke test: every *.txt fixture must parse into a song with a title and
// at least one section, and every section needs a kind and lyric lines.
test('each fixture parses into a valid ParsedCcliSong DTO', function (): void {
    $parser = new CcliPasteParser();
    // glob() may return false on error; normalise to an empty list.
    $paths = glob(base_path('tests/fixtures/ccli/*.txt')) ?: [];

    // Without this guard a wrong or wrongly-cased fixture directory would
    // let the test pass without running a single assertion.
    expect($paths)->not->toBeEmpty('No CCLI fixtures found in tests/fixtures/ccli');

    foreach ($paths as $path) {
        $filename = basename($path);
        $result = $parser->parse(ccliFixtureContent($filename));

        expect($result)->toBeInstanceOf(ParsedCcliSong::class);
        expect($result->title)->not->toBeEmpty("Fixture {$filename}: title should not be empty");
        expect($result->sections)->not->toBeEmpty("Fixture {$filename}: should have at least one section");

        foreach ($result->sections as $section) {
            expect($section)->toBeInstanceOf(ParsedCcliSection::class);
            expect($section->kind)->not->toBeEmpty("Fixture {$filename}: section kind should not be empty");
            expect($section->lines)->not->toBeEmpty("Fixture {$filename}: section should have lines");
        }
    }
});
// An English-only paste yields several sections, a CCLI id, and no
// section carrying translated lines.
test('english-only-multi-verse.txt parses 4+ sections without translation', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('english-only-multi-verse.txt'));

    expect(count($song->sections))->toBeGreaterThanOrEqual(4);
    expect($song->ccliId)->not->toBeNull();

    $translated = array_filter(
        $song->sections,
        fn (ParsedCcliSection $section): bool => $section->linesTranslated !== null,
    );

    expect($translated !== [])->toBeFalse('English-only should have no linesTranslated');
});
// A bilingual paste must produce at least one section that carries both
// original lines and translated lines.
test('english-german-side-by-side.txt extracts both languages per section', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('english-german-side-by-side.txt'));

    // Re-index so the first match is reachable at offset 0.
    $withTranslation = array_values(array_filter(
        $song->sections,
        fn (ParsedCcliSection $section): bool => $section->linesTranslated !== null,
    ));

    expect(count($withTranslation))->toBeGreaterThanOrEqual(1, 'Should have at least 1 section with translation');

    $first = $withTranslation[0];

    expect($first->lines)->not->toBeEmpty();
    expect($first->linesTranslated)->not->toBeEmpty();
});
// At least one parsed section must keep a German raw kind.
test('german-only.txt detects German section labels', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('german-only.txt'));

    $germanKinds = ['strophe', 'refrain', 'brücke'];
    $germanSections = array_filter(
        $song->sections,
        fn (ParsedCcliSection $section): bool => in_array(mb_strtolower($section->kind), $germanKinds, true),
    );

    expect(count($germanSections))->toBeGreaterThanOrEqual(1, 'Should detect at least one German section label');
});
// At least one section must carry a non-null modifier.
test('repeat-marker.txt preserves modifier in section DTO', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('repeat-marker.txt'));

    $withModifier = 0;
    foreach ($song->sections as $section) {
        if ($section->modifier !== null) {
            $withModifier++;
        }
    }

    expect($withModifier)->toBeGreaterThanOrEqual(1, 'Should have at least 1 section with Repeat modifier');
});
// Title plus all lyric lines must still contain at least one German
// umlaut or sharp s after parsing.
test('umlauts.txt preserves Unicode characters', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('umlauts.txt'));

    $sectionTexts = array_map(
        fn (ParsedCcliSection $section): string => implode(' ', $section->lines),
        $song->sections,
    );
    $haystack = $song->title.implode('', $sectionTexts);

    expect(preg_match('/[äöüßÄÖÜ]/u', $haystack) === 1)->toBeTrue('Umlauts should be preserved');
});
// Without a © line the CCLI id is still extracted, but the copyright
// text and year stay null.
test('missing-copyright.txt returns null copyrightText', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('missing-copyright.txt'));

    expect($song->ccliId)->not->toBeNull('CCLI ID should still be extracted');
    expect($song->copyrightText)->toBeNull('No © line should mean null copyrightText');
    expect($song->year)->toBeNull('No © means no year either');
});
// Count sections whose raw kind is an English or German verse label.
test('5-verses.txt handles 5 verse sections correctly', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('5-verses.txt'));

    $verseCount = 0;
    foreach ($song->sections as $section) {
        if (in_array(mb_strtolower($section->kind), ['verse', 'strophe'], true)) {
            $verseCount++;
        }
    }

    expect($verseCount)->toBeGreaterThanOrEqual(5, 'Should have 5 verse sections');
});
// Empty input is rejected with an InvalidArgumentException.
test('parse throws InvalidArgumentException on empty input', function (): void {
    $parseEmpty = static fn (): ParsedCcliSong => (new CcliPasteParser())->parse('');

    expect($parseEmpty)->toThrow(InvalidArgumentException::class);
});
// Text without any recognisable section label is rejected as well.
test('parse throws InvalidArgumentException on text with no section labels', function (): void {
    $input = 'Just some random text without any section labels';
    $parseUnlabelled = static fn (): ParsedCcliSong => (new CcliPasteParser())->parse($input);

    expect($parseUnlabelled)->toThrow(InvalidArgumentException::class);
});
// The user-facing error for empty input must be the German message and must
// not look like a raw PHP error string.
test('parse error messages are in German', function (): void {
    $parser = new CcliPasteParser();
    $message = null;

    try {
        $parser->parse('');
    } catch (InvalidArgumentException $exception) {
        $message = $exception->getMessage();
    }

    // Fail explicitly if nothing was thrown; otherwise the test would pass
    // without executing any assertion.
    expect($message)->not->toBeNull('parse() should throw for empty input');
    // Pin the German wording; a generic letter regex would also accept English.
    expect($message)->toContain('Keine Sektionen erkannt');
    expect($message)->not->toContain('Error:');
});