feat(ccli): implement CcliPasteParser parsing logic
This commit is contained in:
parent
55a3ea3df8
commit
9412ca71c9
1
.sisyphus/evidence/task-6-empty-throws.txt
Normal file
1
.sisyphus/evidence/task-6-empty-throws.txt
Normal file
|
|
@ -0,0 +1 @@
|
|||
CAUGHT: Keine Sektionen erkannt — bitte vollständige Liedseite einfügen.
|
||||
5
.sisyphus/evidence/task-6-en-de-parse.txt
Normal file
5
.sisyphus/evidence/task-6-en-de-parse.txt
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
array:3 [
|
||||
"title" => "Test Song 3"
|
||||
"sections" => 2
|
||||
"has_translation" => true
|
||||
] // vendor/psy/psysh/src/ExecutionClosure.php(41) : eval()'d code:4
|
||||
4
.sisyphus/evidence/task-6-repeat-marker.txt
Normal file
4
.sisyphus/evidence/task-6-repeat-marker.txt
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
array:2 [
|
||||
"repeat_sections" => 1
|
||||
"modifier" => "Repeat"
|
||||
] // vendor/psy/psysh/src/ExecutionClosure.php(41) : eval()'d code:3
|
||||
4
.sisyphus/evidence/task-6-umlauts.txt
Normal file
4
.sisyphus/evidence/task-6-umlauts.txt
Normal file
|
|
@ -0,0 +1,4 @@
|
|||
array:2 [
|
||||
"title" => "Test Song 15"
|
||||
"has_umlauts" => true
|
||||
] // vendor/psy/psysh/src/ExecutionClosure.php(41) : eval()'d code:3
|
||||
|
|
@ -64,3 +64,23 @@ ### 2026-05-10 CCLI Label Utility Notes
|
|||
- `CcliLabels` works best with a fixed kind list in regexes; no locale config needed for EN/DE normalization.
|
||||
- `normalizeLabelName()` should map only known German kinds and preserve any numeric suffix.
|
||||
- `parseLabel()` can stay lightweight by returning `null` for non-labels and a small array for matched labels.
|
||||
|
||||
### 2026-05-10 Song CCLI Metadata Migration
|
||||
- Song CCLI metadata belongs on `songs` as nullable fields: `imported_from_ccli_at` (timestamp) + `ccli_source_url` (string 500).
|
||||
- Factory state helpers can stay tiny; `fromCcli()` just seeds timestamp + SongSelect URL.
|
||||
- Inference/LSP can lag after edits; a tiny no-op signature change (`fn (): array => [...]`) forced the factory diagnostics to refresh cleanly.
|
||||
|
||||
### 2026-05-10 Settings Language Seed
|
||||
- `SettingsController::AGENDA_KEYS` drives both the index props and the allowed `key` values for PATCH updates.
|
||||
- `default_translation_language` should be validated as a whitelist value (`DE|EN|FR|ES|NL|IT`) only when that setting is being updated.
|
||||
- `CcliSettingsSeeder` must use `Setting::firstOrCreate()` so reseeding does not overwrite a user-changed language.
|
||||
|
||||
### 2026-05-10 CcliPasteParser Scaffold
|
||||
- Mirror `ChurchToolsService` with nullable `Closure` constructor injections and default `= null` values.
|
||||
- This codebase uses `App\Services\DTO\...` namespaces/directories for DTOs, so keep the uppercase `DTO` path aligned with existing services.
|
||||
- Scaffold tests can verify Laravel container resolution without adding any service provider binding.
|
||||
|
||||
### 2026-05-10 CcliPasteParser Implementation
|
||||
- Parser trims pasted lines, treats blank lines as separators, extracts first two header lines as title/author, and excludes CCLI metadata from lyric sections.
|
||||
- EN/DE side-by-side imports merge only adjacent labels with different raw kinds but the same `CcliLabels::normalizeLabelName()` canonical kind/number, preserving German lyrics in `linesTranslated`.
|
||||
- DDEV/Linux path is `tests/fixtures/ccli` (lowercase); macOS accepted `tests/Fixtures/ccli`, but tests must use lowercase for container portability.
|
||||
|
|
|
|||
|
|
@ -2,8 +2,11 @@
|
|||
|
||||
namespace App\Services;
|
||||
|
||||
use App\Services\DTO\ParsedCcliSection;
|
||||
use App\Services\DTO\ParsedCcliSong;
|
||||
use App\Support\CcliLabels;
|
||||
use Closure;
|
||||
use InvalidArgumentException;
|
||||
|
||||
final class CcliPasteParser
|
||||
{
|
||||
|
|
@ -17,17 +20,162 @@ public function __construct(
|
|||
|
||||
/**
 * Parse text pasted from a CCLI song page into a structured song DTO.
 *
 * Every line is trimmed and blank lines are ignored. The non-empty lines
 * before the first section label form the header: the first is used as the
 * title, the second (if present) as the author. From the first label on,
 * metadata lines are mined for the CCLI number and the copyright notice/year
 * and are kept out of the lyrics; every other line is appended to the section
 * opened by the most recent label. Adjacent sections that are translations of
 * each other are merged afterwards.
 *
 * @throws InvalidArgumentException When the input is blank or no section could be recognised.
 */
public function parse(string $rawText): ParsedCcliSong
{
    if (trim($rawText) === '') {
        throw new InvalidArgumentException('Keine Sektionen erkannt — bitte vollständige Liedseite einfügen.');
    }

    $lines = array_map(
        fn (string $line): string => trim($line),
        preg_split('/\r\n|\n|\r/', $rawText) ?: [],
    );

    // Injected detectors take precedence; otherwise fall back to the shared label utility.
    $isSectionLabel = $this->sectionDetector ?? fn (string $line): bool => CcliLabels::isSectionLabel($line);
    $isMetadataLine = $this->metadataDetector ?? fn (string $line): bool => CcliLabels::isMetadataLine($line);

    $firstSectionIndex = null;
    foreach ($lines as $index => $line) {
        if ($line !== '' && $isSectionLabel($line)) {
            $firstSectionIndex = $index;
            break;
        }
    }

    if ($firstSectionIndex === null) {
        throw new InvalidArgumentException('Keine Sektionen erkannt — bitte vollständige Liedseite einfügen.');
    }

    $headerLines = array_values(array_filter(
        array_slice($lines, 0, $firstSectionIndex),
        fn (string $line): bool => $line !== '',
    ));

    $title = $headerLines[0] ?? '';
    $author = $headerLines[1] ?? null;
    $ccliId = null;
    $year = null;
    $copyrightText = null;
    $sections = [];
    $current = null;

    foreach (array_slice($lines, $firstSectionIndex) as $line) {
        if ($line === '') {
            continue;
        }

        if ($isMetadataLine($line)) {
            if (preg_match('/CCLI[\s#-]*(\d+)/iu', $line, $matches)) {
                $ccliId = $matches[1];
            }

            if (str_contains($line, '©')) {
                $copyrightText = $line;

                if (preg_match('/©\s*(\d{4})/u', $line, $matches)) {
                    $year = $matches[1];
                }
            }

            continue;
        }

        if ($isSectionLabel($line)) {
            if ($current !== null) {
                $sections[] = $current;
            }

            // The previous section is closed at this point. Clearing it here
            // ensures that a label the detector accepts but parseLabel()
            // cannot parse neither re-emits the previous section nor
            // attaches the following lines to it.
            $current = null;

            $label = CcliLabels::parseLabel($line);
            if ($label === null) {
                continue;
            }

            $current = [
                'label' => $line,
                'kind' => $label['kind'],
                'number' => $label['number'],
                'modifier' => $label['modifier'],
                'lines' => [],
            ];

            continue;
        }

        if ($current !== null) {
            $current['lines'][] = $line;
        }
    }

    if ($current !== null) {
        $sections[] = $current;
    }

    $parsedSections = $this->mergeTranslatedSections($sections);

    if ($parsedSections === []) {
        throw new InvalidArgumentException('Keine Sektionen erkannt — bitte vollständige Liedseite einfügen.');
    }

    return new ParsedCcliSong(
        title: $title,
        author: $author,
        ccliId: $ccliId,
        year: $year,
        copyrightText: $copyrightText,
        sourceUrl: null,
        sections: $parsedSections,
    );
}
|
||||
|
||||
/**
 * Convert raw section arrays into DTOs, folding each translated pair into one section.
 *
 * When a section and its direct successor form a translated pair, the
 * successor's lines become the `linesTranslated` of the first section and the
 * successor itself is not emitted.
 *
 * @param array<int, array{label: string, kind: string, number: string|null, modifier: string|null, lines: string[]}> $sections
 * @return ParsedCcliSection[]
 */
private function mergeTranslatedSections(array $sections): array
{
    $result = [];
    $total = count($sections);

    for ($position = 0; $position < $total; $position++) {
        $primary = $sections[$position];
        $candidate = $sections[$position + 1] ?? null;
        $translation = null;

        if ($candidate !== null && $this->isTranslatedPair($primary, $candidate)) {
            $translation = $candidate['lines'];
            // Consume the successor so it is not emitted as its own section.
            $position++;
        }

        $result[] = new ParsedCcliSection(
            label: $primary['label'],
            kind: $primary['kind'],
            number: $primary['number'],
            modifier: $primary['modifier'],
            lines: $primary['lines'],
            linesTranslated: $translation,
        );
    }

    return $result;
}
|
||||
|
||||
/**
 * Decide whether two sections are the same song part under different raw label kinds.
 *
 * The raw kinds must differ (compared case-insensitively) while the
 * canonical labels must be identical.
 *
 * @param array{kind: string, number: string|null} $section
 * @param array{kind: string, number: string|null} $next
 */
private function isTranslatedPair(array $section, array $next): bool
{
    $sameRawKind = mb_strtolower($section['kind']) === mb_strtolower($next['kind']);

    if ($sameRawKind) {
        return false;
    }

    return $this->canonicalLabel($section) === $this->canonicalLabel($next);
}
|
||||
|
||||
/**
 * Build the lower-cased, normalised "kind number" label used to compare sections.
 *
 * @param array{kind: string, number: string|null} $section
 */
private function canonicalLabel(array $section): string
{
    $number = $section['number'] ?? '';
    $rawLabel = trim($section['kind'].' '.$number);
    $normalized = CcliLabels::normalizeLabelName($rawLabel);

    return mb_strtolower($normalized);
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
use App\Services\DTO\ParsedCcliSong;
|
||||
|
||||
test('CcliPasteParser can be instantiated with no arguments', function (): void {
|
||||
$parser = new CcliPasteParser;
|
||||
$parser = new CcliPasteParser();
|
||||
|
||||
expect($parser)->toBeInstanceOf(CcliPasteParser::class);
|
||||
});
|
||||
|
|
@ -26,9 +26,9 @@
|
|||
});
|
||||
|
||||
test('CcliPasteParser::parse returns ParsedCcliSong DTO', function (): void {
|
||||
$parser = new CcliPasteParser;
|
||||
$parser = new CcliPasteParser();
|
||||
|
||||
$result = $parser->parse('some text');
|
||||
$result = $parser->parse("Test Song\nTest Artist\n\nVerse 1\nSome text");
|
||||
|
||||
expect($result)->toBeInstanceOf(ParsedCcliSong::class);
|
||||
});
|
||||
|
|
|
|||
132
tests/Feature/CcliPasteParserTest.php
Normal file
132
tests/Feature/CcliPasteParserTest.php
Normal file
|
|
@ -0,0 +1,132 @@
|
|||
<?php
|
||||
|
||||
use App\Services\CcliPasteParser;
|
||||
use App\Services\DTO\ParsedCcliSection;
|
||||
use App\Services\DTO\ParsedCcliSong;
|
||||
|
||||
/**
 * Resolve a CCLI fixture file name to a path under the project's fixture directory.
 */
function ccliFixturePath(string $filename): string
{
    $relativePath = 'tests/fixtures/ccli/'.$filename;

    return base_path($relativePath);
}
|
||||
|
||||
/**
 * Read the contents of a CCLI fixture file.
 *
 * @throws RuntimeException When the fixture does not exist or cannot be read,
 *                          so a missing file is reported as such instead of
 *                          surfacing later as a parser failure.
 */
function ccliFixtureContent(string $filename): string
{
    $path = ccliFixturePath($filename);

    if (! is_file($path) || ! is_readable($path)) {
        throw new RuntimeException("CCLI fixture not found or not readable: {$path}");
    }

    $content = file_get_contents($path);

    if ($content === false) {
        throw new RuntimeException("Unable to read CCLI fixture: {$path}");
    }

    return $content;
}
|
||||
|
||||
// Smoke test: every fixture must yield a titled song whose sections all carry a kind and lyric lines.
test('each fixture parses into a valid ParsedCcliSong DTO', function (): void {
    $parser = new CcliPasteParser();

    // glob() returns false on error; normalise so the emptiness check below also covers that case.
    $paths = glob(base_path('tests/fixtures/ccli/*.txt')) ?: [];

    // Without this guard the test would pass without asserting anything when no fixture is found.
    expect($paths)->not->toBeEmpty('No CCLI fixtures found under tests/fixtures/ccli');

    foreach ($paths as $path) {
        $filename = basename($path);
        $result = $parser->parse(ccliFixtureContent($filename));

        expect($result)->toBeInstanceOf(ParsedCcliSong::class);
        expect($result->title)->not->toBeEmpty("Fixture {$filename}: title should not be empty");
        expect($result->sections)->not->toBeEmpty("Fixture {$filename}: should have at least one section");

        foreach ($result->sections as $section) {
            expect($section)->toBeInstanceOf(ParsedCcliSection::class);
            expect($section->kind)->not->toBeEmpty("Fixture {$filename}: section kind should not be empty");
            expect($section->lines)->not->toBeEmpty("Fixture {$filename}: section should have lines");
        }
    }
});
|
||||
|
||||
// An English-only paste must produce at least four sections, a CCLI id, and no translated lines.
test('english-only-multi-verse.txt parses 4+ sections without translation', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('english-only-multi-verse.txt'));

    expect(count($song->sections))->toBeGreaterThanOrEqual(4);
    expect($song->ccliId)->not->toBeNull();

    $withTranslation = array_filter(
        $song->sections,
        fn (ParsedCcliSection $section): bool => $section->linesTranslated !== null,
    );
    $hasTranslated = $withTranslation !== [];

    expect($hasTranslated)->toBeFalse('English-only should have no linesTranslated');
});
|
||||
|
||||
// A side-by-side EN/DE paste must yield at least one section holding both original and translated lines.
test('english-german-side-by-side.txt extracts both languages per section', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('english-german-side-by-side.txt'));

    $translatedSections = array_values(array_filter(
        $song->sections,
        fn (ParsedCcliSection $section): bool => $section->linesTranslated !== null,
    ));

    expect(count($translatedSections))->toBeGreaterThanOrEqual(1, 'Should have at least 1 section with translation');

    $first = $translatedSections[0];

    expect($first->lines)->not->toBeEmpty();
    expect($first->linesTranslated)->not->toBeEmpty();
});
|
||||
|
||||
// A German-only paste must keep at least one German raw section kind.
test('german-only.txt detects German section labels', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('german-only.txt'));

    $germanKinds = ['strophe', 'refrain', 'brücke'];
    $germanSections = array_filter(
        $song->sections,
        fn (ParsedCcliSection $section): bool => in_array(mb_strtolower($section->kind), $germanKinds, true),
    );

    expect(count($germanSections))->toBeGreaterThanOrEqual(1, 'Should detect at least one German section label');
});
|
||||
|
||||
// A label carrying a repeat marker must surface as a non-null modifier on the section DTO.
test('repeat-marker.txt preserves modifier in section DTO', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('repeat-marker.txt'));

    $withModifier = 0;
    foreach ($song->sections as $section) {
        if ($section->modifier !== null) {
            $withModifier++;
        }
    }

    expect($withModifier)->toBeGreaterThanOrEqual(1, 'Should have at least 1 section with Repeat modifier');
});
|
||||
|
||||
// Umlauts and ß in the title or lyric lines must survive parsing unchanged.
test('umlauts.txt preserves Unicode characters', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('umlauts.txt'));

    $sectionTexts = array_map(
        fn (ParsedCcliSection $section): string => implode(' ', $section->lines),
        $song->sections,
    );
    $allText = $song->title.implode('', $sectionTexts);

    $containsUmlaut = (bool) preg_match('/[äöüßÄÖÜ]/u', $allText);

    expect($containsUmlaut)->toBeTrue('Umlauts should be preserved');
});
|
||||
|
||||
// Without a © line the CCLI id is still extracted, but copyright text and year stay null.
test('missing-copyright.txt returns null copyrightText', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('missing-copyright.txt'));

    expect($song->ccliId)->not->toBeNull('CCLI ID should still be extracted');
    expect($song->copyrightText)->toBeNull('No © line should mean null copyrightText');
    expect($song->year)->toBeNull('No © means no year either');
});
|
||||
|
||||
// A five-verse song must yield at least five sections whose kind is a verse (EN or DE spelling).
test('5-verses.txt handles 5 verse sections correctly', function (): void {
    $song = (new CcliPasteParser())->parse(ccliFixtureContent('5-verses.txt'));

    $verseCount = 0;
    foreach ($song->sections as $section) {
        if (in_array(mb_strtolower($section->kind), ['verse', 'strophe'], true)) {
            $verseCount++;
        }
    }

    expect($verseCount)->toBeGreaterThanOrEqual(5, 'Should have 5 verse sections');
});
|
||||
|
||||
// Empty input must be rejected with an InvalidArgumentException.
test('parse throws InvalidArgumentException on empty input', function (): void {
    $parseEmpty = fn () => (new CcliPasteParser())->parse('');

    expect($parseEmpty)->toThrow(InvalidArgumentException::class);
});
|
||||
|
||||
// Text without any recognisable section label must be rejected with an InvalidArgumentException.
test('parse throws InvalidArgumentException on text with no section labels', function (): void {
    $input = 'Just some random text without any section labels';
    $parseUnlabelled = fn () => (new CcliPasteParser())->parse($input);

    expect($parseUnlabelled)->toThrow(InvalidArgumentException::class);
});
|
||||
|
||||
// The exception message shown for unusable input must be a plain text message without an "Error:" prefix.
test('parse error messages are in German', function (): void {
    $parser = new CcliPasteParser();

    $message = null;

    try {
        $parser->parse('');
    } catch (InvalidArgumentException $exception) {
        $message = $exception->getMessage();
    }

    // Fail explicitly when nothing was thrown; previously the test passed without any assertion in that case.
    expect($message)->not->toBeNull('parse() should throw InvalidArgumentException on empty input');
    expect($message)->toMatch('/[A-Za-zÄÖÜäöü]/u');
    expect($message)->not->toContain('Error:');
});
|
||||
Loading…
Reference in a new issue