#!/usr/bin/perl -w #--------------------------------------------------------------------------- use strict; use App::Input::InputController; use Bible20::TheWord::Reader; use Date::Pcalc; use File::Copy; use File::CsvReader; use File::Spec; use File::XmlReader; use File::XmlWriter; use Logging::Debug; use LStrings::LStringsIO; use constant { _LEFTIMAGE => "leftimage", _RIGHTIMAGE => "rightimage", _TITLE => "title", _START => "start", _END => "end", _TABLE => "table", _COPYRIGHT1 => "copyright1", _COPYRIGHTSEP => "copyrightsep", _COPYRIGHT2 => "copyright2", _LSTRINGS => "/steeb/helmut/prj/Bible20/theword/dict/LStrings.xml", }; my $gLStrings = 0; sub usage { my $msg = shift; if ($msg) { print "$msg\n"; } print <Ja, Herr, to wrap behind "Herr,") v (0|1|2) verbosity level Example: perl -I Perl5lib/ print/bin/twdFillTheWordMonthlyTemplateOdt.pl\\ -infile theword/txt/th/th_ThaiHolyBible1971_2011.twd\\ -infile2 theword/txt/de/de_Schlachter2000_2011.twd\\ -template print/TheWordMonthly/th_TheWord_Monthly.odt\\ -processfile print/TheWordMonthly/process.xml -month 2011-02 -o /tmp/th_TheWordMonthly_2011-02.odt EOUSAGE exit; } =item ParseProcessFile @return ProcessSpecs->{$bible}->{$parolID} = [ [$action, $prefix], ...] where $action is one of "wrap", "unwrap", "small", $prefix is a fragment of a verse. =cut sub ParseProcessFile { my $content = shift or die; my %PROCESSSPECS; my @bibles = ($content =~ m| \s* (.*?) |gxso); while (my ($bible, $specs) = splice(@bibles, 0, 2)) { #MsgDebug("ParseProcessFile $bible"); # for now, just support # # der die das, # to indicate wrapping in Gn1v3 text behind the contents of . # der die das, # to join lines in Gn1v3 text behind the contents of . # der die das, # to use smaller font for the line in Gn1v3 text starting (!) with the contents of . while ($specs =~ m@<((?:un)?wrap|small)\s+id\s*=\s*["'](.*?)['"].*?>(.*?)@sg) { my ($action, $parolID, $prefix) = ($1, $2, $3); push @{$PROCESSSPECS{$bible}->{$parolID} ||= []}, [$action, $prefix]; #print "ParseProcessFile: $bible - $parolID - $action - $prefix\n"; } } return \%PROCESSSPECS; } # result must look like # # # พระคำสำหรับวันศุกร์, วันที่ 7 มกราคม 2011 # Ich will ihre Abtrünnigkeit heilen, # Hosea 14,5 # sub ExtractPatternsFromContentXml { my $content = shift or die; # content.xml contains # - $tablePattern = one # - with 3 # - with 3 each # Of the 9 cells, 4 must be filled (1 = top left, 3 = top right, 7 = bottom left, 9 = bottom right) # # 1. from $tablePattern, extract XML parts to insert later # 2. in $tablePattern replace the 4 cells by {{1}}, ... {{4}} for later replacement # my ($tablePattern) = $content =~m@()@s; # [TablePattern] if (!$tablePattern) { die "Table for verses not detected in content.xml file (...)"; } my %PATTERNS; my $curCell = 0; my $resCell = 0; my $Prepare = sub { my ($start, $content, $end) = @_; ++$curCell; if ($curCell == 1 || $curCell == 3 || $curCell == 7 || $curCell == 9) { # 1. extract XML parts if ($curCell == 1) { # Cell.TopLeft MsgDebug("Prepare $curCell:$resCell => start, end, copyright"); $PATTERNS{_START()} = $start; $PATTERNS{_END()} = $end; ($PATTERNS{_COPYRIGHT1()}, $PATTERNS{_COPYRIGHTSEP()}, $PATTERNS{_COPYRIGHT2()}) = $content =~ m@ (.*?) ((?:]+/>\s*){3,}) (.*) @xs; } elsif ($curCell == 3) { # Cell.TopRight MsgDebug("Prepare $curCell:$resCell => title"); $PATTERNS{_TITLE()} = $start. $content . $end; } elsif ($curCell == 7 || $curCell == 9) { # Cell.BottomLeft / Cell.BottomRight my $part = ($curCell == 7) ? _LEFTIMAGE() : _RIGHTIMAGE(); my ($frame) = $content =~ m@()@s; MsgDebug("Prepare $curCell:$resCell => $part"); $PATTERNS{$part} = $frame; } else {# be defensive die "Unhandled cell #$curCell"; } # 2. replace - insert marker like "{{1}}" into $tablePattern ++$resCell; return "{{$resCell}}" } MsgDebug("Prepare $curCell:$resCell => skip"); return $start . $content . $end; }; # sub Prepare $tablePattern =~ s| (\s*) (.*?) () | $Prepare->($1, $2, $3); |gxse ; # ensure that all required XML parts have been extracted #2013-12-24 HS skip for test # foreach my $part (qw(start end title leftimage rightimage)) { # die "Missing $part" unless $PATTERNS{$part}; # } $PATTERNS{table} = $tablePattern; return \%PATTERNS; } sub ProcessPatterns { my $TheWordFile1 = shift or die; my $TheWordFile2 = shift or die; my $month = shift or die; my $Patterns = shift or die; my $title = $Patterns->{_TITLE()}; my $title1 = $gLStrings->LookupKeyForLang("TheWord", $TheWordFile1->{lang}) || ""; my $title2 = $gLStrings->LookupKeyForLang("TheWord", $TheWordFile2->{lang}) || ""; $title =~ s@%TheWord1;@$title1@g; $title =~ s@%TheWord2;@$title2@g; my $nYear = 0 + substr($month, 0, 4) or die; my $nMonth = 0 + substr($month, 5, 2) or die; my $id = "_" . (130 + $nMonth); # "_131" = MonthJan, etc. MsgDebug("ProcessPatterns: getting month LStrings for id #$id"); my $month1 = $gLStrings->getLById($id, $TheWordFile1->{lang}) or die; my $month2 = $gLStrings->getLById($id, $TheWordFile2->{lang}) or die; $title =~ s@%Month1;@$month1@g; $title =~ s@%Month2;@$month2@g; $title =~ s@%Year;@$nYear@g; $Patterns->{_TITLE()} = $title; return $Patterns; } sub ComputeLine { my $style = shift or die; my $line = shift or die "Missing line for style $style"; # drop $line =~ s@@@g; return "$line\n"; } sub ComputeParol { my $Parol = shift or die; my $ProcessSpecForBible = shift; my $parolID = $Parol->{id}; my $res = ""; my $Specs = ($ProcessSpecForBible && $ProcessSpecForBible->{$parolID}) || []; # optionally compute intro line my $intro = $Parol->{intro}; if ($intro) { # apply to intro my $style = "TheWord.IL"; # default foreach my $Spec (@$Specs) { my ($action, $spec) = @$Spec; if ($action eq "small") { if ($intro =~ m@^\Q$spec\E@) { $style = "TheWord.smallIL"; MsgDebug("ComputeParol $parolID: introline of '$spec' small."); last; } } } $res .= ComputeLine($style, $intro); } my $text = $Parol->{text}; # optionally process the multi-line text to get better line lengths foreach my $Spec (@$Specs) { my ($action, $spec) = @$Spec; if ($action eq "wrap") { if ($text =~ s@(\Q$spec\E)\s*@$1\n@g) { # replace spaces by line-break MsgDebug("ComputeParol $parolID: wrapped behind '$spec'."); } else { MsgError("ComputeParol $parolID: wrap pattern '$spec' does not match."); } } elsif ($action eq "unwrap") { if ($text =~ s@(\Q$spec\E)\n@$1 @g) { # replace line-break by space MsgDebug("ComputeParol $parolID: unwrapped behind '$spec'."); } else { MsgError("ComputeParol $parolID: unwrap pattern '$spec' does not match."); } } elsif ($action eq "small") { if ($text =~ s@^(\Q$spec\E.*)$@SMALL$1@mg) { # insert "SMALL" prefix MsgDebug("ComputeParol $parolID: line of '$spec' small."); } else { MsgError("ComputeParol $parolID: small pattern '$spec' does not match."); } } else { die "Unknown action $action"; } } # compute text lines foreach my $line (split(/\n/, $text)) { my $style = ($line =~ s@^SMALL@@) ? "TheWord.smallL" : "TheWord.L"; $res .= ComputeLine($style, $line); } # compute source line $res .= ComputeLine("TheWord.SL", $Parol->{ref}); return $res; } sub ComputeTheWord { my $TheWord = shift or die; my %ARGS = @_; my $res = ""; my $withTitle = $ARGS{withTitle}; $withTitle = 1 unless defined($withTitle); if ($withTitle) { $res = ComputeLine("TheWord.Date", $TheWord->{title}); } my $ProcessSpecForBible = $ARGS{ProcessSpecForBible}; my $Parols = $TheWord->{parols}; $res .= ComputeParol($Parols->[0], $ProcessSpecForBible) . ComputeParol($Parols->[1], $ProcessSpecForBible); return $res; } sub ComputeContentPages { my $TheWordFile1 = shift or die; my $TheWordFile2 = shift or die; my $month = shift or die; my $Patterns = shift or die; my $ProcessSpecs = shift; my ($nYear, $nMonth) = split("-", $month); my $nDaysInMonth = Date::Pcalc::Days_in_Month($nYear, $nMonth); my @CONTENTPAGES = ( {type => "title" } # title page in front ); { # === compute dates to insert into content.xml === # @DATES = ("2011-01-01", "2011-01-02", ..., "2011-01-31") my @DATES; # [Dates1Based] { MsgDebug("Format: $nDaysInMonth days in month $month"); for (my $n = 1; $n <= $nDaysInMonth; ++$n) { $DATES[$n] = $month . sprintf("-%02d", $n); # [Dates1Based] } } # compute map for accessing TheWord by date string my %DATE2THEWORD; foreach my $TheWord (@{$TheWordFile1->{days}}) { $DATE2THEWORD{$TheWord->{date}}->[0] = $TheWord; } foreach my $TheWord (@{$TheWordFile2->{days}}) { $DATE2THEWORD{$TheWord->{date}}->[1] = $TheWord; } # compute cell contents for TheWord for (my $n = 1; $n <= $nDaysInMonth; ++$n) { # [Dates1Based] my $date = $DATES[$n]; my $Pair = $DATE2THEWORD{$date}; die "Missing The Word entries for $date in infiles" unless $Pair; my ($TheWord1, $TheWord2) = @$Pair; die "Missing The Word entry for $date in infile1" unless $TheWord1; die "Missing The Word entry for $date in infile2" unless $TheWord2; my $text = ComputeTheWord($TheWord1, ProcessSpecForBible => $ProcessSpecs->{$TheWordFile1->{bible}}) . "" . ComputeTheWord($TheWord2, ProcessSpecForBible => $ProcessSpecs->{$TheWordFile2->{bible}}, withTitle => 0); my $dow = Date::Pcalc::Day_of_Week($nYear, $nMonth, $n); $CONTENTPAGES[$n] = { type => "twd", content => $text, dow => $dow, }; } # set type of remaining empty content pages for (my $n = $nDaysInMonth+1; $n <= 31; ++$n) { # [Dates1Based] $CONTENTPAGES[$n] = { type => "empty"}; } } # add copyright lines if ($nDaysInMonth < 31) { # put into last page MsgDebug("Adding license texts at day 31."); $CONTENTPAGES[31] = { type => "twd", content => $Patterns->{_COPYRIGHTSEP()} . $Patterns->{_COPYRIGHT1()} . $Patterns->{_COPYRIGHTSEP()} . $Patterns->{_COPYRIGHT2()} . $Patterns->{_COPYRIGHTSEP()}, }; } else { # append in shortest pages my @NDAYLINES0; for (my $n = 1; $n <= $nDaysInMonth; ++$n) { # [Dates1Based] my @P = $CONTENTPAGES[$n]->{content} =~ m/( 0 + scalar(@P), "dayInMonth" => $n, }; } # sort by page length => $NDAYLINES[0] = shortest page (= fewest lines) @NDAYLINES0 = sort { $a->{nLines} <=> $b->{nLines} } @NDAYLINES0; # append copyright texts foreach my $top (0, 1) { my $dayInMonth = $NDAYLINES0[$top]->{dayInMonth}; MsgDebug("Adding license text #$top at day $dayInMonth after " . $NDAYLINES0[$top]->{nLines} . " lines."); # assume there is enough room for separator (it looks much better) $CONTENTPAGES[$dayInMonth]->{content} .= $Patterns->{_COPYRIGHTSEP()} . $Patterns->{($top == 0) ? _COPYRIGHT1() : _COPYRIGHT2()}; } } return \@CONTENTPAGES; } sub ReorderContentPages { my $ContentPages = shift or die; my $sequence = shift; # re-order cell contents for booklet my @PAPERPAGES; $#PAPERPAGES = 31; # ensure all paper pages are represented in array # DEBUG my $i = 0;foreach (@$ContentPages) { $_->{n} = ++$i; } # Sequence of content page numbers for booklet of 4 sheets, front + rear, 4 content pages per sheet page. # Title page has page number 1. my @PAGESEQUENCE = $sequence ? # front 1,2,3,4, rear 1,2,3,4 ( 32, 1, 30, 3, 28, 5, 26, 7, 24, 9, 22, 11, 20, 13, 18, 15, 2, 31, 4, 29, 6, 27, 8, 25, 10, 23, 12, 21, 14, 19, 16, 17, ) : # front 1, rear 1, ... front 4, rear 4 ( 32, 1, 30, 3, 2, 31, 4, 29, 28, 5, 26, 7, 6, 27, 8, 25, 24, 9, 22, 11, 10, 23, 12, 21, 20, 13, 18, 15, 14, 19, 16, 17, ); my $nPaperPages = scalar(@PAGESEQUENCE); for (my $i = 0; $i < $nPaperPages; ++$i) { # $c== 0: $PAPERPAGES[ 0] = $CELLS[ 1] # $c== 1: $PAPERPAGES[ 1] = $CELLS[30] # $c== 5: $PAPERPAGES[ 5] = $CELLS[ 0] # $c==31: $PAPERPAGES[31] = $CELLS[14] # may leave some @PAPERPAGES undefined (at end of month) $PAPERPAGES[$i] = $ContentPages->[$PAGESEQUENCE[$i]-1]; } #DEBUG foreach (@PAPERPAGES) { print $_->{n}. $_->{type}."\t"; } print "\n"; die; return \@PAPERPAGES; } sub FormatCell { my $Patterns = shift or die; my $Page = shift or die; my $nPaperPage = shift or die; # 1-based my $nCell = shift or die; # 1-based MsgDebug("FormatCell $nPaperPage:$nCell"); my $res; my $type = $Page->{type}; if ($type eq "twd") { # insert text cell my $cell = $Page->{content}; # MsgDebug(" twd page " . $Page->{n}); # DEBUG #2013-12-24 HS skip for test # if (exists($Page->{dow}) && $Page->{dow} == 7) { # # mark Sunday by inserting frame (which contains an image) # # on left side, take leftimage; on right side, take rightimage # my $frame = ($nCell % 2) ? $Patterns->{_LEFTIMAGE()} : $Patterns->{_RIGHTIMAGE()}; # # insert frame at beginning of first of cell # $cell =~ s/()/$1$frame/s; # MsgDebug(" with frame for sunday"); # } $res = $Patterns->{_START()} . $cell . $Patterns->{_END()}; } elsif ($type eq "title") { MsgDebug(" title page"); $res = $Patterns->{_TITLE()}; } elsif ($type eq "empty") { MsgDebug(" empty page"); $res = $Patterns->{_START()} . $Patterns->{_END()}; } else { die "Unknown page type $type"; } # MsgDebug(" done FormatCell\n$res"); return $res; } sub FormatSheetPages { my $Patterns = shift or die; my $PaperPages = shift or die; MsgDebug("FormatSheetPages..."); my $resTables = ""; my $nSheetPages = scalar(@$PaperPages) / 4; for (my $nSheetPage = 1; $nSheetPage <= $nSheetPages; ++$nSheetPage) { # 1-based # replace marker like "{{1}}" by text # loops over all markers even in last page ($PaperPages may be empty at end of short month) my $tablePattern = $Patterns->{_TABLE()}; for (my $nCell = 1; $nCell <= 4; ++$nCell) { # 1-based my $nPaperPage = ($nSheetPage - 1) * 4 + $nCell; my $Page = $PaperPages->[$nPaperPage-1]; my $formatted = FormatCell($Patterns, $Page, $nSheetPage, $nCell); $tablePattern =~ s@\{\{$nCell\}\}@$formatted@; } $resTables .= $tablePattern; } MsgDebug(" done FormatSheetPages."); return $resTables; } sub ReportParolIDWithUnderscore { my $Parol = shift or die; if ($Parol->{id} =~ m/_/) { MsgInfo("Warning: parolID \t$Parol->{id} \tcontains underscore - ensure correct id used in process.xml"); } } sub ReportParolIDsWithUnderscore { my $TheWordFile1 = shift or die; my $month = shift or die; foreach my $TheWord (@{$TheWordFile1->{days}}) { next unless $TheWord->{date} =~ m/\A$month/; my $Parols = $TheWord->{parols}; ReportParolIDWithUnderscore($Parols->[0]); ReportParolIDWithUnderscore($Parols->[1]); } } sub Format { my %ARGS = @_; my $month = $ARGS{month} or die; my $content = $ARGS{content} or die; my $TheWordFile1 = $ARGS{TheWordFile1} or die; my $TheWordFile2 = $ARGS{TheWordFile2} or die; my $ProcessSpecs = $ARGS{ProcessSpecs}; my $sequence = shift; # "Content pages" # - 1 .. #daysInMonth pages # - 1 title page # # "Paper pages" # - content pages re-ordered for booklet # - order depends on # * number of paper pages per sheet page # * order of sheet pages (per page / per side) # # "Sheet pages" # - pages as printed # - each is either front or rear # * definition: first content page is on front of sheet page # - each contains 4 paper pages, # in the order: top left, top right, bottom left, bottom right # - may be ordered # * per page: sheet 1 front, sheet 1 rear, sheet 2 front, ... # * per side: sheet 1 front, sheet 2 front, ..., sheet 1 rear, ... # # "Sheet" # - contains 2 sheet pages: front + rear # - #sheets = int((#ContentPages + 7) / 8) ReportParolIDsWithUnderscore($TheWordFile1, $month); my $Patterns = ExtractPatternsFromContentXml($content) or return 0; $Patterns = ProcessPatterns($TheWordFile1, $TheWordFile2, $month, $Patterns) or return 0; my $ContentPages = ComputeContentPages($TheWordFile1, $TheWordFile2, $month, $Patterns, $ProcessSpecs) or return 0; my $PaperPages = ReorderContentPages($ContentPages, $sequence) or return 0; my $resTables = FormatSheetPages($Patterns, $PaperPages); # insert result string into entire XML content.xml $content =~ s@()@$resTables@s; # [TablePattern] return $content; } # main sub main { binmode(STDOUT, ":utf8"); binmode(STDERR, ":utf8"); SetStdErrListener(); my $IInputController = App::Input::InputController->new( params => [ { name => "h"}, { name => "help"}, { name => "infile"}, { name => "infile2"}, { name => "o"}, { name => "v"}, { name => "template"}, { name => "lstrings"}, { name => "sequence"}, { name => "processfile"}, { name => "month"}, ], ) or return 0; usage() if defined($IInputController->GetValue("h")) || defined($IInputController->GetValue("help")); my $v = $IInputController->GetValue("v"); $v = 1 unless defined($v); SetDebug($v) if defined($v); my $infile = $IInputController->GetValue("infile") or usage("infile missing"); my $infile2 = $IInputController->GetValue("infile2") or usage("infile2 missing"); my $outfile = $IInputController->GetValue("o") or usage("outfile missing"); my $month = $IInputController->GetValue("month") or usage("month missing"); $month =~ m/\d{4}-\d{2}/ or usage("month must have format YYYY-MM, e.g. 2011-01"); my $templateFilename = $IInputController->GetValue("template") or usage("template missing"); my $lstrings = $IInputController->GetValue("lstrings") || _LSTRINGS(); my $sequence = $IInputController->GetValue("sequence") || 0; my $processfile = $IInputController->GetValue("processfile") || ""; $gLStrings = LStrings::LStringsIO::load($lstrings) or die; my $XmlReader = File::XmlReader->new() or die; my $Reader = Bible20::TheWord::Reader->new() or die; my $TheWordFile1 = $Reader->Read($XmlReader->Read(filename => $infile)) or die; my $TheWordFile2 = $Reader->Read($XmlReader->Read(filename => $infile2)) or die; # read template: content.xml my $contentFilename; my $content; { my $tmpdir = File::Spec->tmpdir(); MsgDebug("Using template $templateFilename, tmpdir $tmpdir"); my $cmd = "unzip -o -d $tmpdir $templateFilename content.xml"; MsgDebug($cmd); `$cmd` or die; $contentFilename = File::Spec->catfile($tmpdir, "content.xml"); if (!-e $contentFilename) { MsgError("$contentFilename does not exist after unzip of $templateFilename"); return 0; } my $XmlReader = File::XmlReader->new() or die; $content = $XmlReader->Read(filename => $contentFilename) or die; } # processfile my $ProcessSpecs; if ($processfile) { $ProcessSpecs = ParseProcessFile($XmlReader->Read(filename => $processfile)); } MsgDebug("Formatting $month into content.xml"); $content = Format( month => $month, TheWordFile1 => $TheWordFile1, TheWordFile2 => $TheWordFile2, ProcessSpecs => $ProcessSpecs, sequence => $sequence, content => $content) or die; # copy to target my $XmlWriter = File::XmlWriter->new() or die; $XmlWriter->Write(filename => $contentFilename, data => $content) or die; MsgDebug("Copying $templateFilename -> $outfile"); copy($templateFilename, $outfile) or die; my $cmd = "zip -j $outfile $contentFilename"; MsgDebug("Updating $contentFilename in $outfile: $cmd"); `$cmd` or die; } &main;