Skip to content

Commit

Permalink
[ENH] ZugferdDocumentPdfReader uses internally the ZugferdDocumentPdf…
Browse files Browse the repository at this point in the history
…ReaderExt
  • Loading branch information
HorstOeko committed Jan 13, 2025
1 parent d71b6a8 commit 7989261
Showing 1 changed file with 13 additions and 91 deletions.
104 changes: 13 additions & 91 deletions src/ZugferdDocumentPdfReader.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
use horstoeko\zugferd\exception\ZugferdUnknownProfileParameterException;
use horstoeko\zugferd\exception\ZugferdUnknownXmlContentException;
use JMS\Serializer\Exception\RuntimeException;
use Smalot\PdfParser\Parser as PdfParser;

/**
* Class representing the document reader for incoming PDF/A-Documents with
Expand All @@ -32,67 +31,45 @@
class ZugferdDocumentPdfReader
{
/**
* List of filenames which are possible in PDF
*/
public const ATTACHMENT_FILENAMES = [
'ZUGFeRD-invoice.xml'/*1.0*/,
'zugferd-invoice.xml'/*2.0*/,
'factur-x.xml'/*2.1*/,
'xrechnung.xml'
];

/**
* Load a PDF file (ZUGFeRD/Factur-X)
* Tries to load a PDF file (ZUGFeRD/Factur-X) and return a ZugferdDocumentReader
*
* @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable
* @throws Exception
* @param string $pdfFilename
* @return ZugferdDocumentReader
* @throws Exception
* @throws RuntimeException
* @throws ZugferdFileNotFoundException
* @throws ZugferdFileNotReadableException
* @throws ZugferdNoPdfAttachmentFoundException
* @throws ZugferdUnknownXmlContentException
* @throws ZugferdUnknownProfileException
* @throws ZugferdUnknownProfileParameterException
* @throws RuntimeException
* @throws ZugferdUnknownXmlContentException
*/
public static function readAndGuessFromFile(string $pdfFilename): ZugferdDocumentReader
{
if (!file_exists($pdfFilename)) {
throw new ZugferdFileNotFoundException($pdfFilename);
}

$pdfContent = file_get_contents($pdfFilename);

if ($pdfContent === false) {
throw new ZugferdFileNotReadableException($pdfFilename);
}

return static::readAndGuessFromContent($pdfContent);
return ZugferdDocumentPdfReaderExt::readAndGuessFromFile($pdfFilename);
}

/**
* Tries to load an attachment content from PDF and return a ZugferdDocumentReader
*
* @param string $pdfContent String containing the binary pdf data
* @param string $pdfContent
* @return ZugferdDocumentReader
* @throws Exception
* @throws RuntimeException
* @throws ZugferdNoPdfAttachmentFoundException
* @throws ZugferdUnknownXmlContentException
* @throws ZugferdUnknownProfileException
* @throws ZugferdUnknownProfileParameterException
* @throws RuntimeException
* @throws ZugferdUnknownXmlContentException
*/
public static function readAndGuessFromContent(string $pdfContent): ZugferdDocumentReader
{
$xmlContent = static::internalExtractXMLFromPdfContent($pdfContent);

return ZugferdDocumentReader::readAndGuessFromContent($xmlContent);
return ZugferdDocumentPdfReaderExt::readAndGuessFromContent($pdfContent);
}

/**
* Returns a XML content from a PDF file
*
* @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable
* @param string $pdfFilename
* @return string
* @throws Exception
* @throws ZugferdFileNotFoundException
Expand All @@ -101,74 +78,19 @@ public static function readAndGuessFromContent(string $pdfContent): ZugferdDocum
*/
public static function getXmlFromFile(string $pdfFilename): string
{
if (!file_exists($pdfFilename)) {
throw new ZugferdFileNotFoundException($pdfFilename);
}

$pdfContent = file_get_contents($pdfFilename);

if ($pdfContent === false) {
throw new ZugferdFileNotReadableException($pdfFilename);
}

return static::getXmlFromContent($pdfContent);
return ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromFile($pdfFilename);
}

/**
* Returns a XML content from a PDF binary stream (string)
*
* @param string $pdfContent String Containing the binary pdf data
* @param string $pdfContent
* @return string
* @throws Exception
* @throws ZugferdNoPdfAttachmentFoundException
*/
public static function getXmlFromContent(string $pdfContent): string
{
return static::internalExtractXMLFromPdfContent($pdfContent);
}

/**
* Get the attachment content from XML.
* See the allowed filenames which are supported
*
* @param string $pdfContent
* @return string
* @throws Exception
* @throws ZugferdNoPdfAttachmentFoundException
*/
protected static function internalExtractXMLFromPdfContent(string $pdfContent): string
{
$pdfParser = new PdfParser();
$pdfParsed = $pdfParser->parseContent($pdfContent);
$filespecs = $pdfParsed->getObjectsByType('Filespec');

$attachmentFound = false;
$attachmentIndex = 0;
$embeddedFileIndex = 0;

foreach ($filespecs as $filespec) {
$filespecDetails = $filespec->getDetails();
if (in_array($filespecDetails['F'], static::ATTACHMENT_FILENAMES)) {
$attachmentFound = true;
break;
}
$attachmentIndex++;
}

if (true == $attachmentFound) {
/**
* @var array<\Smalot\PdfParser\PDFObject>
*/
$embeddedFiles = $pdfParsed->getObjectsByType('EmbeddedFile');
foreach ($embeddedFiles as $embeddedFile) {
if ($attachmentIndex == $embeddedFileIndex) {
return $embeddedFile->getContent();
}
$embeddedFileIndex++;
}
}

throw new ZugferdNoPdfAttachmentFoundException();
return ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromContent($pdfContent);
}
}

0 comments on commit 7989261

Please sign in to comment.