diff --git a/src/ZugferdDocumentPdfReader.php b/src/ZugferdDocumentPdfReader.php index e93942d..a0f3dd6 100644 --- a/src/ZugferdDocumentPdfReader.php +++ b/src/ZugferdDocumentPdfReader.php @@ -17,7 +17,6 @@ use horstoeko\zugferd\exception\ZugferdUnknownProfileParameterException; use horstoeko\zugferd\exception\ZugferdUnknownXmlContentException; use JMS\Serializer\Exception\RuntimeException; -use Smalot\PdfParser\Parser as PdfParser; /** * Class representing the document reader for incoming PDF/A-Documents with @@ -32,67 +31,45 @@ class ZugferdDocumentPdfReader { /** - * List of filenames which are possible in PDF - */ - public const ATTACHMENT_FILENAMES = [ - 'ZUGFeRD-invoice.xml'/*1.0*/, - 'zugferd-invoice.xml'/*2.0*/, - 'factur-x.xml'/*2.1*/, - 'xrechnung.xml' - ]; - - /** - * Load a PDF file (ZUGFeRD/Factur-X) + * Tries to load a PDF file (ZUGFeRD/Factur-X) and return a ZugferdDocumentReader * - * @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable - * @throws Exception + * @param string $pdfFilename * @return ZugferdDocumentReader + * @throws Exception + * @throws RuntimeException * @throws ZugferdFileNotFoundException * @throws ZugferdFileNotReadableException * @throws ZugferdNoPdfAttachmentFoundException - * @throws ZugferdUnknownXmlContentException * @throws ZugferdUnknownProfileException * @throws ZugferdUnknownProfileParameterException - * @throws RuntimeException + * @throws ZugferdUnknownXmlContentException */ public static function readAndGuessFromFile(string $pdfFilename): ZugferdDocumentReader { - if (!file_exists($pdfFilename)) { - throw new ZugferdFileNotFoundException($pdfFilename); - } - - $pdfContent = file_get_contents($pdfFilename); - - if ($pdfContent === false) { - throw new ZugferdFileNotReadableException($pdfFilename); - } - - return static::readAndGuessFromContent($pdfContent); + return ZugferdDocumentPdfReaderExt::readAndGuessFromFile($pdfFilename); } /** * Tries to load an attachment content from PDF and return a ZugferdDocumentReader * - * @param string $pdfContent String containing the binary pdf data + * @param string $pdfContent * @return ZugferdDocumentReader * @throws Exception + * @throws RuntimeException * @throws ZugferdNoPdfAttachmentFoundException - * @throws ZugferdUnknownXmlContentException * @throws ZugferdUnknownProfileException * @throws ZugferdUnknownProfileParameterException - * @throws RuntimeException + * @throws ZugferdUnknownXmlContentException */ public static function readAndGuessFromContent(string $pdfContent): ZugferdDocumentReader { - $xmlContent = static::internalExtractXMLFromPdfContent($pdfContent); - - return ZugferdDocumentReader::readAndGuessFromContent($xmlContent); + return ZugferdDocumentPdfReaderExt::readAndGuessFromContent($pdfContent); } /** * Returns a XML content from a PDF file * - * @param string $pdfFilename Contains a full-qualified filename which must exist and must be readable + * @param string $pdfFilename * @return string * @throws Exception * @throws ZugferdFileNotFoundException @@ -101,23 +78,12 @@ public static function readAndGuessFromContent(string $pdfContent): ZugferdDocum */ public static function getXmlFromFile(string $pdfFilename): string { - if (!file_exists($pdfFilename)) { - throw new ZugferdFileNotFoundException($pdfFilename); - } - - $pdfContent = file_get_contents($pdfFilename); - - if ($pdfContent === false) { - throw new ZugferdFileNotReadableException($pdfFilename); - } - - return static::getXmlFromContent($pdfContent); + return ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromFile($pdfFilename); } /** * Returns a XML content from a PDF binary stream (string) * - * @param string $pdfContent String Containing the binary pdf data * @param string $pdfContent * @return string * @throws Exception @@ -125,50 +91,6 @@ public static function getXmlFromFile(string $pdfFilename): string */ public static function getXmlFromContent(string $pdfContent): string { - return static::internalExtractXMLFromPdfContent($pdfContent); - } - - /** - * Get the attachment content from XML. - * See the allowed filenames which are supported - * - * @param string $pdfContent - * @return string - * @throws Exception - * @throws ZugferdNoPdfAttachmentFoundException - */ - protected static function internalExtractXMLFromPdfContent(string $pdfContent): string - { - $pdfParser = new PdfParser(); - $pdfParsed = $pdfParser->parseContent($pdfContent); - $filespecs = $pdfParsed->getObjectsByType('Filespec'); - - $attachmentFound = false; - $attachmentIndex = 0; - $embeddedFileIndex = 0; - - foreach ($filespecs as $filespec) { - $filespecDetails = $filespec->getDetails(); - if (in_array($filespecDetails['F'], static::ATTACHMENT_FILENAMES)) { - $attachmentFound = true; - break; - } - $attachmentIndex++; - } - - if (true == $attachmentFound) { - /** - * @var array<\Smalot\PdfParser\PDFObject> - */ - $embeddedFiles = $pdfParsed->getObjectsByType('EmbeddedFile'); - foreach ($embeddedFiles as $embeddedFile) { - if ($attachmentIndex == $embeddedFileIndex) { - return $embeddedFile->getContent(); - } - $embeddedFileIndex++; - } - } - - throw new ZugferdNoPdfAttachmentFoundException(); + return ZugferdDocumentPdfReaderExt::getInvoiceDocumentContentFromContent($pdfContent); } }