����JFIF��x�x����'
Server IP : 66.29.137.217 / Your IP : 216.73.216.24 Web Server : LiteSpeed System : Linux premium294.web-hosting.com 4.18.0-513.11.1.lve.el8.x86_64 #1 SMP Thu Jan 18 16:21:02 UTC 2024 x86_64 User : gltevjme ( 1095) PHP Version : 7.0.33 Disable Function : NONE MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : OFF | Pkexec : OFF Directory : /home/gltevjme/./cbt.bofirm.com/vendor/voku/simple_html_dom/src/voku/helper/ |
Upload File : |
<?php

declare(strict_types=1);

namespace voku\helper;

/**
 * DOM parser for HTML input: builds a \DOMDocument from (possibly partial or
 * broken) HTML, lets callers query it with CSS selectors and serialises it
 * back to a string while undoing the wrappers that were added for parsing.
 *
 * @property-read string $outerText
 *                                 <p>Get dom node's outer html (alias for "outerHtml").</p>
 * @property-read string $outerHtml
 *                                 <p>Get dom node's outer html.</p>
 * @property-read string $innerText
 *                                 <p>Get dom node's inner html (alias for "innerHtml").</p>
 * @property-read string $innerHtml
 *                                 <p>Get dom node's inner html.</p>
 * @property-read string $plaintext
 *                                 <p>Get dom node's plain text.</p>
 *
 * @method string outerText()
 *                                 <p>Get dom node's outer html (alias for "outerHtml()").</p>
 * @method string outerHtml()
 *                                 <p>Get dom node's outer html.</p>
 * @method string innerText()
 *                                 <p>Get dom node's inner html (alias for "innerHtml()").</p>
 * @method HtmlDomParser load(string $html)
 *                                 <p>Load HTML from string.</p>
 * @method HtmlDomParser load_file(string $html)
 *                                 <p>Load HTML from file.</p>
 * @method static HtmlDomParser file_get_html($filePath, $libXMLExtraOptions = null)
 *                                 <p>Load HTML from file.</p>
 * @method static HtmlDomParser str_get_html($html, $libXMLExtraOptions = null)
 *                                 <p>Load HTML from string.</p>
 */
class HtmlDomParser extends AbstractDomParser
{
    /**
     * Optional hook that may rewrite the XPath expression right before "find()" runs it.
     *
     * @var callable|null
     *
     * @phpstan-var null|callable(string $cssSelectorString, string $xPathString, \DOMXPath, \voku\helper\HtmlDomParser): string
     */
    private $callbackXPathBeforeQuery;

    /**
     * Optional hook that may rewrite the raw HTML string before the DOM is created.
     *
     * @var callable|null
     *
     * @phpstan-var null|callable(string $htmlString, \voku\helper\HtmlDomParser): string
     */
    private $callbackBeforeCreateDom;

    /**
     * Lower-cased legacy method names (resolved in "__call()") mapped to the real method names.
     *
     * @var string[]
     */
    protected static $functionAliases = [
        'outertext' => 'html',
        'outerhtml' => 'html',
        'innertext' => 'innerHtml',
        'innerhtml' => 'innerHtml',
        'load'      => 'loadHtml',
        'load_file' => 'loadHtmlFile',
    ];

    /**
     * Sub-strings that mark the content of a special script tag as template logic;
     * such content is swapped for a placeholder in "keepSpecialScriptTags()".
     *
     * @var string[]
     */
    protected $templateLogicSyntaxInSpecialScriptTags = [
        '+',
        '<%',
        '{%',
        '{{',
    ];

    /**
     * The properties specified for each special script tag is an array.
     *
     * ```php
     * protected $specialScriptTags = [
     *     'text/html',
     *     'text/x-custom-template',
     *     'text/x-handlebars-template'
     * ]
     * ```
     *
     * @var string[]
     */
    protected $specialScriptTags = [
        'text/html',
        'text/x-custom-template',
        'text/x-handlebars-template',
    ];

    /**
     * Tag names whose "<tag>" form is rewritten to "<tag/>" before parsing and whose
     * "</tag>" form is stripped again from the serialised output.
     *
     * @var string[]
     */
    protected $selfClosingTags = [
        'area',
        'base',
        'br',
        'col',
        'command',
        'embed',
        'hr',
        'img',
        'input',
        'keygen',
        'link',
        'meta',
        'param',
        'source',
        'track',
        'wbr',
    ];

    /**
     * Set when the input contained no "<" at all.
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithoutHtml = false;

    /**
     * Set when the input contains markup but does not start with "<".
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithoutWrapper = false;

    /**
     * Set when the (left-trimmed) input starts with an HTML comment.
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithCommentWrapper = false;

    /**
     * Set when the input contains neither "<head " nor "<head>".
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithoutHeadWrapper = false;

    /**
     * Set when the input contains neither "<p " nor "<p>".
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithoutPTagWrapper = false;

    /**
     * Set when the input contains neither "<html " nor "<html>".
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithoutHtmlWrapper = false;

    /**
     * Set when the input contains neither "<body " nor "<body>".
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithoutBodyWrapper = false;

    /**
     * Set when an input without html/body wrapper consists only of several sibling root elements.
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithMultiRoot = false;

    /**
     * Set when the input has an escaped "<\/script>" but no real "</script>".
     *
     * @var bool
     */
    protected $isDOMDocumentCreatedWithFakeEndScript = false;

    /**
     * When true, "createDOMDocument()" runs the input through "keepBrokenHtml()" first.
     *
     * @var bool
     */
    protected $keepBrokenHtml = false;

    /**
     * Create the parser, optionally seeded from an existing node or an HTML string.
     *
     * @param \DOMNode|SimpleHtmlDomInterface|string|null $element HTML code or SimpleHtmlDomInterface, \DOMNode
     */
    public function __construct($element = null)
    {
        $this->document = new \DOMDocument('1.0', $this->getEncoding());

        // DOMDocument settings
        $this->document->preserveWhiteSpace = true;
        $this->document->formatOutput = true;

        if ($element instanceof SimpleHtmlDomInterface) {
            $element = $element->getNode();
        }

        if ($element instanceof \DOMNode) {
            // deep-copy the foreign node into our own document
            $domNode = $this->document->importNode($element, true);

            if ($domNode instanceof \DOMNode) {
                $this->document->appendChild($domNode);
            }

            return;
        }

        if ($element !== null) {
            $this->loadHtml($element);
        }
    }

    /**
     * Dispatch legacy method names (see "$functionAliases") to their real implementation.
     *
     * @param string $name
     * @param array  $arguments
     *
     * @throws \BadMethodCallException if the lower-cased name is not a known alias
     *
     * @return bool|mixed
     */
    public function __call($name, $arguments)
    {
        $name = \strtolower($name);

        if (isset(self::$functionAliases[$name])) {
            return \call_user_func_array([$this, self::$functionAliases[$name]], $arguments);
        }

        throw new \BadMethodCallException('Method does not exist: ' . $name);
    }

    /**
     * Static factories "str_get_html" and "file_get_html".
     *
     * @param string $name
     * @param array  $arguments [0] => html string or file path (default ''), [1] => extra libxml options (default null)
     *
     * @throws \BadMethodCallException
     * @throws \RuntimeException
     *
     * @return static
     */
    public static function __callStatic($name, $arguments)
    {
        $arguments0 = $arguments[0] ?? '';

        $arguments1 = $arguments[1] ?? null;

        if ($name === 'str_get_html') {
            $parser = new static();

            return $parser->loadHtml($arguments0, $arguments1);
        }

        if ($name === 'file_get_html') {
            $parser = new static();

            return $parser->loadHtmlFile($arguments0, $arguments1);
        }

        throw new \BadMethodCallException('Method does not exist');
    }

    /** @noinspection MagicMethodsValidityInspection */

    /**
     * Magic read access, matched case-insensitively: "outerhtml"/"outertext",
     * "innerhtml"/"innertext", "innerhtmlkeep" and "text"/"plaintext".
     *
     * @param string $name
     *
     * @return string|null null for every other property name
     */
    public function __get($name)
    {
        $name = \strtolower($name);

        switch ($name) {
            case 'outerhtml':
            case 'outertext':
                return $this->html();
            case 'innerhtml':
            case 'innertext':
                return $this->innerHtml();
            case 'innerhtmlkeep':
                return $this->innerHtml(false, false);
            case 'text':
            case 'plaintext':
                return $this->text();
        }

        return null;
    }

    /**
     * @return string the result of "html()"
     */
    public function __toString()
    {
        return $this->html();
    }

    /**
     * does nothing (only for api-compatibility-reasons)
     *
     * @return bool
     *
     * @deprecated
     */
    public function clear(): bool
    {
        return true;
    }

    /**
     * Create DOMDocument from HTML.
     *
     * Inspects the raw input to record which wrappers are missing (the
     * "isDOMDocumentCreated*" flags), applies the pre-parsing workarounds and then
     * parses the string, first as XML via SimpleXML and otherwise via
     * \DOMDocument::loadHTML().
     *
     * @param string   $html
     * @param int|null $libXMLExtraOptions extra LIBXML_* flags, OR-ed onto the defaults
     *
     * @return \DOMDocument
     */
    protected function createDOMDocument(string $html, $libXMLExtraOptions = null): \DOMDocument
    {
        if ($this->callbackBeforeCreateDom) {
            $html = \call_user_func($this->callbackBeforeCreateDom, $html, $this);
        }

        // Remove content before <!DOCTYPE.*> because otherwise the DOMDocument can not handle the input.
        $isDOMDocumentCreatedWithDoctype = false;
        if (\stripos($html, '<!DOCTYPE') !== false) {
            $isDOMDocumentCreatedWithDoctype = true;
            if (
                \preg_match('/(^.*?)<!DOCTYPE(?: [^>]*)?>/sui', $html, $matches_before_doctype)
                &&
                \trim($matches_before_doctype[1])
            ) {
                $html = \str_replace($matches_before_doctype[1], '', $html);
            }
        }

        if ($this->keepBrokenHtml) {
            $html = $this->keepBrokenHtml(\trim($html));
        }

        if (\strpos($html, '<') === false) {
            $this->isDOMDocumentCreatedWithoutHtml = true;
        } elseif (\strpos(\ltrim($html), '<') !== 0) {
            $this->isDOMDocumentCreatedWithoutWrapper = true;
        }

        if (\strpos(\ltrim($html), '<!--') === 0) {
            $this->isDOMDocumentCreatedWithCommentWrapper = true;
        }

        /** @noinspection HtmlRequiredLangAttribute */
        if (
            \strpos($html, '<html ') === false
            &&
            \strpos($html, '<html>') === false
        ) {
            $this->isDOMDocumentCreatedWithoutHtmlWrapper = true;
        }

        if (
            \strpos($html, '<body ') === false
            &&
            \strpos($html, '<body>') === false
        ) {
            $this->isDOMDocumentCreatedWithoutBodyWrapper = true;
        }

        /** @noinspection HtmlRequiredTitleElement */
        if (
            \strpos($html, '<head ') === false
            &&
            \strpos($html, '<head>') === false
        ) {
            $this->isDOMDocumentCreatedWithoutHeadWrapper = true;
        }

        if (
            \strpos($html, '<p ') === false
            &&
            \strpos($html, '<p>') === false
        ) {
            $this->isDOMDocumentCreatedWithoutPTagWrapper = true;
        }

        if (
            \strpos($html, '</script>') === false
            &&
            \strpos($html, '<\/script>') !== false
        ) {
            $this->isDOMDocumentCreatedWithFakeEndScript = true;
        }

        // move content that follows "</html>" back in front of the closing tag
        if (\stripos($html, '</html>') !== false) {
            /** @noinspection NestedPositiveIfStatementsInspection */
            if (
                \preg_match('/<\/html>(.*?)/suiU', $html, $matches_after_html)
                &&
                \trim($matches_after_html[1])
            ) {
                $html = \str_replace($matches_after_html[0], $matches_after_html[1] . '</html>', $html);
            }
        }

        if (\strpos($html, '<script') !== false) {
            $this->html5FallbackForScriptTags($html);

            // NOTE(review): keepSpecialScriptTags() already handles every entry of
            // $specialScriptTags in one pass, yet it is invoked once per entry found in
            // $html; a "break" looks intended here - confirm before changing, because
            // a second pass can alter how already processed script tags are rewritten.
            foreach ($this->specialScriptTags as $tag) {
                if (\strpos($html, $tag) !== false) {
                    $this->keepSpecialScriptTags($html);
                }
            }
        }

        if (\strpos($html, '<svg') !== false) {
            $this->keepSpecialSvgTags($html);
        }

        if (
            $this->isDOMDocumentCreatedWithoutHtmlWrapper
            &&
            $this->isDOMDocumentCreatedWithoutBodyWrapper
        ) {
            if (\substr_count($html, '</') >= 2) {
                // multi-root: the input is not one single element, but nothing is left
                // once every "<x>...</x>" pair has been removed
                $regexForMultiRootDetection = '#<(.*)>.*?</(\1)>#su';
                \preg_match($regexForMultiRootDetection, $html, $matches);
                if (($matches[0] ?? '') !== $html) {
                    $htmlTmp = \preg_replace($regexForMultiRootDetection, '', $html);
                    if ($htmlTmp !== null && trim($htmlTmp) === '') {
                        $this->isDOMDocumentCreatedWithMultiRoot = true;
                    }
                }
            }
        }

        // "<br>" => "<br/>" for all known self-closing tags
        $html = \str_replace(
            \array_map(static function ($e) {
                return '<' . $e . '>';
            }, $this->selfClosingTags),
            \array_map(static function ($e) {
                return '<' . $e . '/>';
            }, $this->selfClosingTags),
            $html
        );

        // set error level
        $internalErrors = \libxml_use_internal_errors(true);
        if (\PHP_VERSION_ID < 80000) {
            $disableEntityLoader = \libxml_disable_entity_loader(true);
        }
        \libxml_clear_errors();

        $optionsXml = \LIBXML_DTDLOAD | \LIBXML_DTDATTR | \LIBXML_NONET;

        if (\defined('LIBXML_BIGLINES')) {
            $optionsXml |= \LIBXML_BIGLINES;
        }

        if (\defined('LIBXML_COMPACT')) {
            $optionsXml |= \LIBXML_COMPACT;
        }

        if (\defined('LIBXML_HTML_NODEFDTD')) {
            $optionsXml |= \LIBXML_HTML_NODEFDTD;
        }

        if ($libXMLExtraOptions !== null) {
            $optionsXml |= $libXMLExtraOptions;
        }

        // wrap the input into a helper root element, the wrapper is removed again in "fixHtmlOutput()"
        if (
            $this->isDOMDocumentCreatedWithMultiRoot
            ||
            $this->isDOMDocumentCreatedWithoutWrapper
            ||
            $this->isDOMDocumentCreatedWithCommentWrapper
            ||
            (
                !$isDOMDocumentCreatedWithDoctype
                &&
                $this->keepBrokenHtml
            )
        ) {
            $html = '<' . self::$domHtmlWrapperHelper . '>' . $html . '</' . self::$domHtmlWrapperHelper . '>';
        }

        $html = self::replaceToPreserveHtmlEntities($html);

        // first try: strict XML parsing, only accepted if libxml reported no error at all
        $documentFound = false;
        $sxe = \simplexml_load_string($html, \SimpleXMLElement::class, $optionsXml);
        if ($sxe !== false && \count(\libxml_get_errors()) === 0) {
            $domElementTmp = \dom_import_simplexml($sxe);
            if ($domElementTmp->ownerDocument instanceof \DOMDocument) {
                $documentFound = true;
                $this->document = $domElementTmp->ownerDocument;
            }
        }

        if ($documentFound === false) {
            // UTF-8 hack: http://php.net/manual/en/domdocument.loadhtml.php#95251
            $xmlHackUsed = false;
            // NOTE(review): stripos() takes ($haystack, $needle), so this searches for
            // $html inside the literal '<?xml' and not the other way around; in practice
            // the encoding hint is prepended for nearly every input. Left unchanged on
            // purpose: swapping the arguments would change how input that already starts
            // with an XML declaration gets parsed - confirm the intent first.
            if (\stripos('<?xml', $html) !== 0) {
                $xmlHackUsed = true;
                $html = '<?xml encoding="' . $this->getEncoding() . '" ?>' . $html;
            }

            if ($html !== '') {
                $this->document->loadHTML($html, $optionsXml);
            }

            // remove the "xml-encoding" hack
            if ($xmlHackUsed) {
                foreach ($this->document->childNodes as $child) {
                    if ($child->nodeType === \XML_PI_NODE) {
                        $this->document->removeChild($child);

                        break;
                    }
                }
            }
        }

        // set encoding
        $this->document->encoding = $this->getEncoding();

        // restore lib-xml settings
        \libxml_clear_errors();
        \libxml_use_internal_errors($internalErrors);
        if (\PHP_VERSION_ID < 80000 && isset($disableEntityLoader)) {
            \libxml_disable_entity_loader($disableEntityLoader);
        }

        return $this->document;
    }

    /**
     * Find list of nodes with a CSS selector.
     *
     * @param string   $selector
     * @param int|null $idx null => all matches, otherwise the match at this position
     *                      (negative values count from the end)
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface>
     */
    public function find(string $selector, $idx = null)
    {
        $xPathQuery = SelectorConverter::toXPath($selector);

        $xPath = new \DOMXPath($this->document);

        if ($this->callbackXPathBeforeQuery) {
            $xPathQuery = \call_user_func($this->callbackXPathBeforeQuery, $selector, $xPathQuery, $xPath, $this);
        }

        $nodesList = $xPath->query($xPathQuery);

        $elements = new SimpleHtmlDomNode();

        // query() yields false for an invalid expression
        if ($nodesList) {
            foreach ($nodesList as $node) {
                $elements[] = new SimpleHtmlDom($node);
            }
        }

        // return all elements
        if ($idx === null) {
            if (\count($elements) === 0) {
                return new SimpleHtmlDomNodeBlank();
            }

            return $elements;
        }

        // handle negative values
        if ($idx < 0) {
            $idx = \count($elements) + $idx;
        }

        // return one element
        return $elements[$idx] ?? new SimpleHtmlDomBlank();
    }

    /**
     * Find nodes with a CSS selector.
     *
     * @param string $selector
     *
     * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface>
     */
    public function findMulti(string $selector): SimpleHtmlDomNodeInterface
    {
        return $this->find($selector, null);
    }

    /**
     * Find nodes with a CSS selector or false, if no element is found.
     *
     * @param string $selector
     *
     * @return false|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface>
     */
    public function findMultiOrFalse(string $selector)
    {
        $return = $this->find($selector, null);

        if ($return instanceof SimpleHtmlDomNodeBlank) {
            return false;
        }

        return $return;
    }

    /**
     * Find one node with a CSS selector.
     *
     * @param string $selector
     *
     * @return SimpleHtmlDomInterface
     */
    public function findOne(string $selector): SimpleHtmlDomInterface
    {
        return $this->find($selector, 0);
    }

    /**
     * Find one node with a CSS selector or false, if no element is found.
     *
     * @param string $selector
     *
     * @return false|SimpleHtmlDomInterface
     */
    public function findOneOrFalse(string $selector)
    {
        $return = $this->find($selector, 0);

        if ($return instanceof SimpleHtmlDomBlank) {
            return false;
        }

        return $return;
    }

    /**
     * Undo the changes that parsing added to the markup: strips the wrappers that were
     * not part of the input (according to the "isDOMDocumentCreated*" flags), removes
     * closing tags of self-closing elements and the internal helper tags, then decodes
     * html-entities and puts the preserved placeholders back.
     *
     * @param string $content
     * @param bool   $multiDecodeNewHtmlEntity
     * @param bool   $putBrokenReplacedBack
     *
     * @return string
     */
    public function fixHtmlOutput(
        string $content,
        bool $multiDecodeNewHtmlEntity = false,
        bool $putBrokenReplacedBack = true
    ): string {
        // INFO: DOMDocument will encapsulate plaintext into a e.g. paragraph tag (<p>),
        //          so we try to remove it here again ...

        if ($this->getIsDOMDocumentCreatedWithoutHtmlWrapper()) {
            /** @noinspection HtmlRequiredLangAttribute */
            $content = \str_replace(
                [
                    '<html>',
                    '</html>',
                ],
                '',
                $content
            );
        }

        if ($this->getIsDOMDocumentCreatedWithoutHeadWrapper()) {
            /** @noinspection HtmlRequiredTitleElement */
            $content = \str_replace(
                [
                    '<head>',
                    '</head>',
                ],
                '',
                $content
            );
        }

        if ($this->getIsDOMDocumentCreatedWithoutBodyWrapper()) {
            $content = \str_replace(
                [
                    '<body>',
                    '</body>',
                ],
                '',
                $content
            );
        }

        if ($this->getIsDOMDocumentCreatedWithFakeEndScript()) {
            $content = \str_replace(
                '</script>',
                '',
                $content
            );
        }

        if ($this->getIsDOMDocumentCreatedWithoutWrapper()) {
            $content = (string) \preg_replace('/^<p>/', '', $content);
            $content = (string) \preg_replace('/<\/p>/', '', $content);
        }

        if ($this->getIsDOMDocumentCreatedWithoutPTagWrapper()) {
            $content = \str_replace(
                [
                    '<p>',
                    '</p>',
                ],
                '',
                $content
            );
        }

        if ($this->getIsDOMDocumentCreatedWithoutHtml()) {
            $content = \str_replace(
                '<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">',
                '',
                $content
            );
        }

        // https://bugs.php.net/bug.php?id=73175
        $content = \str_replace(
            \array_map(static function ($e) {
                return '</' . $e . '>';
            }, $this->selfClosingTags),
            '',
            $content
        );

        /** @noinspection HtmlRequiredTitleElement */
        $content = \trim(
            \str_replace(
                [
                    '<simpleHtmlDomHtml>',
                    '</simpleHtmlDomHtml>',
                    '<simpleHtmlDomP>',
                    '</simpleHtmlDomP>',
                    '<head><head>',
                    '</head></head>',
                ],
                [
                    '',
                    '',
                    '',
                    '',
                    '<head>',
                    '</head>',
                ],
                $content
            )
        );

        $content = $this->decodeHtmlEntity($content, $multiDecodeNewHtmlEntity);

        return self::putReplacedBackToPreserveHtmlEntities($content, $putBrokenReplacedBack);
    }

    /**
     * Return elements by ".class".
     *
     * @param string $class
     *
     * @return SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface>
     */
    public function getElementByClass(string $class): SimpleHtmlDomNodeInterface
    {
        return $this->findMulti('.' . $class);
    }

    /**
     * Return element by #id.
     *
     * @param string $id
     *
     * @return SimpleHtmlDomInterface
     */
    public function getElementById(string $id): SimpleHtmlDomInterface
    {
        return $this->findOne('#' . $id);
    }

    /**
     * Return element by tag name.
     *
     * @param string $name
     *
     * @return SimpleHtmlDomInterface the first element with that tag name, or a blank element
     */
    public function getElementByTagName(string $name): SimpleHtmlDomInterface
    {
        $node = $this->document->getElementsByTagName($name)->item(0);

        if ($node === null) {
            return new SimpleHtmlDomBlank();
        }

        return new SimpleHtmlDom($node);
    }

    /**
     * Returns elements by "#id".
     *
     * @param string   $id
     * @param int|null $idx
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface>
     */
    public function getElementsById(string $id, $idx = null)
    {
        return $this->find('#' . $id, $idx);
    }

    /**
     * Returns elements by tag name.
     *
     * @param string   $name
     * @param int|null $idx null => all matches, otherwise the match at this position
     *                      (negative values count from the end)
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface>
     */
    public function getElementsByTagName(string $name, $idx = null)
    {
        $nodesList = $this->document->getElementsByTagName($name);

        $elements = new SimpleHtmlDomNode();

        foreach ($nodesList as $node) {
            $elements[] = new SimpleHtmlDom($node);
        }

        // return all elements
        if ($idx === null) {
            if (\count($elements) === 0) {
                return new SimpleHtmlDomNodeBlank();
            }

            return $elements;
        }

        // handle negative values
        if ($idx < 0) {
            $idx = \count($elements) + $idx;
        }

        // return one element; a missing index yields a blank *element* (not a blank
        // node list), which is the same fallback "find()" uses for a single index
        return $elements[$idx] ?? new SimpleHtmlDomBlank();
    }

    /**
     * Get dom node's outer html.
     *
     * @param bool $multiDecodeNewHtmlEntity
     * @param bool $putBrokenReplacedBack
     *
     * @return string empty string if the document could not be serialised
     */
    public function html(bool $multiDecodeNewHtmlEntity = false, bool $putBrokenReplacedBack = true): string
    {
        if (static::$callback !== null) {
            \call_user_func(static::$callback, [$this]);
        }

        if ($this->getIsDOMDocumentCreatedWithoutHtmlWrapper()) {
            $content = $this->document->saveHTML($this->document->documentElement);
        } else {
            $content = $this->document->saveHTML();
        }

        if ($content === false) {
            return '';
        }

        return $this->fixHtmlOutput($content, $multiDecodeNewHtmlEntity, $putBrokenReplacedBack);
    }

    /**
     * Load HTML from string.
     *
     * @param string   $html
     * @param int|null $libXMLExtraOptions
     *
     * @return $this
     */
    public function loadHtml(string $html, $libXMLExtraOptions = null): DomParserInterface
    {
        $this->document = $this->createDOMDocument($html, $libXMLExtraOptions);

        return $this;
    }

    /**
     * Load HTML from file.
     *
     * @param string   $filePath local path or http(s) url
     * @param int|null $libXMLExtraOptions
     *
     * @throws \RuntimeException if a local file does not exist or the content could not be read
     *
     * @return $this
     */
    public function loadHtmlFile(string $filePath, $libXMLExtraOptions = null): DomParserInterface
    {
        // the existence check only applies to non-http(s) paths
        if (
            !\preg_match("/^https?:\/\//i", $filePath)
            &&
            !\file_exists($filePath)
        ) {
            throw new \RuntimeException('File ' . $filePath . ' not found');
        }

        try {
            if (\class_exists('\voku\helper\UTF8')) {
                $html = \voku\helper\UTF8::file_get_contents($filePath);
            } else {
                $html = \file_get_contents($filePath);
            }
        } catch (\Exception $e) {
            // keep the original failure reachable via getPrevious()
            throw new \RuntimeException('Could not load file ' . $filePath, 0, $e);
        }

        if ($html === false) {
            throw new \RuntimeException('Could not load file ' . $filePath);
        }

        return $this->loadHtml($html, $libXMLExtraOptions);
    }

    /**
     * Get the HTML as XML or plain XML if needed.
     *
     * @param bool $multiDecodeNewHtmlEntity
     * @param bool $htmlToXml       true => the output is passed through "fixHtmlOutput()"
     * @param bool $removeXmlHeader true => the "<?xml ... ?>" header is stripped
     * @param int  $options         options for \DOMDocument::saveXML()
     *
     * @return string empty string if the document could not be serialised
     */
    public function xml(
        bool $multiDecodeNewHtmlEntity = false,
        bool $htmlToXml = true,
        bool $removeXmlHeader = true,
        int $options = \LIBXML_NOEMPTYTAG
    ): string {
        $xml = $this->document->saveXML(null, $options);
        if ($xml === false) {
            return '';
        }

        if ($removeXmlHeader) {
            $xml = \ltrim((string) \preg_replace('/<\?xml.*\?>/', '', $xml));
        }

        if ($htmlToXml) {
            $return = $this->fixHtmlOutput($xml, $multiDecodeNewHtmlEntity);
        } else {
            $xml = $this->decodeHtmlEntity($xml, $multiDecodeNewHtmlEntity);

            $return = self::putReplacedBackToPreserveHtmlEntities($xml);
        }

        return $return;
    }

    /**
     * Shortcut for "find()", so the parser object itself can be called like a function.
     *
     * @param string   $selector
     * @param int|null $idx
     *
     * @return SimpleHtmlDomInterface|SimpleHtmlDomInterface[]|SimpleHtmlDomNodeInterface<SimpleHtmlDomInterface>
     */
    public function __invoke($selector, $idx = null)
    {
        return $this->find($selector, $idx);
    }

    /**
     * @return bool
     */
    public function getIsDOMDocumentCreatedWithoutHeadWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutHeadWrapper;
    }

    /**
     * @return bool
     */
    public function getIsDOMDocumentCreatedWithoutPTagWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutPTagWrapper;
    }

    /**
     * @return bool
     */
    public function getIsDOMDocumentCreatedWithoutHtml(): bool
    {
        return $this->isDOMDocumentCreatedWithoutHtml;
    }

    /**
     * @return bool
     */
    public function getIsDOMDocumentCreatedWithoutBodyWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutBodyWrapper;
    }

    /**
     * @return bool
     */
    public function getIsDOMDocumentCreatedWithMultiRoot(): bool
    {
        return $this->isDOMDocumentCreatedWithMultiRoot;
    }

    /**
     * @return bool
     */
    public function getIsDOMDocumentCreatedWithoutHtmlWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutHtmlWrapper;
    }

    /**
     * @return bool
     */
    public function getIsDOMDocumentCreatedWithoutWrapper(): bool
    {
        return $this->isDOMDocumentCreatedWithoutWrapper;
    }

    /**
     * @return bool
     */
    public function getIsDOMDocumentCreatedWithFakeEndScript(): bool
    {
        return $this->isDOMDocumentCreatedWithFakeEndScript;
    }

    /**
     * Replace stray closing tags with placeholders, so they are not touched by the parser.
     *
     * Step 1 masks every matching "<tag ...>...</tag>" pair with temporary tokens, step 2
     * swaps the closing tags that are still left for a crc32-based placeholder (the original
     * text is recorded in self::$domBrokenReplaceHelper) and step 3 turns the temporary
     * tokens back into real brackets.
     *
     * @param string $html
     *
     * @return string
     */
    protected function keepBrokenHtml(string $html): string
    {
        // step 1: mask well-formed pairs, repeated until nothing changes anymore
        do {
            $original = $html;

            $html = (string) \preg_replace_callback(
                '/(?<start>.*)<(?<element_start>[a-z]+)(?<element_start_addon> [^>]*)?>(?<value>.*?)<\/(?<element_end>\2)>(?<end>.*)/sui',
                static function ($matches) {
                    return $matches['start'] .
                           '°lt_simple_html_dom__voku_°' . $matches['element_start'] . $matches['element_start_addon'] . '°gt_simple_html_dom__voku_°' .
                           $matches['value'] .
                           '°lt/_simple_html_dom__voku_°' . $matches['element_end'] . '°gt_simple_html_dom__voku_°' .
                           $matches['end'];
                },
                $html
            );
        } while ($original !== $html);

        // step 2: replace the remaining closing tags with a placeholder
        do {
            $original = $html;

            $html = (string) \preg_replace_callback(
                '/(?<start>[^<]*)?(?<broken>(?:<\/\w+(?:\s+\w+=\"[^"]+\")*+[^<]+>)+)(?<end>.*)/u',
                static function ($matches) {
                    $matches['broken'] = \str_replace(
                        ['°lt/_simple_html_dom__voku_°', '°lt_simple_html_dom__voku_°', '°gt_simple_html_dom__voku_°'],
                        ['</', '<', '>'],
                        $matches['broken']
                    );

                    self::$domBrokenReplaceHelper['orig'][] = $matches['broken'];
                    self::$domBrokenReplaceHelper['tmp'][] = $matchesHash = self::$domHtmlBrokenHtmlHelper . \crc32($matches['broken']);

                    return $matches['start'] . $matchesHash . $matches['end'];
                },
                $html
            );
        } while ($original !== $html);

        // step 3: unmask the pairs from step 1
        return \str_replace(
            ['°lt/_simple_html_dom__voku_°', '°lt_simple_html_dom__voku_°', '°gt_simple_html_dom__voku_°'],
            ['</', '<', '>'],
            $html
        );
    }

    /**
     * workaround for bug: https://bugs.php.net/bug.php?id=74628
     *
     * Inline "<svg>" markup inside a "(data:image/svg...)" expression is swapped for a
     * crc32-based placeholder; the original markup is recorded in self::$domBrokenReplaceHelper.
     *
     * @param string $html modified in place (by reference)
     *
     * @return void
     */
    protected function keepSpecialSvgTags(string &$html)
    {
        // regEx for e.g.: [mask-image:url('data:image/svg+xml;utf8,<svg viewBox="0 0 100 100" xmlns="http://www.w3.org/2000/svg">...</svg>')]
        /** @noinspection HtmlDeprecatedTag */
        $regExSpecialSvg = '/\((["\'])?(?<start>data:image\/svg.*)<svg(?<attr>[^>]*?)>(?<content>.*)<\/svg>\1\)/isU';
        $htmlTmp = \preg_replace_callback(
            $regExSpecialSvg,
            static function ($svgs) {
                if (empty($svgs['content'])) {
                    return $svgs[0];
                }

                $content = '<svg' . $svgs['attr'] . '>' . $svgs['content'] . '</svg>';
                self::$domBrokenReplaceHelper['orig'][] = $content;
                self::$domBrokenReplaceHelper['tmp'][] = $matchesHash = self::$domHtmlBrokenHtmlHelper . \crc32($content);

                return '(' . $svgs[1] . $svgs['start'] . $matchesHash . $svgs[1] . ')';
            },
            $html
        );

        // keep the input untouched if the regex engine failed
        if ($htmlTmp !== null) {
            $html = $htmlTmp;
        }
    }

    /**
     * Protect "<script>" tags whose type is listed in "$specialScriptTags".
     *
     * Content that contains template logic is swapped for a crc32-based placeholder,
     * every other matching tag is renamed to the internal special-script helper tag.
     *
     * @param string $html modified in place (by reference)
     *
     * @return void
     */
    protected function keepSpecialScriptTags(string &$html)
    {
        // regEx for e.g.: [<script id="elements-image-1" type="text/html">...</script>]
        $tags = \implode('|', \array_map(
            static function ($value) {
                return \preg_quote($value, '/');
            },
            $this->specialScriptTags
        ));
        $html = (string) \preg_replace_callback(
            '/(?<start>(<script [^>]*type=["\']?(?:' . $tags . ')+[^>]*>))(?<innerContent>.*)(?<end><\/script>)/isU',
            function ($matches) {
                // Check for logic in special script tags, like [<% _.each(tierPrices, function(item, key) { %>],
                // because often this looks like non-valid html in the template itself.
                foreach ($this->templateLogicSyntaxInSpecialScriptTags as $logicSyntaxInSpecialScriptTag) {
                    if (\strpos($matches['innerContent'], $logicSyntaxInSpecialScriptTag) !== false) {
                        // remove the html5 fallback
                        $matches['innerContent'] = \str_replace('<\/', '</', $matches['innerContent']);

                        self::$domBrokenReplaceHelper['orig'][] = $matches['innerContent'];
                        self::$domBrokenReplaceHelper['tmp'][] = $matchesHash = self::$domHtmlBrokenHtmlHelper . \crc32($matches['innerContent']);

                        return $matches['start'] . $matchesHash . $matches['end'];
                    }
                }

                // remove the html5 fallback
                $matches[0] = \str_replace('<\/', '</', $matches[0]);

                // "<script ...>...</script>" => "<HELPER ...>...</HELPER>"
                $specialNonScript = '<' . self::$domHtmlSpecialScriptHelper . \substr($matches[0], \strlen('<script'));

                return \substr($specialNonScript, 0, -\strlen('</script>')) . '</' . self::$domHtmlSpecialScriptHelper . '>';
            },
            $html
        );
    }

    /**
     * Enable or disable the "keepBrokenHtml()" pre-processing for documents loaded afterwards.
     *
     * @param bool $keepBrokenHtml
     *
     * @return $this
     */
    public function useKeepBrokenHtml(bool $keepBrokenHtml): DomParserInterface
    {
        $this->keepBrokenHtml = $keepBrokenHtml;

        return $this;
    }

    /**
     * Replace the list of sub-strings that mark script content as template logic.
     *
     * @param string[] $templateLogicSyntaxInSpecialScriptTags
     *
     * @throws \InvalidArgumentException if one of the entries is not a string
     *
     * @return $this
     */
    public function overwriteTemplateLogicSyntaxInSpecialScriptTags(array $templateLogicSyntaxInSpecialScriptTags): DomParserInterface
    {
        foreach ($templateLogicSyntaxInSpecialScriptTags as $tmp) {
            if (!\is_string($tmp)) {
                throw new \InvalidArgumentException('setTemplateLogicSyntaxInSpecialScriptTags only allows string[]');
            }
        }

        $this->templateLogicSyntaxInSpecialScriptTags = $templateLogicSyntaxInSpecialScriptTags;

        return $this;
    }

    /**
     * Replace the list of script "type" values that are treated as special script tags.
     *
     * @param string[] $specialScriptTags
     *
     * @throws \InvalidArgumentException if one of the entries is not a string
     *
     * @return $this
     */
    public function overwriteSpecialScriptTags(array $specialScriptTags): DomParserInterface
    {
        foreach ($specialScriptTags as $tag) {
            if (!\is_string($tag)) {
                throw new \InvalidArgumentException('SpecialScriptTags only allows string[]');
            }
        }

        $this->specialScriptTags = $specialScriptTags;

        return $this;
    }

    /**
     * Register a hook that receives the CSS selector, the converted XPath expression,
     * the \DOMXPath instance and this parser; its return value is the XPath that "find()" executes.
     *
     * @param callable $callbackXPathBeforeQuery
     *
     * @phpstan-param callable(string $cssSelectorString, string $xPathString,\DOMXPath,\voku\helper\HtmlDomParser): string $callbackXPathBeforeQuery
     *
     * @return $this
     */
    public function setCallbackXPathBeforeQuery(callable $callbackXPathBeforeQuery): self
    {
        $this->callbackXPathBeforeQuery = $callbackXPathBeforeQuery;

        return $this;
    }

    /**
     * Register a hook that receives the raw HTML string and this parser; its return
     * value is the HTML that "createDOMDocument()" continues with.
     *
     * @param callable $callbackBeforeCreateDom
     *
     * @phpstan-param callable(string $htmlString, \voku\helper\HtmlDomParser): string $callbackBeforeCreateDom
     *
     * @return $this
     */
    public function setCallbackBeforeCreateDom(callable $callbackBeforeCreateDom): self
    {
        $this->callbackBeforeCreateDom = $callbackBeforeCreateDom;

        return $this;
    }
}