document = new DOMDocument();
        $this->loadOptions = $loadOptions;

        if ($partialHtml) {
            $this->loadPartialHtml($partialHtml);
        }
    }

    /**
     * Load some HTML content that's part of a document (e.g. body content)
     * into the current document.
     *
     * The content is wrapped in a body element and prefixed with an XML
     * encoding declaration, since DOMDocument::loadHTML() would otherwise
     * read the input as ISO-8859-1 and corrupt multi-byte UTF-8 characters.
     * The wrapper also means an empty string never reaches loadHTML().
     */
    public function loadPartialHtml(string $html): void
    {
        $html = '<?xml encoding="utf-8"?><body>' . $html . '</body>';
        $this->document->loadHTML($html, $this->loadOptions);

        // Drop the cached DOMXPath so the next query builds a fresh one.
        $this->xpath = null;
    }

    /**
     * Load a complete page of HTML content into the document.
     *
     * An XML encoding declaration is prepended so the page is parsed as
     * UTF-8 instead of the ISO-8859-1 default of DOMDocument::loadHTML().
     */
    public function loadCompleteHtml(string $html): void
    {
        $html = '<?xml encoding="utf-8"?>' . $html;
        $this->document->loadHTML($html, $this->loadOptions);

        // Drop the cached DOMXPath so the next query builds a fresh one.
        $this->xpath = null;
    }

    /**
     * Start an XPath query on the current document.
     * The DOMXPath instance is created on first use and then re-used until
     * new content is loaded.
     *
     * @throws \InvalidArgumentException If the expression fails to execute.
     */
    public function queryXPath(string $expression): DOMNodeList
    {
        if (is_null($this->xpath)) {
            $this->xpath = new DOMXPath($this->document);
        }

        $result = $this->xpath->query($expression);
        if ($result === false) {
            throw new \InvalidArgumentException("XPath query for expression [$expression] failed to execute");
        }

        return $result;
    }

    /**
     * Create a new DOMElement instance within the document.
     * The element is not attached to the tree; the caller has to insert it.
     *
     * NOTE(review): the value is handed to DOMDocument::createElement() as-is,
     * which does not escape ampersands; prefer createTextNode() for arbitrary text.
     *
     * @throws \InvalidArgumentException If the element could not be created.
     */
    public function createElement(string $localName, string $value = ''): DOMElement
    {
        $element = $this->document->createElement($localName, $value);

        if ($element === false) {
            throw new \InvalidArgumentException("Failed to create element of name [$localName] and value [$value]");
        }

        return $element;
    }

    /**
     * Create a new text node within this document.
     */
    public function createTextNode(string $text): DOMText
    {
        return $this->document->createTextNode($text);
    }

    /**
     * Get an element within the document of the given ID.
     * Returns null when no element matches.
     */
    public function getElementById(string $elementId): ?DOMElement
    {
        return $this->document->getElementById($elementId);
    }

    /**
     * Get the DOMNode that represents the HTML body.
     *
     * @throws \RuntimeException If the document has no body element,
     *                           for example when no content has been loaded yet.
     */
    public function getBody(): DOMNode
    {
        $body = $this->document->getElementsByTagName('body')->item(0);

        // Fail with a clear message instead of a TypeError from the return type.
        if ($body === null) {
            throw new \RuntimeException('The document does not contain a body element');
        }

        return $body;
    }

    /**
     * Get the nodes that are a direct child of the body.
     * This is usually all the content nodes if loaded partially.
*/
    public function getBodyChildren(): DOMNodeList
    {
        $body = $this->getBody();

        return $body->childNodes;
    }

    /**
     * Serialise every direct child of the body and return the combined HTML.
     * For partially-loaded content this is usually all of the content.
     */
    public function getBodyInnerHtml(): string
    {
        $fragments = [];

        foreach ($this->getBodyChildren() as $bodyChild) {
            $fragments[] = $this->document->saveHTML($bodyChild);
        }

        return implode('', $fragments);
    }

    /**
     * Serialise the whole document, starting at its root element.
     */
    public function getHtml(): string
    {
        $root = $this->document->documentElement;

        return $this->document->saveHTML($root);
    }

    /**
     * Serialise the children of the given node, leaving out the node's own tag.
     */
    public function getNodeInnerHtml(DOMNode $node): string
    {
        $fragments = [];

        foreach ($node->childNodes as $inner) {
            $fragments[] = $this->document->saveHTML($inner);
        }

        return implode('', $fragments);
    }

    /**
     * Serialise the given node including its own tag.
     */
    public function getNodeOuterHtml(DOMNode $node): string
    {
        $outerHtml = $this->document->saveHTML($node);

        return $outerHtml;
    }
}