542 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			PHP
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			542 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			PHP
		
	
	
		
			Executable File
		
	
	
	
	
| <?php
 | |
| /**
 | |
|  *  Base include file for SimpleTest
 | |
|  *  @package    SimpleTest
 | |
|  *  @subpackage WebTester
 | |
|  *  @version    $Id: page.php 1938 2009-08-05 17:16:23Z dgheath $
 | |
|  */
 | |
| 
 | |
| /**#@+
 | |
|     *   include other SimpleTest class files
 | |
|     */
 | |
| require_once(dirname(__FILE__) . '/http.php');
 | |
| require_once(dirname(__FILE__) . '/php_parser.php');
 | |
| require_once(dirname(__FILE__) . '/tag.php');
 | |
| require_once(dirname(__FILE__) . '/form.php');
 | |
| require_once(dirname(__FILE__) . '/selector.php');
 | |
| /**#@-*/
 | |
| 
 | |
| /**
 | |
|  *    A wrapper for a web page.
 | |
|  *    @package SimpleTest
 | |
|  *    @subpackage WebTester
 | |
|  */
 | |
| class SimplePage {
 | |
|     private $links = array();
 | |
|     private $title = false;
 | |
|     private $last_widget;
 | |
|     private $label;
 | |
|     private $forms = array();
 | |
|     private $frames = array();
 | |
|     private $transport_error;
 | |
|     private $raw;
 | |
|     private $text = false;
 | |
|     private $sent;
 | |
|     private $headers;
 | |
|     private $method;
 | |
|     private $url;
 | |
|     private $base = false;
 | |
|     private $request_data;
 | |
| 
 | |
|     /**
 | |
|      *    Parses a page ready to access it's contents.
 | |
|      *    @param SimpleHttpResponse $response     Result of HTTP fetch.
 | |
|      *    @access public
 | |
|      */
 | |
|     function __construct($response = false) {
 | |
|         if ($response) {
 | |
|             $this->extractResponse($response);
 | |
|         } else {
 | |
|             $this->noResponse();
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Extracts all of the response information.
 | |
|      *    @param SimpleHttpResponse $response    Response being parsed.
 | |
|      *    @access private
 | |
|      */
 | |
|     protected function extractResponse($response) {
 | |
|         $this->transport_error = $response->getError();
 | |
|         $this->raw = $response->getContent();
 | |
|         $this->sent = $response->getSent();
 | |
|         $this->headers = $response->getHeaders();
 | |
|         $this->method = $response->getMethod();
 | |
|         $this->url = $response->getUrl();
 | |
|         $this->request_data = $response->getRequestData();
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Sets up a missing response.
 | |
|      *    @access private
 | |
|      */
 | |
|     protected function noResponse() {
 | |
|         $this->transport_error = 'No page fetched yet';
 | |
|         $this->raw = false;
 | |
|         $this->sent = false;
 | |
|         $this->headers = false;
 | |
|         $this->method = 'GET';
 | |
|         $this->url = false;
 | |
|         $this->request_data = false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Original request as bytes sent down the wire.
 | |
|      *    @return mixed              Sent content.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getRequest() {
 | |
|         return $this->sent;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for raw text of page.
 | |
|      *    @return string        Raw unparsed content.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getRaw() {
 | |
|         return $this->raw;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for plain text of page as a text browser
 | |
|      *    would see it.
 | |
|      *    @return string        Plain text of page.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getText() {
 | |
|         if (! $this->text) {
 | |
|             $this->text = SimplePage::normalise($this->raw);
 | |
|         }
 | |
|         return $this->text;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for raw headers of page.
 | |
|      *    @return string       Header block as text.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getHeaders() {
 | |
|         if ($this->headers) {
 | |
|             return $this->headers->getRaw();
 | |
|         }
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Original request method.
 | |
|      *    @return string        GET, POST or HEAD.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getMethod() {
 | |
|         return $this->method;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Original resource name.
 | |
|      *    @return SimpleUrl        Current url.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getUrl() {
 | |
|         return $this->url;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Base URL if set via BASE tag page url otherwise
 | |
|      *    @return SimpleUrl        Base url.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getBaseUrl() {
 | |
|         return $this->base;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Original request data.
 | |
|      *    @return mixed              Sent content.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getRequestData() {
 | |
|         return $this->request_data;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for last error.
 | |
|      *    @return string        Error from last response.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getTransportError() {
 | |
|         return $this->transport_error;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for current MIME type.
 | |
|      *    @return string    MIME type as string; e.g. 'text/html'
 | |
|      *    @access public
 | |
|      */
 | |
|     function getMimeType() {
 | |
|         if ($this->headers) {
 | |
|             return $this->headers->getMimeType();
 | |
|         }
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for HTTP response code.
 | |
|      *    @return integer    HTTP response code received.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getResponseCode() {
 | |
|         if ($this->headers) {
 | |
|             return $this->headers->getResponseCode();
 | |
|         }
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for last Authentication type. Only valid
 | |
|      *    straight after a challenge (401).
 | |
|      *    @return string    Description of challenge type.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getAuthentication() {
 | |
|         if ($this->headers) {
 | |
|             return $this->headers->getAuthentication();
 | |
|         }
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for last Authentication realm. Only valid
 | |
|      *    straight after a challenge (401).
 | |
|      *    @return string    Name of security realm.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getRealm() {
 | |
|         if ($this->headers) {
 | |
|             return $this->headers->getRealm();
 | |
|         }
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for current frame focus. Will be
 | |
|      *    false as no frames.
 | |
|      *    @return array    Always empty.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getFrameFocus() {
 | |
|         return array();
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Sets the focus by index. The integer index starts from 1.
 | |
|      *    @param integer $choice    Chosen frame.
 | |
|      *    @return boolean           Always false.
 | |
|      *    @access public
 | |
|      */
 | |
|     function setFrameFocusByIndex($choice) {
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Sets the focus by name. Always fails for a leaf page.
 | |
|      *    @param string $name    Chosen frame.
 | |
|      *    @return boolean        False as no frames.
 | |
|      *    @access public
 | |
|      */
 | |
|     function setFrameFocus($name) {
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Clears the frame focus. Does nothing for a leaf page.
 | |
|      *    @access public
 | |
|      */
 | |
|     function clearFrameFocus() {
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    TODO: write docs
 | |
|      */
 | |
|     function setFrames($frames) {
 | |
|         $this->frames = $frames;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Test to see if link is an absolute one.
 | |
|      *    @param string $url     Url to test.
 | |
|      *    @return boolean        True if absolute.
 | |
|      *    @access protected
 | |
|      */
 | |
|     protected function linkIsAbsolute($url) {
 | |
|         $parsed = new SimpleUrl($url);
 | |
|         return (boolean)($parsed->getScheme() && $parsed->getHost());
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Adds a link to the page.
 | |
|      *    @param SimpleAnchorTag $tag      Link to accept.
 | |
|      */
 | |
|     function addLink($tag) {
 | |
|         $this->links[] = $tag;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Set the forms
 | |
|      *    @param array $forms           An array of SimpleForm objects
 | |
|      */
 | |
|     function setForms($forms) {
 | |
|         $this->forms = $forms;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Test for the presence of a frameset.
 | |
|      *    @return boolean        True if frameset.
 | |
|      *    @access public
 | |
|      */
 | |
|     function hasFrames() {
 | |
|         return count($this->frames) > 0;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for frame name and source URL for every frame that
 | |
|      *    will need to be loaded. Immediate children only.
 | |
|      *    @return boolean/array     False if no frameset or
 | |
|      *                              otherwise a hash of frame URLs.
 | |
|      *                              The key is either a numerical
 | |
|      *                              base one index or the name attribute.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getFrameset() {
 | |
|         if (! $this->hasFrames()) {
 | |
|             return false;
 | |
|         }
 | |
|         $urls = array();
 | |
|         for ($i = 0; $i < count($this->frames); $i++) {
 | |
|             $name = $this->frames[$i]->getAttribute('name');
 | |
|             $url = new SimpleUrl($this->frames[$i]->getAttribute('src'));
 | |
|             $urls[$name ? $name : $i + 1] = $this->expandUrl($url);
 | |
|         }
 | |
|         return $urls;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Fetches a list of loaded frames.
 | |
|      *    @return array/string    Just the URL for a single page.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getFrames() {
 | |
|         $url = $this->expandUrl($this->getUrl());
 | |
|         return $url->asString();
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for a list of all links.
 | |
|      *    @return array   List of urls with scheme of
 | |
|      *                    http or https and hostname.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getUrls() {
 | |
|         $all = array();
 | |
|         foreach ($this->links as $link) {
 | |
|             $url = $this->getUrlFromLink($link);
 | |
|             $all[] = $url->asString();
 | |
|         }
 | |
|         return $all;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for URLs by the link label. Label will match
 | |
|      *    regardess of whitespace issues and case.
 | |
|      *    @param string $label    Text of link.
 | |
|      *    @return array           List of links with that label.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getUrlsByLabel($label) {
 | |
|         $matches = array();
 | |
|         foreach ($this->links as $link) {
 | |
|             if ($link->getText() == $label) {
 | |
|                 $matches[] = $this->getUrlFromLink($link);
 | |
|             }
 | |
|         }
 | |
|         return $matches;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for a URL by the id attribute.
 | |
|      *    @param string $id       Id attribute of link.
 | |
|      *    @return SimpleUrl       URL with that id of false if none.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getUrlById($id) {
 | |
|         foreach ($this->links as $link) {
 | |
|             if ($link->getAttribute('id') === (string)$id) {
 | |
|                 return $this->getUrlFromLink($link);
 | |
|             }
 | |
|         }
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Converts a link tag into a target URL.
 | |
|      *    @param SimpleAnchor $link    Parsed link.
 | |
|      *    @return SimpleUrl            URL with frame target if any.
 | |
|      *    @access private
 | |
|      */
 | |
|     protected function getUrlFromLink($link) {
 | |
|         $url = $this->expandUrl($link->getHref());
 | |
|         if ($link->getAttribute('target')) {
 | |
|             $url->setTarget($link->getAttribute('target'));
 | |
|         }
 | |
|         return $url;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Expands expandomatic URLs into fully qualified
 | |
|      *    URLs.
 | |
|      *    @param SimpleUrl $url        Relative URL.
 | |
|      *    @return SimpleUrl            Absolute URL.
 | |
|      *    @access public
 | |
|      */
 | |
|     function expandUrl($url) {
 | |
|         if (! is_object($url)) {
 | |
|             $url = new SimpleUrl($url);
 | |
|         }
 | |
|         $location = $this->getBaseUrl() ? $this->getBaseUrl() : new SimpleUrl();
 | |
|         return $url->makeAbsolute($location->makeAbsolute($this->getUrl()));
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Sets the base url for the page.
 | |
|      *    @param string $url    Base URL for page.
 | |
|      */
 | |
|     function setBase($url) {
 | |
|         $this->base = new SimpleUrl($url);
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Sets the title tag contents.
 | |
|      *    @param SimpleTitleTag $tag    Title of page.
 | |
|      */
 | |
|     function setTitle($tag) {
 | |
|         $this->title = $tag;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for parsed title.
 | |
|      *    @return string     Title or false if no title is present.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getTitle() {
 | |
|         if ($this->title) {
 | |
|             return $this->title->getText();
 | |
|         }
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Finds a held form by button label. Will only
 | |
|      *    search correctly built forms.
 | |
|      *    @param SimpleSelector $selector       Button finder.
 | |
|      *    @return SimpleForm                    Form object containing
 | |
|      *                                          the button.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getFormBySubmit($selector) {
 | |
|         for ($i = 0; $i < count($this->forms); $i++) {
 | |
|             if ($this->forms[$i]->hasSubmit($selector)) {
 | |
|                 return $this->forms[$i];
 | |
|             }
 | |
|         }
 | |
|         return null;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Finds a held form by image using a selector.
 | |
|      *    Will only search correctly built forms.
 | |
|      *    @param SimpleSelector $selector  Image finder.
 | |
|      *    @return SimpleForm               Form object containing
 | |
|      *                                     the image.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getFormByImage($selector) {
 | |
|         for ($i = 0; $i < count($this->forms); $i++) {
 | |
|             if ($this->forms[$i]->hasImage($selector)) {
 | |
|                 return $this->forms[$i];
 | |
|             }
 | |
|         }
 | |
|         return null;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Finds a held form by the form ID. A way of
 | |
|      *    identifying a specific form when we have control
 | |
|      *    of the HTML code.
 | |
|      *    @param string $id     Form label.
 | |
|      *    @return SimpleForm    Form object containing the matching ID.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getFormById($id) {
 | |
|         for ($i = 0; $i < count($this->forms); $i++) {
 | |
|             if ($this->forms[$i]->getId() == $id) {
 | |
|                 return $this->forms[$i];
 | |
|             }
 | |
|         }
 | |
|         return null;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Sets a field on each form in which the field is
 | |
|      *    available.
 | |
|      *    @param SimpleSelector $selector    Field finder.
 | |
|      *    @param string $value               Value to set field to.
 | |
|      *    @return boolean                    True if value is valid.
 | |
|      *    @access public
 | |
|      */
 | |
|     function setField($selector, $value, $position=false) {
 | |
|         $is_set = false;
 | |
|         for ($i = 0; $i < count($this->forms); $i++) {
 | |
|             if ($this->forms[$i]->setField($selector, $value, $position)) {
 | |
|                 $is_set = true;
 | |
|             }
 | |
|         }
 | |
|         return $is_set;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Accessor for a form element value within a page.
 | |
|      *    @param SimpleSelector $selector    Field finder.
 | |
|      *    @return string/boolean             A string if the field is
 | |
|      *                                       present, false if unchecked
 | |
|      *                                       and null if missing.
 | |
|      *    @access public
 | |
|      */
 | |
|     function getField($selector) {
 | |
|         for ($i = 0; $i < count($this->forms); $i++) {
 | |
|             $value = $this->forms[$i]->getValue($selector);
 | |
|             if (isset($value)) {
 | |
|                 return $value;
 | |
|             }
 | |
|         }
 | |
|         return null;
 | |
|     }
 | |
| 
 | |
|     /**
 | |
|      *    Turns HTML into text browser visible text. Images
 | |
|      *    are converted to their alt text and tags are supressed.
 | |
|      *    Entities are converted to their visible representation.
 | |
|      *    @param string $html        HTML to convert.
 | |
|      *    @return string             Plain text.
 | |
|      *    @access public
 | |
|      */
 | |
|     static function normalise($html) {
 | |
|         $text = preg_replace('#<!--.*?-->#si', '', $html);
 | |
|         $text = preg_replace('#<(script|option|textarea)[^>]*>.*?</\1>#si', '', $text);
 | |
|         $text = preg_replace('#<img[^>]*alt\s*=\s*("([^"]*)"|\'([^\']*)\'|([a-zA-Z_]+))[^>]*>#', ' \2\3\4 ', $text);
 | |
|         $text = preg_replace('#<[^>]*>#', '', $text);
 | |
|         $text = html_entity_decode($text, ENT_QUOTES);
 | |
|         $text = preg_replace('#\s+#', ' ', $text);
 | |
|         return trim(trim($text), "\xA0");        // TODO: The \xAO is a  . Add a test for this.
 | |
|     }
 | |
| }
 | |
| ?>
 |