PHP Classes

File: spiderExamp.php

Recommend this page to a friend!
  Classes of greg jackson   Spider Class   spiderExamp.php   Download  
File: spiderExamp.php
Role: Example script
Content type: text/plain
Description: Example usage - NOTE: BE KIND TO THE BBC AND DO NOT RUN THIS WITHOUT CHANGING THE PARAMETERS
Class: Spider Class
Crawl a site following and retrieving linked pages
Author: Greg Jackson
Last change: Removed links to unnecessary scripts
Date: 19 years ago
Size: 1,297 bytes
 

Contents

Class file image Download
<?php

// Load the spider implementation (presumably where spiderScraper is defined).
require_once 'spiderClass.php';

// Run the example crawl with its default parameters. Calling getSport()
// ahead of its definition works because PHP registers unconditional
// top-level functions when the file is compiled.
getSport();

// Stop here so nothing after the example run is executed.
exit();

/**
 * Example crawl: start at the BBC home page, follow links matching the
 * configured patterns, and print a progress line for each fetched page.
 *
 * NOTE: be kind to the target site -- change the start URL and patterns
 * before running this for real.
 *
 * Expected shape of the array returned by spiderNextPage(), as used below
 * (confirm against spiderClass.php): "error" > 0 signals a failure described
 * by "errortext"; index 0 is the page URL; index 1, when set, is the page
 * content.
 *
 * @param string $strSport   PCRE pattern for the sport section links.
 * @param string $strDetail1 PCRE pattern for the primary topic; also used to
 *                           decide whether a fetched page's content is printed.
 * @param string $strDetail2 PCRE pattern for the secondary topic.
 *
 * @return void Output is echoed as HTML.
 */
function getSport($strSport = "/football/", $strDetail1 = "/middlesbrough|boro\b/", $strDetail2 = "/prem/")
{
    $strStartURL = "http://www.bbc.co.uk";

    // One list of link patterns per level (presumably keyed by crawl depth --
    // confirm against spiderClass.php). The original literal declared key 3
    // twice, so the second entry silently overwrote the first and only three
    // levels were configured for a crawl depth of 4; the last entry now uses
    // key 4.
    $arrLinksRegex = array(
        1 => array("/sport/"),
        2 => array($strSport, $strDetail1, $strDetail2),
        3 => array($strDetail1, $strDetail2),
        4 => array($strDetail1),
    );

    $objSportSpider = new spiderScraper;
    $objSportSpider->spiderStart($strStartURL);
    $objSportSpider->arrLinksRegex = $arrLinksRegex;
    $objSportSpider->intCrawlDepth = 4;

    // Hard cap of 50 fetches so the example cannot hammer the target site.
    for ($i = 1; $i <= 50; $i++) {
        // Remember the running total so the per-page time can be reported.
        $timePrev = $objSportSpider->timeLapsed;

        $arrFetchedPage = $objSportSpider->spiderNextPage();

        // Guard the lookups so a result without these keys does not raise
        // undefined-index notices.
        $blnFailed = isset($arrFetchedPage["error"]) && $arrFetchedPage["error"] > 0;
        if ($blnFailed) {
            $strErrorText = isset($arrFetchedPage["errortext"]) ? $arrFetchedPage["errortext"] : "";
            echo "<br>Error: " . $strErrorText;
            continue;
        }

        echo $i . ": Depth: " . $objSportSpider->intCurrentDepth
            . " -Seq: " . $objSportSpider->intCurrentSequence
            . " " . ($objSportSpider->timeLapsed - $timePrev) . "secs - ";

        // The URL comes from crawled pages, so escape it before putting it
        // into the HTML output.
        $strPageURL = isset($arrFetchedPage[0]) ? $arrFetchedPage[0] : "";
        echo " URL: " . htmlspecialchars($strPageURL, ENT_QUOTES) . "<br><hr>";
        echo "<br>";

        // isset() alone covers both "key exists" and "value is not null".
        if (isset($arrFetchedPage[1]) && preg_match($strDetail1, $arrFetchedPage[1]) > 0) {
            // NOTE(review): the fetched content is echoed unescaped, as in the
            // original; if it is raw remote HTML this renders third-party
            // markup in the browser -- confirm that is intended.
            echo $arrFetchedPage[1] . "<br><hr>";
        }
    }

    echo "total time: " . $objSportSpider->timeLapsed . " secs<br>";
}
// end function
?>