PHP Classes

File: example5.php

Recommend this page to a friend!
  Classes of Alexey G. Piyanin   HTML SAX Parser   example5.php   Download  
File: example5.php
Role: Example script
Content type: text/plain
Description: Example #5 (get page part - news on yahoo.com)
Class: HTML SAX Parser
Parse HTML documents using regular expressions
Author: By Alexey G. Piyanin
Last change: fix some errors in parsing
Date: 18 years ago
Size: 2,053 bytes
 

Contents

Class file image Download
<?php
/*
Author: Alexey G. Piyanin (e-mail: drdrzlo at mail dot ru)
Date: Jun 7 2006
Title: Get page part
*/
// Full "<?php" tag: the bare "<?" form only parses when short_open_tag is enabled.
// require_once: the script cannot do anything useful without the parser class,
// so fail at the include instead of later at "new HTML_SAXParser()".
require_once('SAXParser.php');

/**
 * Start-tag callback registered with the parser via initFunc().
 *
 * Pushes $tag onto the front of the global $stack (innermost element first),
 * unless the tag is listed in the global $t, whose entries are never pushed.
 * Once the global $isBeginNews flag is set:
 *  - an <a> opened at the news-link path stores its href in
 *    $news[$currentNewsIndex]['href'] ('' when the tag has no href attribute);
 *  - a <table> opened at the sibling path after at least one news item has
 *    been completed makes the callback return -1.
 *
 * @param string $tag        Tag name as reported by the parser.
 * @param array  $attributes Attribute map of the tag; only 'href' is read.
 * @param int    $readSize   Unused here.
 * @return int|null -1 in the <table> case above, otherwise null.
 *                  NOTE(review): -1 presumably tells HTML_SAXParser to stop
 *                  parsing - confirm against SAXParser.php.
 */
function begin($tag,$attributes,$readSize){
  global $stack,$t,$isBeginNews,$news,$currentNewsIndex;

  // Strict comparison: tag names are strings, no type juggling wanted.
  if (!in_array($tag,$t,true)) array_unshift($stack,$tag);
  if (!$isBeginNews) return;

  $path = join('/',$stack);
  if ($tag==='a' && $path==='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
    // An anchor without href used to raise an undefined-index warning and
    // store null; record an empty string instead.
    $news[$currentNewsIndex]['href'] = isset($attributes['href']) ? $attributes['href'] : '';
  }elseif($currentNewsIndex>0 && $tag==='table' && $path==='table/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
    return -1;
  }
}

/**
 * End-tag callback registered with the parser via initFunc().
 *
 * When the global $isBeginNews flag is set and the closing tag is the <a> at
 * the news-link path, advances $currentNewsIndex to the next news slot.
 * Then unwinds the global $stack (innermost element first): every entry above
 * the nearest matching open tag is dropped together with that tag itself.
 *
 * An end tag that has no matching entry on the stack is ignored. Previously
 * such a stray end tag popped the whole stack, after which no path comparison
 * in the other callbacks could match.
 *
 * @param string $tag      Tag name as reported by the parser.
 * @param int    $readSize Unused here.
 * @return void
 */
function endTag($tag,$readSize){
  global $stack,$isBeginNews,$news,$currentNewsIndex;

  if ($isBeginNews && $tag==='a' && join('/',$stack)==='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
    $currentNewsIndex++;
  }

  // $stack is a 0-based list (only array_unshift/array_shift/array_splice
  // touch it), so the key found is the depth of the nearest matching tag.
  $depth = array_search($tag,$stack,true);
  if ($depth===false) return; // stray end tag: leave the stack untouched

  // Drop unclosed inner elements plus the matching element itself.
  array_splice($stack,0,$depth+1);
}

/**
 * Character-data callback registered with the parser via initFunc().
 *
 * Before the news section has been reached, watches for the text
 * "in the news" (compared case-insensitively) at the section-heading path and
 * sets the global $isBeginNews flag when it appears. After that, text found
 * at the news-link path is stored as $news[$currentNewsIndex]['text'].
 *
 * @param string $str Text chunk as reported by the parser.
 * @return void
 */
function character($str){
  global $stack,$isBeginNews,$news,$currentNewsIndex;

  $path = implode('/',$stack);

  if ($isBeginNews){
    // Inside the section: keep the text of the current news link.
    if ($path=='a/font/td/tr/table/td/tr/table/td/tr/table/font/center/body/html'){
      $news[$currentNewsIndex]['text'] = $str;
    }
    return;
  }

  // Still searching for the heading that opens the "In the News" part.
  $isHeadingPath = ($path=='font/a/b/td/tr/table/td/tr/table/td/tr/table/font/center/body/html');
  if ($isHeadingPath && strtolower($str)=='in the news'){
    $isBeginNews = true;
  }
}

// Tags that begin() never pushes onto $stack.
$t = array('br','meta','img','spacer','input','base','hr','link',);
$stack = array();             // path of open elements, innermost first
$URL = 'http://yahoo.com';    // page that is both displayed and parsed

$isBeginNews = false;         // set by character() when the "In the News" heading is seen
$currentNewsIndex = 0;        // slot in $news currently being filled
$news = array();              // collected items: array('href' => ..., 'text' => ...)

$parser = new HTML_SAXParser();
$parser->initFunc('begin','endTag','character');
?>
<html>
<body>
<center>Source page:<br><iframe src="<?=$URL?>" width="600" height="400" ></iframe><br><br></center>
News list (part "In the News"):<br>
<?php
$parser->parse($URL);
foreach($news as $row){
  // An item may have been collected with only one of its two fields.
  $href = isset($row['href']) ? $row['href'] : '';
  $text = isset($row['text']) ? $row['text'] : '';

  // Absolute http(s) and protocol-relative links already point somewhere;
  // everything else is resolved against the site root as before.
  if (preg_match('#^(https?:)?//#i',$href)){
    $link = $href;
  }else{
    $link = $URL.'/'.ltrim($href,'/');
  }

  // Both values come from a remote page, so escape them before echoing.
  // double_encode is off because it is not known whether the parser has
  // already decoded entities in the text it reports - TODO confirm.
?>
<a href="<?=htmlspecialchars($link,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8',false)?>" target="_blank"><?=htmlspecialchars($text,ENT_QUOTES|ENT_SUBSTITUTE,'UTF-8',false)?></a><br>
<?php } ?>
</body></html>