分类:
2009-12-30 18:17:11
/**
* 定义了每个采集插件需要使用的类,
* Main 主体类
* Log
* Content
* Status
*/
require_once('Gather.func.php');
abstract class Gather_Base{
// Values supplied through the constructor and set()
private $gl_ID; // identifier passed to set(); becomes part of the log file name
private $OS; // platform tag passed to set(); logging and page snapshots run only when it equals 'Win'
protected $RMT_name; // plugin name, taken from the concrete class name in the constructor
private $Encoding; // encoding of the fetched pages; get2() converts from it to utf-8
// Internal state of the run
private $timeStart; // time() taken when set() is called
private $timeEnd; // time() taken when ending() is called
private $eclapse; // (sic) timeEnd - timeStart in seconds, computed in ending()
private $readPages; // incremented by get2(); never explicitly initialised
private $log_file; // log file path built in set()
private $retStatus; // error arrays appended by status(); stays unset while nothing has failed
private $db; // database handle passed to set(); isNew() calls Single() and Query() on it
private $nativeEncoding; // encoding used for console output in showGame()/showArea()
protected $areasSell=Array(); // sell prices collected by getData()
protected $areasBuy=Array(); // buy prices collected by getData()
private $games=array(); // queue of name/type/url entries: setGame() appends, getGame() removes from the front
/**
 * Constructor.
 *
 * The plugin name is not a parameter: it is derived from the concrete class
 * name with get_class() and stored in $RMT_name.
 *
 * Declared as __construct because a method named after its class is no
 * longer treated as a constructor on PHP 8, which would leave $RMT_name and
 * $Encoding unset.
 *
 * @param string $Encoding character encoding of the pages on the target site
 */
public function __construct($Encoding='shift-jis'){
    $this->RMT_name=get_class($this);
    $this->Encoding=$Encoding;
}
/**
 * Legacy constructor name, kept as an ordinary method so that existing
 * calls such as parent::Gather_Base($encoding) keep working.
 *
 * Declared after __construct on purpose: that order avoids the
 * "redefining already defined constructor" strict notice on old PHP 5.
 *
 * @param string $Encoding character encoding of the pages on the target site
 * @return void
 */
public function Gather_Base($Encoding='shift-jis'){
    // self:: pins the call to this class even when a subclass overrides __construct
    self::__construct($Encoding);
}
/**
* Abstract method, implemented by each concrete plugin.
* Expected to load the site's game list into the games property
* (presumably through setGame(), the only writer visible in this class).
*/
abstract function getGames();
/**
 * Drains the queued game list and collects a price for every entry.
 *
 * Entries typed 'sell' or 'sale' are priced with getSellPrice() and stored in
 * $areasSell; entries typed 'buy' are priced with getBuyPrice() and stored in
 * $areasBuy. Entries of any other type are removed from the queue without
 * being priced. When the price result has a 'name' element, its 'price'
 * element is stored under that name; otherwise the whole result is stored
 * under the game name.
 */
final function getData(){
    for($entry=$this->getGame();$entry;$entry=$this->getGame()){
        $name=$entry['name'];
        $type=strtolower($entry['type']);
        $url=$entry['url'];
        $selling=($type=='sell'||$type=='sale');
        $buying=($type=='buy');
        if(!$selling&&!$buying){
            continue;
        }
        $this->showGame($name,$type);
        if($selling){
            $price=$this->getSellPrice($name,$url);
        }else{
            $price=$this->getBuyPrice($name,$url);
        }
        if(isset($price['name'])){
            $key=$price['name'];
            $value=$price['price'];
        }else{
            $key=$name;
            $value=$price;
        }
        if($selling){
            $this->areasSell[$key]=$value;
        }else{
            $this->areasBuy[$key]=$value;
        }
    }
}
/**
 * Price hooks implemented by each concrete plugin.
 *
 * getData() calls them with the game name and page URL of a queued entry.
 * If the returned value has a 'name' element, getData() stores its 'price'
 * element under that name; otherwise it stores the returned value as-is
 * under the game name.
 */
abstract function getBuyPrice($name,$url);
abstract function getSellPrice($name,$url);
/**
 * Appends one game-list entry to the games queue.
 *
 * @param mixed $name game name
 * @param mixed $type buy / sell marker; stored lower-cased
 * @param mixed $url  address of the page for this game
 */
protected final function setGame($name,$type,$url){
    $entry=array();
    $entry['name']=$name;
    $entry['type']=strtolower($type);
    $entry['url']=$url;
    $this->games[]=$entry;
}
/**
 * Removes and returns the first entry of the games queue.
 *
 * @return array|null array with name, type and url, or null once the queue is empty
 */
private function getGame(){
    $next=array_shift($this->games);
    return $next;
}
// private final function setEncoding($encoding){
// $this->Encoding=$encoding;
// }
/**
 * Lets subclasses check whether the content behind a URL has changed since
 * the last run, by comparing its MD5 with the one stored in rmt_gather_md5.
 *
 * Side effect: the stored digest is inserted (first sighting) or updated
 * (content changed) together with the current timestamp.
 *
 * @param string $url address the content was read from
 * @param string $doc page content
 * @return bool true when the URL is unknown or its content changed, false when the digest is unchanged
 */
protected final function isNew($url,$doc){
    $md5=md5($doc);
    // The URL comes from scraped pages, so it must not reach the SQL text raw.
    // NOTE(review): addslashes() is the only escaping available without knowing the
    // DB wrapper; switch to the wrapper's own escaping or bound parameters if it has them.
    $safeUrl=addslashes($url);
    $where="where url='".$safeUrl."'";
    $count=$this->db->Single("select count(*) from rmt_gather_md5 ".$where);
    if(!$count){
        $sql="insert into rmt_gather_md5(url,md5,updated) values('".$safeUrl."','".$md5."',".time().");";
        $this->db->Query($sql);
        return true;
    }
    $lastMd5=$this->db->Single("select md5 from rmt_gather_md5 ".$where);
    // Strict string comparison: with ==, two different digests that both look like
    // "0e<digits>" are compared as numbers and would be reported as equal.
    if((string)$lastMd5===$md5){
        return false;
    }
    $sql="update rmt_gather_md5 set md5='".$md5."' ,updated=".time()." ".$where;
    $this->db->Query($sql);
    return true;
}
/**
 * Called by the main program to inject run parameters and start the timer.
 *
 * When $OS equals 'Win' the ./log and ./log/<plugin name> directories are
 * created if missing. The log file path is computed in every case.
 *
 * @param mixed $gl_ID          run identifier, embedded in the log file name
 * @param mixed $OS             platform tag
 * @param mixed $db             database handle used by isNew()
 * @param mixed $nativeEncoding encoding used for console output
 */
public final function set($gl_ID,$OS,$db,$nativeEncoding){
    $this->gl_ID=$gl_ID;
    $this->OS=$OS;
    $this->db=$db;
    $this->nativeEncoding=$nativeEncoding;
    $this->timeStart=time();
    $pluginLogDir='./log/'.$this->RMT_name;
    if($OS=='Win'){
        foreach(array('./log',$pluginLogDir) as $dir){
            if(!file_exists($dir)){
                mkdir($dir);
            }
        }
    }
    $this->log_file=$pluginLogDir.'/'.date('Ymd_His_').$gl_ID.'.txt';
}
/**
 * Called by the main program when the run is over: stops the timer and
 * hands back everything that was collected.
 *
 * @return array RMT_name, Buy, Sell, Status, timeStart, timeEnd, eclapse (sic) and readPages
 */
public final function ending(){
    $finishedAt=time();
    $this->timeEnd=$finishedAt;
    $this->eclapse=$finishedAt - $this->timeStart;
    $summary=array();
    $summary['RMT_name']=$this->RMT_name;
    $summary['Buy']=$this->areasBuy;
    $summary['Sell']=$this->areasSell;
    $summary['Status']=$this->retStatus;
    $summary['timeStart']=$this->timeStart;
    $summary['timeEnd']=$this->timeEnd;
    $summary['eclapse']=$this->eclapse;
    $summary['readPages']=$this->readPages;
    return $summary;
}
/**
 * preg_match_all() in PREG_SET_ORDER mode, with error reporting.
 *
 * A regex engine failure is always reported through status(); an empty
 * result is reported only when $enableNone is false.
 *
 * @param string $reg        regular expression
 * @param string $doc        document to search
 * @param bool   $enableNone whether "no match" is acceptable
 * @return array|false match sets, or false after a reported failure
 */
protected final function pregMatchAll($reg,$doc,$enableNone=false){
    $matchs=array();
    $found=preg_match_all($reg,$doc,$matchs,PREG_SET_ORDER);
    $engineFailed=($found===false);
    $nothingFound=(!$engineFailed&&!$enableNone&&$found==0);
    if($engineFailed||$nothingFound){
        $this->status(array(
            'code'=>$engineFailed?_T::GC_MATCH_WRONG:_T::GC_MATCH_NONE,
            'content'=>"reg:$reg\ndocument:<<<\n$doc\n>>>\n"
        ));
        return false;
    }
    return $matchs;
}
/**
 * Extracts the cell texts of the first table in a document, with all HTML
 * tags removed from the cells.
 *
 * The markers are searched in lower case (getMiddle() lower-cases them), so
 * the document is expected to be lower-cased already, as get() does.
 * Within a row, all th cells are placed before all td cells regardless of
 * their order in the markup.
 *
 * NOTE(review): the '<table', '<tr', '<td' and '<th' marker literals were
 * lost when this file was extracted from HTML. They are restored here from
 * the variable names and from the getMiddle(...,'>') calls that strip the
 * rest of each opening tag - confirm against the original file.
 *
 * @param string $doc document containing a table
 * @return array|false two-dimensional array of rows and cells, or false when a marker is missing
 */
public function getTableData($doc){
    $doc=$this->getMiddle($doc,'<table','</table>');
    if(false===$doc){
        return false;
    }
    // Drop what is left of the opening <table ...> tag.
    $doc=$this->getMiddle($doc,'>');
    if(false===$doc){
        return false;
    }
    $trs=$this->getMiddles($doc,'<tr','</tr>');
    if(false===$trs){
        return false;
    }
    $Table=array();
    foreach($trs as $tr){
        // Test for failure BEFORE trimming: trim(false) yields '' and would hide the error.
        $tr=$this->getMiddle($tr,'>');
        if(false===$tr){
            return false;
        }
        $tr=trim($tr);
        $tds=$this->getMiddles($tr,'<td','</td>');
        $ths=$this->getMiddles($tr,'<th','</th>');
        $tds=array_merge($ths,$tds);
        $Row=array();
        foreach($tds as $td){
            // Skip the remainder of the opening cell tag; a cell without '>' becomes ''.
            $td=$this->getMiddle($td,'>',null,true);
            // Remove any tags nested inside the cell.
            $td=preg_replace('/(<[^>]*>)/','',$td);
            $Row[]=$td;
        }
        $Table[]=$Row;
    }
    return $Table;
}
/**
 * preg_match() with error reporting.
 *
 * A regex engine failure is always reported through status(); a missing
 * match is reported only when $enableNone is false.
 *
 * @param string $reg        regular expression
 * @param string $doc        document to search
 * @param bool   $enableNone whether "no match" is acceptable
 * @return array|false match array, or false after a reported failure
 */
protected final function pregMatch($reg,$doc,$enableNone=false){
    $matchs=array();
    $found=preg_match($reg,$doc,$matchs);
    $engineFailed=($found===false);
    $nothingFound=(!$engineFailed&&!$enableNone&&$found==0);
    if($engineFailed||$nothingFound){
        $this->status(array(
            'code'=>$engineFailed?_T::GC_MATCH_WRONG:_T::GC_MATCH_NONE,
            'content'=>"reg:$reg\ndocument:<<<\n$doc\n>>>\n"));
        return false;
    }
    return $matchs;
}
/**
 * Returns the part of a string between a begin marker and an end marker,
 * excluding both markers.
 *
 * The search is case-SENSITIVE (strpos). getMiddle() is the variant that
 * lower-cases the markers first.
 *
 * @param string      $str        string to cut
 * @param string|null $begin      begin marker; null or '' means "from the start of the string"
 * @param string|null $end        end marker; null or '' means "to the end of the string"
 * @param bool        $enableNone when true a missing marker is not an error: a missing begin
 *                                marker yields '', a missing end marker yields the rest of the string
 * @return string|false the middle part, or false after the failure was reported through status()
 */
protected final function getMiddle2($str,$begin=null,$end=null,$enableNone=false){
    if($begin!==null&&$begin!==''){
        $i=strpos($str,$begin);
        if($i===false){
            if($enableNone)return '';
            $this->status(array(
                'code'=>_T::GC_EXPLODE_NO_BEGIN,
                'content'=>"begin:$begin\ndocument:<<<\n$str\n>>>\n"
            ));
            return false;
        }
        $from=$i+strlen($begin);
        $rest=substr($str,$from);
        if($rest===false){
            // Only old PHP versions return false here, when the marker ends exactly at
            // the end of the input. Compare against the ORIGINAL length: the old code
            // had already overwritten $str with false before measuring it.
            if($enableNone||$from==strlen($str))return '';
            $this->status(array(
                'code'=>_T::GC_EXPLODE_NO_MIDDLE,
                'content'=>"begin:$begin\nend:$end\ndocument:<<<\n$str\n>>>\n"
            ));
            return false;
        }
        $str=$rest;
    }
    if($end!==null&&$end!==''){
        $j=strpos($str,$end);
        if($j===false){
            if($enableNone)return $str;
            $this->status(array(
                'code'=>_T::GC_EXPLODE_NO_END,
                'content'=>"end:$end\ndocument:<<<\n$str\n>>>\n"));
            return false;
        }
        $middle=substr($str,0,$j);
        if($middle===false){
            if($enableNone)return '';
            $this->status(array(
                'code'=>_T::GC_EXPLODE_NO_MIDDLE,
                'content'=>"begin:$begin\nend:$end\ndocument:<<<\n$str\n>>>\n"));
            return false;
        }
        $str=$middle;
    }
    return $str;
}
/**
 * Same as getMiddle2(), but with both markers lower-cased first.
 * The string being searched is passed through unchanged.
 *
 * @param string      $str        string to cut
 * @param string|null $begin      begin marker
 * @param string|null $end        end marker
 * @param bool        $enableNone passed through to getMiddle2()
 * @return string|false whatever getMiddle2() returns
 */
protected final function getMiddle($str,$begin=null,$end=null,$enableNone=false){
    $lowerBegin=$begin?strtolower($begin):$begin;
    $lowerEnd=$end?strtolower($end):$end;
    return $this->getMiddle2($str,$lowerBegin,$lowerEnd,$enableNone);
}
/**
 * Strips table markup from a document: table, tr and td tags, onclick and
 * class attributes, all whitespace and all commas.
 *
 * NOTE(review): the method header and the '<table[^>', '<tr[^>', '<td[^>'
 * pattern prefixes were lost when this file was extracted from HTML. They
 * are restored here by analogy with the intact closing-tag patterns -
 * confirm against the original file.
 *
 * @param string $doc document to clean
 * @return string cleaned document
 */
protected final function eraseTable($doc){
    $doc=preg_replace('/(<table[^>]*>)/i','',$doc);
    $doc=preg_replace('/(<\/table>)/i','',$doc);
    $doc=preg_replace('/(<tr[^>]*>)/i','',$doc);
    $doc=preg_replace('/(<\/tr>)/i','',$doc);
    $doc=preg_replace('/(<td[^>]*>)/i','',$doc);
    $doc=preg_replace('/(<\/td>)/i','',$doc);
    $doc=preg_replace('/(onclick="[^"]*")/i','',$doc);
    $doc=preg_replace("/(onclick='[^']*')/i",'',$doc);
    $doc=preg_replace("/(class='[^']*')/i",'',$doc);
    $doc=preg_replace('/(class="[^"]*")/i','',$doc);
    $doc=preg_replace('/\s*/i','',$doc);
    // NOTE(review): the extracted text shows a literal line break here, which is a no-op
    // after the whitespace removal above; the original needle may have been a <br> tag.
    $doc=str_replace("\n",'',$doc);
    $doc=str_replace(',','',$doc);
    return $doc;
}
/**
 * Collects every fragment of a document that lies between a start marker
 * and the next end marker, excluding the markers themselves.
 *
 * Scanning stops at the first start marker that has no end marker after it;
 * fragments found up to that point are returned.
 *
 * @param string $doc   document to scan
 * @param string $start start marker
 * @param string $end   end marker
 * @return array list of fragments, possibly empty
 */
protected final function getMiddles($doc,$start,$end){
    $results=array();
    $startLen=strlen($start);
    $cursor=0;
    while(($open=strpos($doc,$start,$cursor))!==false){
        $from=$open+$startLen;
        $close=strpos($doc,$end,$from);
        if($close===false){
            break;
        }
        $results[]=substr($doc,$from,$close-$from);
        // The next search resumes at the end marker itself, not after it.
        $cursor=$close;
    }
    return $results;
}
/**
 * Fetches the raw content of a URL, making up to three attempts with a
 * two-second pause before each retry.
 *
 * An attempt counts as failed when the result is falsy (so an empty page is
 * retried too), but only a result of exactly false is reported through
 * status(). No encoding conversion happens here.
 *
 * @param string $url address to read
 * @return string|false raw content, or false when the final result is false
 */
protected final function get3($url){
    $this->log(_T::readFile,$url);
    $doc=@file_get_contents($url);
    $retryNotices=array("\nread url fail:$url \nretry\n","\nretry\n");
    foreach($retryNotices as $notice){
        if($doc){
            break;
        }
        sleep(2);
        echo $notice;
        $doc=@file_get_contents($url);
    }
    if(!$doc){
        echo "\nAbort\n";
    }
    if($doc===false){
        // NOTE(review): the [url=...] wrapper below looks like forum auto-link markup picked
        // up when the file was published; it is kept byte-for-byte - confirm the intended text.
        $this->status(array('code'=>_T::GC_ACCESS_DENY,'content'=>"[url=$url]url:$url[/url]"));
        return false;
    }
    return $doc;
}
/**
 * Fetches a URL through get3() and converts the content from the site
 * encoding to utf-8.
 *
 * When $OS equals 'Win' the converted page is also saved as a snapshot under
 * ./snap/<plugin name>/, in a file named from the timestamp and the URL.
 *
 * @param string $url address to read
 * @return string|false converted content, or false when the fetch failed
 */
protected final function get2($url){
    $doc=$this->get3($url);
    if($doc===false){
        // Pass the failure on unchanged: converting false would turn it into '' and hide
        // the error from get(), besides counting the page and writing an empty snapshot.
        return false;
    }
    $doc=mb_convert_encoding($doc,"utf-8",$this->Encoding);
    $this->readPages ++;
    // On Windows, keep a snapshot of the page.
    if($this->OS=='Win'){
        $snapDir='./snap/'.$this->RMT_name;
        if(!file_exists('./snap')){mkdir('./snap');}
        if(!file_exists($snapDir)){mkdir($snapDir);}
        // Build a file name from the URL: drop the scheme, a leading plugin name and a
        // leading slash, then replace the URL punctuation with underscores.
        $tag=$url;
        if(substr($tag,0,7)=='http://')$tag=substr($tag,7);
        if(substr($tag,0,strlen($this->RMT_name))==$this->RMT_name)
            $tag=substr($tag,strlen($this->RMT_name));
        if(substr($tag,0,1)=='/')$tag=substr($tag,1);
        $tag=str_replace(array('/',':','?','='),'_',$tag);
        $filename=$snapDir.'/'.date('Ymd_His_').$tag;
        if(!preg_match('/\.html?$/i',$filename))
            $filename=$filename.".html";
        file_put_contents($filename,$doc);
    }
    return $doc;
}
/**
 * Fetches a page with get() while temporarily overriding the source
 * encoding; the previous encoding is put back afterwards.
 *
 * @param string $url      address to read
 * @param string $encoding encoding to assume for this one request
 * @return string|false whatever get() returns
 */
protected final function getWithEncoding($url,$encoding){
    $savedEncoding=$this->Encoding;
    $this->Encoding=$encoding;
    $page=$this->get($url);
    $this->Encoding=$savedEncoding;
    return $page;
}
/**
 * Fetches a page through get2(), lower-cases it and removes HTML comments
 * and script blocks.
 *
 * If a comment or script block is never closed, everything from its opening
 * marker onward is discarded and the text before it is returned
 * immediately, skipping the remaining clean-up steps.
 *
 * NOTE(review): the '<!--' ... '-->' and '<script' ... '</script>' sections
 * of both loops were lost when this file was extracted from HTML. They are
 * restored here from the surviving variable names and the +3 / +9 offsets,
 * which equal the lengths of '-->' and '</script>' - confirm against the
 * original file.
 *
 * @param string $url address to read
 * @return string|false cleaned page content, or false when get2() returned false
 */
protected final function get($url){
    $doc=$this->get2($url);
    if($doc===false)return $doc; // pass the error up to the caller
    // Lower-case, then strip comments and scripts.
    $doc=strtolower($doc);
    while(true){
        $i=strpos($doc,'<!--');
        if($i===false)break;
        $partHead=substr($doc,0,$i);
        $partTail=substr($doc,$i);
        $j=strpos($partTail,'-->');
        if($j===false) return $partHead;
        $doc=$partHead.substr($partTail,$j+3);
    }
    while(true){
        $i=strpos($doc,'<script');
        if($i===false)break;
        $partHead=substr($doc,0,$i);
        $partTail=substr($doc,$i);
        $j=strpos($partTail,'</script>');
        if($j===false) return $partHead;
        $doc=$partHead.substr($partTail,$j+9);
    }
    // NOTE(review): the needle is a plain space in the extracted text; it may have been
    // an HTML entity such as a non-breaking space before extraction - confirm.
    $doc=str_replace(' ','',$doc);
    return $doc;
}
/**
 * Records an error result.
 *
 * Anything that is not an array with a 'code' element counts as "no error"
 * and returns true immediately. Otherwise the array is appended to
 * $retStatus, its code and content are written to the log, and false is
 * returned.
 *
 * Declared without "final": a private method cannot be overridden, and PHP 8
 * emits a compile warning for "private final".
 *
 * @param mixed $err usually another routine's result, possibly carrying 'code' and 'content'
 * @return bool true when the status is normal, false when an error was recorded
 */
private function status($err){
    if(!is_array($err)||!isset($err['code'])){
        return true;
    }
    // 'content' is optional; fall back to an empty string instead of raising a notice.
    $content=isset($err['content'])?$err['content']:'';
    $this->retStatus[]=$err;
    $this->log($err['code'],$content);
    return false;
}
/**
 * Prints a progress line for a game, converted from utf-8 to the console
 * encoding.
 *
 * @param string       $name game name in utf-8
 * @param string|false $type 'preread', 'process', 'sell' or 'buy' (case-insensitive);
 *                           anything else prints the generic label
 */
protected final function showGame($name,$type=false){
    $str=mb_convert_encoding($name,$this->nativeEncoding,'utf-8');
    switch(strtolower($type)){
        case 'preread':
            echo "Preread Game: $str \n";
            break;
        case 'process':
            echo "Process Game: $str \n";
            break;
        case 'sell':
            echo "Sell Game: $str \n";
            break;
        case 'buy':
            echo "Buy Game: $str \n";
            break;
        default:
            echo "Game: $str \n";
    }
}
/**
 * Prints a tab-indented progress line for an area, converted from utf-8 to
 * the console encoding.
 *
 * @param string       $name area name in utf-8
 * @param string|false $type 'sell' or 'buy' (case-insensitive); anything else prints the generic label
 */
protected final function showArea($name,$type=false){
    $str=mb_convert_encoding($name,$this->nativeEncoding,'utf-8');
    switch(strtolower($type)){
        case 'sell':
            echo "\t Sell Area: $str \n";
            break;
        case 'buy':
            echo "\t Buy Area: $str \n";
            break;
        default:
            echo "\tArea: $str \n";
    }
}
/**
 * Appends one entry to the log file. The entry is always formatted, but it
 * is written only when $OS equals 'Win'.
 *
 * @param string $name    name of the log item
 * @param string $content content of the log item
 */
protected final function log($name,$content){
    $entry=implode('',array(
        "Log\n",
        "\tTime:".date('Y-m-d H:i:s')."\n",
        "\tName:$name\n",
        "\tContent:$content\n",
    ));
    if($this->OS!='Win'){
        return;
    }
    file_put_contents($this->log_file,$entry,FILE_APPEND);
}
/**
 * Expansion for the "大航海" game (per the original comment): one area is
 * duplicated into three lines.
 * Every generated key is $name followed by a line suffix, and every value is
 * the same $area data.
 *
 * @param mixed  $area area data
 * @param string $name area name used as key prefix
 * @return array three entries, keyed by $name plus suffix
 */
protected final function expandSeal($area,$name){
    $expanded=array();
    foreach(array('マルセイユ','リスボン','ロンドン') as $line){
        $expanded[$name.$line]=$area;
    }
    return $expanded;
}
/**
 * Duplicates one area's data into four entries, each keyed by the area name
 * followed by one of four fixed suffixes.
 *
 * @param string $areaName area name used as key prefix
 * @param mixed  $areaData area data, repeated for every suffix
 * @return array four entries, keyed by $areaName plus suffix
 */
protected final function expandMu($areaName,$areaData){
    $expanded=array();
    foreach(array('混沌','生命','祝福','霊魂') as $suffix){
        $expanded[$areaName.$suffix]=$areaData;
    }
    return $expanded;
}
/**
 * Unfinished stub: the only statement is an empty branch, so the method
 * returns null for every input and $areaData is never used.
 *
 * @param string $areaName area name
 * @param mixed  $areaData area data (unused)
 * @return null
 */
protected final function expandPsu($areaName,$areaData){
if($areaName=='エントランス1'){
// intentionally empty in the source - no expansion is implemented
}
}
/**
 * Maps a merged-server area onto its member servers.
 *
 * 'rengou1-2' / '連合サーバー1' expands to two servers and
 * 'rengou2-2' / '連合サーバー2' to three; any other area is returned as a
 * single entry under its own name. Every entry carries the same $areaData.
 *
 * @param string $areaName area name
 * @param mixed  $areaData area data
 * @return array server name => area data
 */
protected function expandRohan($areaName,$areaData){
    if(in_array($areaName,array('rengou1-2','連合サーバー1'))){
        $servers=array('オン','エドネ');
    }elseif(in_array($areaName,array('rengou2-2','連合サーバー2'))){
        $servers=array('ロハ','ゲイル','マレア');
    }else{
        $servers=array($areaName);
    }
    $thisGame=array();
    foreach($servers as $server){
        $thisGame[$server]=$areaData;
    }
    return $thisGame;
}
}
?>
阅读(388) | 评论(0) | 转发(0) |
给主人留下些什么吧!~~