以下是我在自己网站的在线统计中用来识别搜索引擎的代码。对于无法识别的搜索引擎,会将其请求头(User-Agent)信息写入日志,之后再根据这些信息分析新搜索引擎厂商的特征,并据此扩充识别规则。
// Search-engine crawler detection for the site's online statistics.
//
// Outputs (script-level variables, same names as before):
//   $agent        - the raw User-Agent header, or '' when the client sent none.
//   $searchengine - the detected crawler name, or 'Unknown:' followed by the
//                   first 15 bytes of the User-Agent when nothing matched.

// Guarded so that including this file more than once does not redeclare the helper.
if (!function_exists('detectSearchEngine')) {
    /**
     * Identify the crawler behind a User-Agent string.
     *
     * Matching happens in two passes:
     *   1. case-insensitive substring signatures, checked in order (first hit wins);
     *   2. case-insensitive comparison of the whole User-Agent against a table
     *      of known full agent strings.
     *
     * @param string $agent Raw User-Agent header value ('' when absent).
     * @return string Crawler name, or 'Unknown:' plus the first 15 bytes of $agent.
     */
    function detectSearchEngine($agent)
    {
        $agent = (string) $agent;
        if ($agent === '') {
            return 'Unknown:';
        }

        // Substring signature => reported name. Order matters: the specific
        // Yahoo signatures must precede the generic 'Slurp' one, otherwise
        // they can never match.
        $signatures = array(
            'sogou'              => 'sogou spider',
            'Baiduspider'        => 'Baiduspider',
            'SpiderMan'          => 'SpiderMan',
            'psbot'              => 'psbot',
            'Googlebot'          => 'Googlebot',
            'msnbot'             => 'msnbot',
            'lanshanbot'         => 'lanshanbot',
            'Yahoo! Slurp China' => 'Yahoo China',
            'Yahoo! Slurp'       => 'Yahoo',
            'Slurp'              => 'Slurp',
            'Snapbot'            => 'Snapbot',
            'SurveyBot'          => 'SurveyBot',
            'Gaisbot'            => 'Gaisbot',
            'Gigabot'            => 'Gigabot',
            'Ask Jeeves'         => 'Ask Jeeves',
            'iaskspider'         => 'iaskspider',
        );

        // stripos() replaces eregi(), which was removed in PHP 7.0; none of
        // the signatures contain regex metacharacters, so a plain
        // case-insensitive substring search is equivalent.
        foreach ($signatures as $needle => $name) {
            if (stripos($agent, $needle) !== false) {
                return $name;
            }
        }

        // Other search engines: reported name => full User-Agent string.
        $otherEngines = array(
            'mp3bot.de'       => 'MP3Bot',
            'yahoo.com.cn'    => 'Mozilla/5.0 (compatible; Yahoo! Slurp China; http://misc.yahoo.com.cn/help.html)',
            'yahoo-inc.com'   => 'mp3Spider cn-search-devel at yahoo-inc dot com',
            'yahoo.com'       => 'Yahoo',
            'yodao.com'       => 'yodao',
            'wisenutbot.com'  => 'wisenutbot',
            'Sosoimagespider' => 'Sosoimagespider',
            'soso.com'        => 'Sosospider',
            'Youdao.com'      => 'YoudaoBot',
        );

        $lowerAgent = strtolower($agent);
        foreach ($otherEngines as $engineName => $engineAgent) {
            if ($lowerAgent === strtolower($engineAgent)) {
                return (string) $engineName;
            }
        }

        // Nothing matched: keep a short prefix so new crawlers can be spotted
        // in the statistics.
        return 'Unknown:' . substr($agent, 0, 15);
    }
}

// The header is optional; treat a missing User-Agent as an empty string.
$agent = isset($_SERVER['HTTP_USER_AGENT']) ? $_SERVER['HTTP_USER_AGENT'] : '';
$searchengine = detectSearchEngine($agent);