在抓取需要用户登录后才能访问的数据时,常会用到 curl 系列函数。本文给大家分享如何利用 PHP 的 curl 函数(curl_init()、curl_setopt()、curl_exec() 等)实现登录并抓取数据。curl 在抓取网页的效率方面是比较高的,而且可以通过 curl_multi_* 系列函数并发处理多个请求。
平时使用 file_get_contents() 函数比较快捷,但效率就要稍低些,另外,使用curl函数时需要开启curl扩展。
<?php
/*
 * Two-step login against a remote site that protects its login form with a
 * captcha:
 *
 *   GET  (no POST data): load the login page to obtain a session cookie,
 *        download the captcha image with that same cookie, store it locally
 *        and show a small form asking the user to type the code.
 *   POST (form submitted): send username, password and the typed captcha code
 *        to the login endpoint, reusing the session cookie from the GET step,
 *        and print the raw response (headers included).
 */
//error_reporting(0);

// Cookie jar written while fetching the login page / captcha (pre-login session).
$cookieVerify = dirname(__FILE__)."/verify.tmp";
// Cookie jar written by the login request itself.
$cookieSuccess = dirname(__FILE__)."/1769.tmp";

if(!$_POST){
    // Collects the first failure so the captcha form is only shown when
    // every preparatory step succeeded.
    $error = "";

    // Step 1: request the login page and save the cookies it sets.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, "http://www.idc75.com/manage/userlogin.html");
    curl_setopt($ch, CURLOPT_HEADER, 0);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieVerify);
    $rs = curl_exec($ch);
    if($rs === false){
        $error = "Failed to load the login page: " . curl_error($ch);
    }
    curl_close($ch);
    // curl_close() is a no-op on PHP 8+; dropping the handle makes sure the
    // cookie jar is written before the next request reads it.
    unset($ch);

    if($error === ""){
        // Step 2: fetch the captcha with the saved cookie. The cookie is
        // required, otherwise the captcha does not belong to this session.
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, "http://www.idc75.com/include/getcode.php");
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieVerify);
        $rs = curl_exec($ch);
        if($rs === false){
            $error = "Failed to download the captcha image: " . curl_error($ch);
        }
        curl_close($ch);
        unset($ch);
    }

    // Store the captcha locally: letting the browser pull it from the remote
    // site again would produce a different code that may not validate.
    if($error === "" && file_put_contents("verify.jpg", $rs) === false){
        $error = "Failed to write verify.jpg";
    }

    if($error === ""){
        // Form in which the user types the captcha by hand.
        echo "<form action=\"\" method=\"post\"><input type=\"text\" name=\"vcode\"><img src=\"verify.jpg\" /><br><input type=\"submit\" value=\"ok\"></form>";
    }else{
        echo htmlspecialchars($error);
    }
}else{
    // Login step.
    // Username / password
    $user = "abc123";
    $pass = "123456";
    // Captcha code typed by the user; fall back to an empty string when the
    // field is missing or is not a plain string.
    $verify = (isset($_POST["vcode"]) && is_string($_POST["vcode"])) ? $_POST["vcode"] : "";
    $url = "http://www.idc75.com/userlogin.php?action=login";

    $fields_post = array("username"=> $user, "userpwd"=> $pass, "logintype"=>1,"vcode"=>$verify);
    // CURLOPT_HTTPHEADER expects a list of complete "Name: value" lines,
    // not an associative array.
    $headers_login = array("User-Agent: Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36");
    // URL-encode every field so characters such as "&", "=" or "+" in the
    // password or captcha cannot corrupt the form body.
    $fields_string = http_build_query($fields_post, "", "&");

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // Return the response as a string instead of printing it directly.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Send the session cookie obtained together with the captcha.
    curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieVerify);
    // Include the response headers in the returned string.
    curl_setopt($ch, CURLOPT_HEADER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 120);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers_login);
    // Cookies received from the login request are saved to a separate jar.
    curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieSuccess);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $fields_string);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    $result = curl_exec($ch);
    if($result === false){
        echo htmlspecialchars("Login request failed: " . curl_error($ch));
    }else{
        print_r($result);
    }
    curl_close($ch);
    // Drop the handle so the cookie jar is flushed on PHP 8+ as well.
    unset($ch);
    // After a successful login, the 1769.tmp cookie file contains the
    // corresponding user name and related information.
}
?>