ãã®ãããã¯ã§ã¯ãç§ã®æèŠã§ã¯ãPHPçšã®ãã«ãã¹ã¬ããcURLããŠã³ããŒãã®äŸ¿å©ã§æ©èœçãªå®è£
ãæ瀺ãããŠããŸãã ããããããã¯èª°ãã«åœ¹ç«ã€ã§ãããããæåŸ
ãç§ããããããŸã;ïŒ
èå³ã®ãªãæ ãè
ã§ãã£ãŠããcURLçµç±ã®ããŠã³ããŒãã¯äœ¿çšããŸããã§ããã ã³ã³ãœãŒã«ããã§ããããçš®ã®èšèªã§ã³ãŒããå®è£
ããããšã§ãå¯èœã§ãã 1ã€ã®ãªã³ã¯ã®ããŠã³ããŒãããããã¯ãããœãªã¥ãŒã·ã§ã³ã¯ãããšãã°
php.netãªã©ããããã¯ãŒã¯ã®é
ã
ã«ãããŸãã ãã ããPHPã§ã®å®è£
ãæ€èšããå Žåããã®æ¹æ³ã¯ãè£å©æäœïŒDNSã«ãã¯ã¢ããããªã¯ãšã¹ãåŸ
æ©ãªã©ïŒã«æéãããããããé©åã§ãªãå ŽåããããŸãã å€æ°ã®ããŒãžãããŠã³ããŒãããå Žåãé 次ããŒãžã§ã³ã¯åãå
¥ããããŸããã ããªããæºè¶³ããŠããå Žå-ããªãã¯ãã以äžèªãããšãã§ããŸãã:)
ããšãã°ãPerlã§ã¯ã
fork ïŒïŒãŸãã¯threadïŒ
use threads ïŒã
䜿çšããŠãã·ã³ã°ã«ã¹ã¬ããã®ããŠã³ããŒãã䞊ååã§ããŸãã ããã¯ããã®èšèªã®ã©ã€ãã©ãªã®è±å¯ãªæ©èœãã«ãŠã³ãããŠããŸããã å人çã«ã¹ã¬ãããšLWPãé©çšããŸããã ãã ããPHPã«ã€ããŠè©±ããŠãããããååçã«ãã®æ©èœããªãããã䞊ååã«ã¯å€§ããªåé¡ããããŸãã
ã¹ã¬ãããäœæããæ¹æ³ãç¥ã£ãŠãã人ããããæããŠãã ããã ã¯ããcURLã«ã¯
curl_multi_ *é¢æ°ããããŸããããããã«åºã¥ãå®è£
ã®äŸã¯ç§ã«ã¯åããŠããŸããã§ããã ãããŠãæçµçã«ã¯ãèªè»¢è»ãçµã¿ç«ãŠãããšã«ããŸããã
æåã¯ã
offããæãåçŽãªäŸãåç
§
ããŸãã åèæž ã ããã«æã£ãŠããŠãã ãã:)
<ïŒphp
//äž¡æ¹ã®cURLãªãœãŒã¹ãäœæããŸã
// Minimal curl_multi example: download two pages in parallel and let cURL
// print each body straight to STDOUT (CURLOPT_RETURNTRANSFER is not set).
$ch1 = curl_init();
$ch2 = curl_init();

// Set the URL and the other relevant options on each easy handle.
curl_setopt($ch1, CURLOPT_URL, "www.example.com");
curl_setopt($ch1, CURLOPT_HEADER, 0);
curl_setopt($ch2, CURLOPT_URL, "www.php.net");
curl_setopt($ch2, CURLOPT_HEADER, 0);

// Create the multi handle that drives all transfers.
$mh = curl_multi_init();

// Attach both easy handles to it.
curl_multi_add_handle($mh, $ch1);
curl_multi_add_handle($mh, $ch2);

$running = null;
// Run the transfers; $running receives the number of still-active handles.
do {
    curl_multi_exec($mh, $running);
} while ($running > 0);

// Detach the easy handles, then close them explicitly: closing the multi
// handle does not close the easy handles that were attached to it.
curl_multi_remove_handle($mh, $ch1);
curl_multi_remove_handle($mh, $ch2);
curl_close($ch1);
curl_close($ch2);
curl_multi_close($mh);
ïŒ>
ãã®ã³ãŒãã¯ãã¢ããªã±ãŒã·ã§ã³ã³ãŒããšã©ã€ãã©ãªã®çžäºäœçšã®ããè€éãªæ§æã«ãããã·ã³ã°ã«ã¹ã¬ããã¢ãããŒããšã¯ç°ãªããŸãã
1ïŒåæ¥ç¶ã«ã¯ç¬èªã®
curl_init ïŒïŒãããããã©ã¡ãŒã¿ãŒã¯
curl_setopt ïŒïŒã§èšå®ãããŸãã ããã§ã¯ãã¹ãŠãæšæºã§ãã説æãªãã«åŒçšããŸãã
2ïŒ
curl_multi_init ïŒïŒåŒã³åºãã®ããŠã³ããŒãã®äžè¬çãªå¶åŸ¡ã®ããã«ãåå¥ã®èšè¿°åãäœæããããããä»ããŠãã¹ãŠã®ãããªãäœæ¥ãå®è¡ãããŸãã
3ïŒæå®ãããèšè¿°åãžã®
curl_multi_add_handle ïŒïŒåŒã³åºãã¯ãæåã«å¥ã®æ¥ç¶ã
äœæããŸã ã
æºå段éãå®äºããä»ããçŽæ¥ããŠã³ããŒãããŸãïŒ
4ïŒã©ã€ãã©ãªã¯èªåçã«ããŠã³ããŒããããŸã;
curl_exec ïŒïŒã®ããã«æ瀺çãªåŒã³åºãã¯ãããããŸããã
curl_multi_exec ïŒïŒãç¹°ãè¿ãåŒã³åºãããšã§çœ®ãæããããŸãã ååã¯äŒŒãŠããŸããããã®é¢æ°ã¯ãããã«ç°ãªã圹å²ãæãããŸã-ã¢ã¯ãã£ããªã¹ã¬ããã®æ°ã®å€åããããã¯ããŠéç¥ããŸãïŒçºçãããšã©ãŒïŒã 2çªç®ã®ãã©ã¡ãŒã¿ãŒã¯ãåŒã³åºããããšãçŸåšã¢ã¯ãã£ããªæ¥ç¶ã®æ°ãæ ŒçŽããæ°å€å€æ°ãžã®åç
§ã§ãã æ°éãå€æŽãããŸãã-ããã¯ãäžéšã®ã¹ã¬ãããäœæ¥ãå®äºããããšãæå³ããŸãã ãã®ãããããŠã³ããŒããµã€ã¯ã«ã¯
// Keep driving the transfers until no handle is active any more.
do {
    curl_multi_exec($mh, $running);
} while ($running > 0);
5ïŒæåŸã«ãããŠã³ããŒãåŸããªãœãŒã¹ã解æŸãããŸãã éèŠïŒ
curl_init() で作成した接続はメインの記述子に「紐付け」られますが、自動的には閉じられません。curl_multi_remove_handle() を呼び出すことに加えて、curl_close() で手動で閉じる必要があります。
誰ãããã®ãããªå®è£
ãååã«æã£ãŠãããããããã圌ãã¯ãã以äžèªãããšãã§ããªããããããŸããã ããã«é²ãã§ãããŸãã
ãã®å®è£
ã®äœãæªãã®ã§ããïŒ æãæçœãªãã€ã³ãã®ããã€ãïŒ
- ã³ãŒãã§çŽæ¥æå®ããã2ã€ã®ãªã³ã¯ã®ããŠã³ããŒãã«é¢ããå³ããå¶é
- çµæã®ããŒãžã¯STDOUTã«çŽæ¥è¡šç€ºãããŸã
ããã¯ã»ãã®äžéšã§ãããæ®ãã¯ä»¥äžã§èª¬æããŸãã
ç§ã¯ãããã®æ¬ ç¹ãä¿®æ£ããããšãã°æ¬¡ã®ããã«ãªããŸãïŒ
<ïŒphp
// Download every URL in $urls in parallel and print each page as soon as
// its transfer completes.
$urls = array("www.example.com", "www.php.net");

$mh = curl_multi_init();
$chs = array();
foreach ($urls as $url) {
    $chs[] = ($ch = curl_init());
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_HEADER, 0);
    // CURLOPT_RETURNTRANSFER: hand the body back as a string instead of
    // writing it to STDOUT.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_multi_add_handle($mh, $ch);
}

$running = null;
do {
    curl_multi_exec($mh, $running);

    // Drain the whole message queue: several transfers can finish between
    // two curl_multi_exec() calls, and reading only one message per change
    // of $running would silently drop the others once the loop ends.
    while (($info = curl_multi_info_read($mh)) !== false) {
        $ch = $info['handle'];
        // Body of the page that has just finished downloading.
        $content = curl_multi_getcontent($ch);
        // Any page processing would go here; for now print to STDOUT,
        // as the previous example did.
        echo $content;
    }

    // Sleep until there is socket activity instead of spinning the CPU.
    if ($running > 0) {
        curl_multi_select($mh, 1.0);
    }
} while ($running > 0);

// Detach and close every easy handle, then the multi handle.
foreach ($chs as $ch) {
    curl_multi_remove_handle($mh, $ch);
    curl_close($ch);
}
curl_multi_close($mh);
ïŒ>
ããã«ãã»ãšãã©ã®å ŽåãSTDOUTã§ããŒãžã衚瀺ããã®ã¯éåžžã«ç°¡åã§ãã ããã«ãããã¯å®éã®ããŠã³ããŒãã®é åºã«å¿ããŠã©ã³ãã ãªé åºã§çºçããŸãïŒãžã§ãã¯
curl_multi_add_handle ïŒïŒãåŒã³åºããŸããïŒã ãŸãã倧éã®ããªã¥ãŒã ãããŠã³ããŒããããå Žåããã¹ãŠã®ããŒãžãåä¿¡ãããã®ãåŸ
ã€ããšã¯æå³ããããŸãããåä¿¡ããããŒãžã®åŠçãéå§ã§ããŸãã ãããããã¹ãŠãäžæ¬ã§ååŸãããªãã·ã§ã³ããé¢éžãã䟡å€ã¯ãããŸããã
ãããè¡ãã«ã¯ã1ïŒé¢æ°ã®åœ¢åŒã§ãã¹ãŠãå®è£
ããŸãã2ïŒåä¿¡ããåãã¡ã€ã«ã«å¯ŸããŠåŒã³åºãããã³ãŒã«ããã¯é¢æ°ãæå®ãããã©ã¡ãŒã¿ãŒãå°å
¥ããŸãã ã³ãŒã«ããã¯ãèšå®ãããŠããªãå Žåããã¹ãŠã®ããŒãžãäžåºŠã«ååŸãããªãã·ã§ã³ãé©çšãããŸãã 以äžã«äŸã瀺ããŸãã
<ïŒphp
//åçŽãªã³ãŒã«ããã¯ã®äŸã å®è³ªçã«ãããŒæ©èœã
/**
 * Sample per-page callback: prints the outcome of one finished transfer.
 *
 * @param string $url         URL of the page that was downloaded.
 * @param string $content     Page body.
 * @param int    $curl_status cURL result code; 0 (falsy) is treated as success.
 * @param mixed  $ch          The easy handle of the transfer; only used to
 *                            fetch the error text when the transfer failed.
 * @return void
 */
function my_callback($url, $content, $curl_status, $ch) {
    // Variables are wrapped in {} because PHP accepts bytes >= 0x80 in
    // variable names, so a multi-byte character directly after "$name"
    // would otherwise be parsed as part of the name.
    echo "ページのダウンロード[{$url}]";
    if (!$curl_status) {
        echo "成功しました。ページテキスト：\n{$content}\n";
    }
    else {
        echo "エラーで失敗しました（{$curl_status}）" . curl_error($ch) . "\n";
    }
}
/**
 * Download a list of URLs in parallel through curl_multi.
 *
 * @param array          $urls     URLs to download.
 * @param callable|false $callback When not false, called once per finished
 *                                 transfer as
 *                                 $callback($url, $content, $curl_status, $ch).
 *                                 When false, the pages are collected and
 *                                 returned instead.
 * @return array|true true when a callback was supplied; otherwise an array
 *                    keyed by effective URL whose values are
 *                    array('content' => ..., 'status' => ..., 'status_text' => ...).
 */
function http_load($urls, $callback = false) {
    $mh = curl_multi_init();
    $chs = array();
    foreach ($urls as $url) {
        $chs[] = ($ch = curl_init());
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        // CURLOPT_RETURNTRANSFER: hand the body back as a string instead of
        // writing it to STDOUT.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_multi_add_handle($mh, $ch);
    }

    // Without a callback the pages are accumulated and returned.
    $results = array();

    $running = null;
    do {
        curl_multi_exec($mh, $running);

        // Read from the multi handle itself ($mh; the original passed an
        // undefined variable) and drain the whole queue: several transfers
        // may finish between two curl_multi_exec() calls, and a single read
        // per change of $running would lose pages once the loop ends.
        while (($info = curl_multi_info_read($mh)) !== false) {
            $ch = $info['handle'];
            // Body of the page that has just finished downloading.
            $content = curl_multi_getcontent($ch);
            // URL that was actually downloaded.
            $url = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);

            if ($callback !== false) {
                // Hand the page to the caller's handler.
                $callback($url, $content, $info['result'], $ch);
            }
            else {
                // Store it in the result map. The error text is captured
                // here because the handle is closed before returning.
                $results[$url] = array(
                    'content'     => $content,
                    'status'      => $info['result'],
                    'status_text' => curl_error($ch),
                );
            }
        }

        // Sleep until there is socket activity instead of spinning the CPU.
        if ($running > 0) {
            curl_multi_select($mh, 1.0);
        }
    } while ($running > 0);

    // Detach and close every easy handle, then the multi handle.
    foreach ($chs as $ch) {
        curl_multi_remove_handle($mh, $ch);
        curl_close($ch);
    }
    curl_multi_close($mh);

    return ($callback !== false) ? true : $results;
}
$urls = array("www.example.com", "www.php.net");

// Simple variant: no callback, all pages are returned at once.
print_r(http_load($urls));

// Callback variant. The function name must be passed as a string: a bare
// my_callback is an undefined constant (a fatal error in PHP 8).
var_export(http_load($urls, 'my_callback'));
ïŒ>
ãã§ã«ã¯ããã«èå³æ·±ãã éèŠãªç¹ïŒã³ãŒã«ããã¯ã®å Žåã4çªç®ã®ãã©ã¡ãŒã¿ãŒã¯$ chæ¥ç¶èšè¿°åã§ãããããã·ã¥ãåºåããå Žåããšã©ãŒã®åãªãæååã®èª¬æã§ãïŒãã¹ãŠãåé¡ãªããã°ã空ã®æååã§ãïŒã ãªãã§ïŒ curl_errorïŒïŒã¯èšè¿°åãæž¡ãå¿
èŠããããé¢æ°ã®æåŸã§çµäºããŸãã ãã®ãããã³ãŒã«ããã¯ã§ã¯ãŸã ååšãã䜿çšã§ããŸãããããã·ã¥ã§ã¯äœã®äŸ¡å€ãäžããããŸããã ãŸãã¯ããšã©ãŒã³ãŒãã®æååã®èª¬æã¯
ãã¡ãã«ãããŸã ã
ããã§ã¯ãå
ã«é²ã¿ãŸãããã ãªã³ã¯ã®é
åã«å¯ŸããŠã ãã§ãªããåäžã®ããŒãžãããŠã³ããŒãã§ããããã«é¢æ°ãåŒã³åºãããã§ãã ãããè¡ãã«ã¯ã1è¡ã ãè¿œå ããŸãã
<ïŒphp function http_load ïŒ $ urls ã $ callback = false ïŒ{
...
//å¯äžã®ãã©ã¡ãŒã¿ãæž¡ãããŠããé
åèŠçŽ ãšã¿ãªããŸã
//ããã¯ã¢ããã°ã§ãïŒ$ urls = is_arrayïŒ$ urlsïŒïŒ $ urlsïŒé
åïŒ$ urlsïŒ;
$ urls =ïŒé
åïŒ $ urls ;
.... ïŒ>
ããã§ããªã³ã¯ãäžåºŠã«1ã€ãã€ããŠã³ããŒãã§ããŸãïŒhttp_loadïŒ 'google.com'ïŒã ããçš®ã®åºæ¬ãžã®ååž°ã
次ã«ãæ¥ç¶çšã«ããã«å€ãã®éä¿¡ããããŒãèšå®ããå¿
èŠããããŸããã curl_setoptïŒïŒã§äžåºŠã«1ã€ãã€æå®ããã®ã¯å®çšçã§ã¯ãããŸããã
curl_setopt_array 関数を使用することをお勧めします。書き直すと次のようになります（コードの一部）：
<ïŒphp
{ //ãã¹ãŠã®æ¥ç¶ã«å
±éã®ããããŒ
// Extra request headers sent on every connection. Header names must stay
// in their literal HTTP form.
$ext_headers = array(
    'Expect:',
    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9',
    'Accept-Language: ru,en-us;q=0.7,en;q=0.7',
    // 'Accept-Encoding: gzip,deflate', // would have to be decompressed afterwards; maybe later...
    'Accept-Charset: utf-8,windows-1251;q=0.7,*;q=0.5',
);

// cURL options applied to every connection via curl_setopt_array().
$curl_options = array(
    CURLOPT_PORT           => 80,
    CURLOPT_RETURNTRANSFER => 1,  // return the body as a string instead of printing it
    CURLOPT_BINARYTRANSFER => 1,  // pass data through binary-safe
    CURLOPT_CONNECTTIMEOUT => 10, // connection timeout (lookup + connect)
    CURLOPT_TIMEOUT        => 30, // timeout for receiving the data
    CURLOPT_USERAGENT      => 'Mozilla/5.0 (X11; U; Linux x86_64; en-US; rv:1.9.1.1) Gecko/20090716 Ubuntu/9.04 (jaunty) Shiretoko/3.5.1',
    CURLOPT_VERBOSE        => 2,  // verbosity level
    CURLOPT_HEADER         => 0,  // do not include response headers in the output
    CURLOPT_FOLLOWLOCATION => 1,  // follow redirects
    CURLOPT_MAXREDIRS      => 7,  // maximum number of redirects
    CURLOPT_AUTOREFERER    => 1,  // on redirect, set "Referer:" from the "Location:" value
    // CURLOPT_FRESH_CONNECT => 0, // use a new connection every time
    CURLOPT_HTTPHEADER     => $ext_headers,
);
}
function http_load ïŒ $ urls ã $ callback = false ïŒ{
ã°ããŒãã« $ curl_options ;
$ mh = curl_multi_init ïŒïŒ;
ifïŒ $ mh === false ïŒ falseãè¿ã ;
$ urls =ïŒé
åïŒ $ urls ;
$ chs = arrayïŒïŒ;
foreachïŒ $ urls as $ url ïŒ{
$ chs [] =ïŒ $ ch = curl_init ïŒïŒïŒ;
curl_setopt_array ïŒ $ ch ã $ curl_options ïŒ; //ããããŒãäžæ¬ã§èšå®ããŸã
curl_setopt ïŒ $ ch ã CURLOPT_URL ã $ url ïŒ;
curl_multi_add_handle ïŒ $ mh ã $ ch ïŒ;
}
...
ïŒ>
Firefoxのふりをします。ヘッダーについては個別にコメントしません。詳細な説明については、こちらを参照してください。
ãããŠããããã®ããããŒã远跡ããããã«ãé¢æ°ã«3çªç®ã®ãã©ã¡ãŒã¿ãŒãè¿œå ãããŸãã
<?php /* Signature sketch only (empty body): adds a third parameter, $urls_params, which defaults to an empty array. */ function http_load( $urls, $callback = false, $urls_params = array() ) {} ?>
ããããŒãæå®ã§ããŸããããããŒã¯ãåæåæã«æ¥ç¶ã«è¿œå ãããŸãã ãããã£ãŠããã©ã¡ãŒã¿ãŒã䜿çšããŠPOSTãªã¯ãšã¹ããæ£åžžã«éä¿¡ãããã玹ä»ãéä¿¡ããŒã¿ã®åœ¢åŒãæå®ãããã§ããŸãïŒããšãã°ãå§çž®äžïŒã
<ïŒphp
...
foreachïŒ $ urls as $ ind => $ url ïŒ{
$ chs [] =ïŒ $ ch = curl_init ïŒïŒïŒ;
curl_setopt_array ïŒ $ ch ã $ curl_options ïŒ; //ããããŒãäžæ¬ã§èšå®ããŸã
curl_setopt ïŒ $ ch ã CURLOPT_URL ã $ url ïŒ;
//ãã®æ¥ç¶ãåæåããè¿œå ã®ãã©ã¡ãŒã¿ãŒã¯ãããŸããïŒ
ifïŒissetïŒ $ urls_params [ $ ind ]ïŒ&& is_array ïŒ $ urls_params [ $ ind ]ïŒïŒ{
curl_setopt_array ïŒ $ ch ã $ urls_params [ $ ind ]ïŒ;
}
curl_multi_add_handle ïŒ $ mh ã $ ch ïŒ;
}
...
ïŒ>
ããããã®ãããªé¢æ°ã§ãã ãŸããCookieãšPOSTãªã¯ãšã¹ãã®æäœã«ã€ããŠæžãããšãã§ããŸãããããã¯æåŸ
ãåããå Žåã§ãã ãããŠã圌ã¯ããããæžãããäœäººããã¹ã¿ãŒãããïŒ ;ïŒ