下面 ThinkPHP 框架教程栏目将给大家讲解 ThinkPHP 5.1 如何利用 CLI 命令行 + Guzzle 类库实现“多线程”爬虫(实际是基于 Guzzle Pool 的并发异步请求,而非真正的系统线程),希望对需要的朋友有所帮助!
php think make:command Thread thread
测试能否成功执行
php think thread
Guzzle 中文文档地址:https://guzzle-cn.readthedocs.io/zh_CN/latest/quickstart.html
<?php
/**
 * Created by.
 * User: Jim
 * Date: 2020/9/29
 * Time: 14:31
 */

namespace app\command;

use GuzzleHttp\Client;
use GuzzleHttp\Pool;
use think\console\Command;
use think\console\Input;
use think\console\Output;

/**
 * Console command that sends a fixed number of POST requests concurrently
 * through a Guzzle request pool and prints every response body.
 *
 * Class Thread
 * @package app\command
 * Docs: https://guzzle-cn.readthedocs.io/zh_CN/latest/quickstart.html
 */
class Thread extends Command
{
    /**
     * Total number of requests to send.
     * @var int
     */
    protected $totalPageCount = 50;

    /**
     * Ordinal of the request currently being accounted for (starts at 1).
     * @var int
     */
    protected static $counter = 1;

    /**
     * Maximum number of requests in flight at once (the pool's "concurrency" option).
     * @var int
     */
    protected $threads = 20;

    /**
     * Registers the command under the name "thread".
     */
    protected function configure()
    {
        // Command configuration.
        $this->setName('thread');
        // No arguments or options are defined.
    }

    /**
     * Sends $totalPageCount POST requests with at most $threads in flight,
     * writing each response body (or failure reason) to the console.
     *
     * @param Input  $input  Console input (unused).
     * @param Output $output Console output used for progress and results.
     */
    protected function execute(Input $input, Output $output)
    {
        // The counter is static, so reset it in case execute() runs more than once per process.
        self::$counter = 1;

        $client = new Client();
        $uri = 'https://apinew.juejin.im/content_api/v1/short_msg/detail';

        // Lazily yields one request factory per request. A counted loop is used instead of
        // range(): range(null, $total) produced $total + 1 items, and range() never yields
        // an empty list, so a total of 0 would still have sent requests.
        $requests = function ($total) use ($client, $uri) {
            for ($i = 0; $i < $total; $i++) {
                yield function () use ($client, $uri) {
                    return $client->postAsync($uri, [
                        // NOTE(review): TLS certificate verification is disabled here;
                        // confirm this is intentional before using outside of a demo.
                        'verify' => false,
                        'json' => [
                            'msg_id' => '6845185452727599118'
                        ]
                    ]);
                };
            }
        };

        // Advances the completion counter and prints the end banner after the last request.
        $reportIfFinished = function () use ($output) {
            if ($this->checkThreadIsEnd() === true) {
                $output->writeln("------------请求结束---------");
            }
        };

        $pool = new Pool($client, $requests($this->totalPageCount), [
            'concurrency' => $this->threads,
            // Request succeeded.
            'fulfilled' => function ($response, $index) use ($output, $reportIfFinished) {
                $res = $response->getBody()->getContents();
                $output->writeln($res);
                // $index is the zero-based key of the request in the generator; show it 1-based.
                $position = $index + 1;
                $output->writeln("正在执行第{$position}个·····");
                $reportIfFinished();
            },
            // Request failed. Failures also count as completed, otherwise the end banner
            // would never be printed when at least one request is rejected.
            'rejected' => function ($reason, $index) use ($output, $reportIfFinished) {
                $output->writeln("执行失败,{$reason}");
                $reportIfFinished();
            },
        ]);

        // Start the transfers and block until every request has settled.
        $promise = $pool->promise();
        $promise->wait();
    }

    /**
     * Accounts for one completed request and reports whether it was the last one.
     *
     * @return bool true once the counter has reached $totalPageCount, false otherwise.
     */
    private function checkThreadIsEnd()
    {
        if (self::$counter < $this->totalPageCount) {
            self::$counter++;
            return false;
        }

        return true;
    }
}
php think thread