Skip to content

Commit

Permalink
Replace Guzzle with RollingCurl
Browse files Browse the repository at this point in the history
Guzzle uses a batch curl method that blocks processing until the slowest request finishes
and returns data.  Effectively this means you have two choices: use small batch sizes
to expedite processing (since average wait time will be lower per-request) but suffer
from poor parallelism, or use large batch sizes to increase parallel curl speed, but
suffer from potentially long blocking times.

For example, if there are 9 requests that return in 0.001s while one takes 100 seconds,
the 0.001s requests will not be processed until 100s later.

RollingCurl is a curl multi-handle library that solves this problem by using a
continuously updated queue.  A batch of handles is initiated, and the first to return
is processed immediately.  Before closing the current request's handle, a new request
is popped off the queue and given to the multihandle to begin processing. This allows
the library to "roll" through the pending requests without waiting for the slowest to
complete.

Using the previous example, all 9 "0.001s" requests would return, process and refill
the queue with new requests while the single "100s" request continues to transfer.

Closes #20
Closes #24
  • Loading branch information
polyfractal committed Mar 23, 2013
1 parent 9746b21 commit c92fdab
Show file tree
Hide file tree
Showing 17 changed files with 1,359 additions and 162 deletions.
277 changes: 277 additions & 0 deletions src/Sherlock/common/tmp/RollingCurl/Request.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,277 @@
<?php

/**
* A cURL library to fetch a large number of resources while only using
* a limited number of simultaneous connections
*
* @package RollingCurl
* @version 1.0
* @author Jeff Minard (http://jrm.cc/)
* @author Josh Fraser (www.joshfraser.com)
* @author Alexander Makarov (http://rmcreative.ru/)
* @license Apache License 2.0
* @link https://github.com/chuyskywalker/rolling-curl
*/

namespace Sherlock\common\tmp\RollingCurl;

/**
 * Value object describing a single cURL request and, once filled in through
 * the setResponse*() methods, the outcome of that request.
 *
 * Every setter returns the instance itself so calls can be chained.
 */
class Request
{
    /**
     * Target URL of the request.
     *
     * @var string
     */
    private $url;

    /**
     * HTTP method, e.g. "GET".
     *
     * @var string
     */
    private $method;

    /**
     * Request body; null until setPostData() is called.
     *
     * @var string|null
     */
    private $postData;

    /**
     * Request headers; null until setHeaders() is called.
     *
     * @var array|null
     */
    private $headers;

    /**
     * Per-request options.
     *
     * Defaults to an empty array so that addOptions() can be called on a
     * fresh request: the array union operator cannot take null as an operand.
     *
     * @var array
     */
    private $options = array();

    /**
     * Arbitrary caller-supplied data attached to this request.
     *
     * @var mixed
     */
    private $extraInfo;

    /**
     * Response body; null until setResponseText() is called.
     *
     * @var string|null
     */
    private $responseText;

    /**
     * Response metadata; null until setResponseInfo() is called.
     *
     * @var array|null
     */
    private $responseInfo;

    /**
     * Error message; null until setResponseError() is called.
     *
     * @var string|null
     */
    private $responseError;

    /**
     * Error number; null until setResponseErrno() is called.
     *
     * @var int|null
     */
    private $responseErrno;

    /**
     * Creates a request for the given URL.
     *
     * @param string $url    Target URL.
     * @param string $method HTTP method; defaults to "GET".
     */
    public function __construct($url, $method = "GET")
    {
        $this->setUrl($url);
        $this->setMethod($method);
    }

    /**
     * You may wish to store some "extra" info with this request, you can put any of that here.
     *
     * @param mixed $extraInfo
     * @return $this
     */
    public function setExtraInfo($extraInfo)
    {
        $this->extraInfo = $extraInfo;

        return $this;
    }

    /**
     * @return mixed Whatever was stored via setExtraInfo(), or null if nothing was.
     */
    public function getExtraInfo()
    {
        return $this->extraInfo;
    }

    /**
     * @param array $headers
     * @return $this
     */
    public function setHeaders($headers)
    {
        $this->headers = $headers;

        return $this;
    }

    /**
     * @return array|null Null when no headers have been set.
     */
    public function getHeaders()
    {
        return $this->headers;
    }

    /**
     * @param string $method
     * @return $this
     */
    public function setMethod($method)
    {
        $this->method = $method;

        return $this;
    }

    /**
     * @return string
     */
    public function getMethod()
    {
        return $this->method;
    }

    /**
     * Replaces all options on this request.
     *
     * @param array $options
     * @throws \InvalidArgumentException When $options is not an array.
     * @return $this
     */
    public function setOptions($options)
    {
        if (!is_array($options)) {
            throw new \InvalidArgumentException("options must be an array");
        }
        $this->options = $options;

        return $this;
    }

    /**
     * Merges additional options into the ones already stored.
     *
     * Keys are preserved, and on a key collision the value from $options
     * overrides the stored one, because $options is the left-hand operand
     * of the array union.
     *
     * @param array $options
     * @throws \InvalidArgumentException When $options is not an array.
     * @return $this
     */
    public function addOptions($options)
    {
        if (!is_array($options)) {
            throw new \InvalidArgumentException("options must be an array");
        }
        $this->options = $options + $this->options;

        return $this;
    }

    /**
     * @return array Empty array when no options have been set.
     */
    public function getOptions()
    {
        return $this->options;
    }

    /**
     * @param string $postData
     * @return $this
     */
    public function setPostData($postData)
    {
        $this->postData = $postData;

        return $this;
    }

    /**
     * @return string|null Null when no post data has been set.
     */
    public function getPostData()
    {
        return $this->postData;
    }

    /**
     * @param int $responseErrno
     * @return $this
     */
    public function setResponseErrno($responseErrno)
    {
        $this->responseErrno = $responseErrno;

        return $this;
    }

    /**
     * @return int|null Null when no error number has been recorded.
     */
    public function getResponseErrno()
    {
        return $this->responseErrno;
    }

    /**
     * @param string $responseError
     * @return $this
     */
    public function setResponseError($responseError)
    {
        $this->responseError = $responseError;

        return $this;
    }

    /**
     * @return string|null Null when no error message has been recorded.
     */
    public function getResponseError()
    {
        return $this->responseError;
    }

    /**
     * @param array $responseInfo
     * @return $this
     */
    public function setResponseInfo($responseInfo)
    {
        $this->responseInfo = $responseInfo;

        return $this;
    }

    /**
     * @return array|null Null when no response info has been recorded.
     */
    public function getResponseInfo()
    {
        return $this->responseInfo;
    }

    /**
     * @param string $responseText
     * @return $this
     */
    public function setResponseText($responseText)
    {
        $this->responseText = $responseText;

        return $this;
    }

    /**
     * @return string|null Null when no response body has been recorded.
     */
    public function getResponseText()
    {
        return $this->responseText;
    }

    /**
     * @param string $url
     * @return $this
     */
    public function setUrl($url)
    {
        $this->url = $url;

        return $this;
    }

    /**
     * @return string
     */
    public function getUrl()
    {
        return $this->url;
    }
}
Loading

0 comments on commit c92fdab

Please sign in to comment.