Shift data around between different data stores.
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

114 lines
3.0 KiB

<?php
namespace DbCopy\Driver;
/**
 * Read-only driver that pages through an Elasticsearch index using the scroll API.
 *
 * Options accepted by open(), each of which can be overridden per call in get():
 *  - endpoint       base URL of the cluster (validated as a URL)
 *  - index          index name (URL-encoded by open())
 *  - retries        number of attempts made for each page fetch (default 10)
 *  - size           page size requested from Elasticsearch (default 5000)
 *  - scroll_window  scroll keep-alive sent with every request (default '5m')
 *  - query          optional field => value map, joined with AND into a `q=` parameter
 *  - timeout        initial retry back-off in microseconds; doubled after every
 *                   failed attempt (default 500000)
 */
class Elasticsearch implements IDriver
{
    /** Back-off, in microseconds, used when no valid 'timeout' option is available. */
    private const DEFAULT_BACKOFF_USEC = 500000;

    /** Filtered options produced by open(); entries that failed validation are false. */
    private $_options;

    /**
     * Merge the supplied parameters over the defaults, validate them and store the result.
     *
     * Validation is done with filter_var_array(), so an option that fails its
     * filter is stored as false rather than raising an error.
     *
     * @param array $params Driver options (see the class description).
     */
    public function open(array $params): void
    {
        $params = array_merge([
            'endpoint' => null,
            'index' => null,
            'retries' => 10,
            'size' => 5000,
            'scroll_window' => '5m',
            'query' => null,
            'timeout' => self::DEFAULT_BACKOFF_USEC,
        ], $params);

        // 'timeout' must be listed here: filter_var_array() drops every key
        // that is absent from the definition, which made the option unusable.
        $this->_options = filter_var_array($params, [
            'endpoint' => ['filter' => FILTER_VALIDATE_URL, 'flags' => FILTER_REQUIRE_SCALAR],
            'index' => ['filter' => FILTER_SANITIZE_ENCODED, 'flags' => FILTER_REQUIRE_SCALAR],
            'retries' => ['filter' => FILTER_VALIDATE_INT, 'flags' => FILTER_REQUIRE_SCALAR],
            'size' => ['filter' => FILTER_VALIDATE_INT, 'flags' => FILTER_REQUIRE_SCALAR],
            'scroll_window' => ['filter' => FILTER_UNSAFE_RAW, 'flags' => FILTER_REQUIRE_SCALAR],
            'query' => ['filter' => FILTER_UNSAFE_RAW, 'flags' => FILTER_REQUIRE_ARRAY],
            'timeout' => ['filter' => FILTER_VALIDATE_INT, 'flags' => FILTER_REQUIRE_SCALAR],
        ]);
    }

    /**
     * Nothing to release: every request opens and closes its own cURL handle.
     */
    public function close(): void
    {
        // noop
    }

    /**
     * Create a cursor whose callback fetches one page of documents per invocation.
     *
     * The first invocation starts a scroll search; later invocations continue it
     * with the scroll id remembered from the previous response. Only transport
     * failures (curl_exec() returning false) are retried.
     *
     * @param array $params Per-call overrides merged over the options given to open().
     * @return \DbCopy\ICursor Cursor wrapping the page-fetching callback.
     */
    public function get(array $params = []): \DbCopy\ICursor
    {
        $options = array_merge($this->_options, $params);
        $state = (object)[
            'scroll_id' => null,
        ];

        // Both values depend only on $options, so compute them once instead of
        // on every attempt.
        $search_url = $this->_searchUrl($options);
        $initial_backoff = $this->_initialBackoff($options);

        return new \DbCopy\Cursor(function() use ($options, $state, $search_url, $initial_backoff) {
            // Back-off restarts for every page and doubles after each failure.
            $backoff = $initial_backoff;
            for ($attempt = 0; $attempt < $options['retries']; $attempt++)
            {
                $ch = curl_init();
                if (isset($state->scroll_id))
                {
                    // Continue an existing scroll.
                    $body = json_encode(['scroll' => $options['scroll_window'], 'scroll_id' => $state->scroll_id]);
                    curl_setopt($ch, CURLOPT_URL, $options['endpoint'].'/_search/scroll');
                    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
                    curl_setopt($ch, CURLOPT_HTTPHEADER, [
                        'Content-Type: application/json',
                        'Content-Length: '.strlen($body),
                    ]);
                    curl_setopt($ch, CURLOPT_POSTFIELDS, $body);
                }
                else
                {
                    // First page: open the scroll with a URI search.
                    curl_setopt($ch, CURLOPT_URL, $search_url);
                }
                curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
                $raw = curl_exec($ch);
                curl_close($ch);

                if ($raw === false)
                {
                    usleep($backoff);
                    $backoff *= 2;
                    continue;
                }

                $response = json_decode($raw, true);
                if (!isset($response['_scroll_id']))
                {
                    // Put the start of the body in the exception instead of
                    // dumping it to stdout.
                    throw new \Exception("Elasticsearch response did not contain scroll id: ".substr($raw, 0, 512));
                }
                $state->scroll_id = $response['_scroll_id'];

                // A response without hits.hits is treated as an empty page.
                return array_map(function($hit) {
                    return $hit['_source'];
                }, $response['hits']['hits'] ?? []);
            }
            throw new \Exception("Requested failed after ".$options['retries']." retries.");
        });
    }

    /**
     * Writing is not supported by this driver.
     *
     * @param array $record Ignored.
     * @throws \Exception Always.
     */
    public function put(array $record): void
    {
        throw new \Exception("Not implemented");
    }

    /**
     * Build the URL of the request that opens the scroll.
     *
     * Every query-string value is percent-encoded; the `q` value in particular
     * contains spaces and colons that are not valid in a raw URL.
     */
    private function _searchUrl(array $options): string
    {
        $query_string = [
            'scroll='.rawurlencode((string)$options['scroll_window']),
            'size='.rawurlencode((string)$options['size']),
        ];
        // 'query' is false when open() rejected it and null when it was never set.
        if (isset($options['query']) && $options['query'] !== false)
        {
            $terms = [];
            foreach ($options['query'] as $field => $value)
            {
                $terms[] = $field.':'.$value;
            }
            $query_string[] = 'q='.rawurlencode(implode(' AND ', $terms));
        }
        return $options['endpoint'].'/'.$options['index'].'/_search?'.implode('&', $query_string);
    }

    /**
     * Resolve the first retry delay in microseconds, falling back to the
     * default when 'timeout' is missing or is not a positive integer.
     */
    private function _initialBackoff(array $options): int
    {
        $usec = filter_var(
            $options['timeout'] ?? null,
            FILTER_VALIDATE_INT,
            ['options' => ['min_range' => 1]]
        );
        return $usec === false ? self::DEFAULT_BACKOFF_USEC : $usec;
    }
}