Adam Pippin
3 years ago
commit
adc0899acc
12 changed files with 584 additions and 0 deletions
@ -0,0 +1,5 @@ |
|||
{ |
|||
"autoload": { |
|||
"psr-4": { "DbCopy\\": "src/" } |
|||
} |
|||
} |
@ -0,0 +1,64 @@ |
|||
<?php |
|||
|
|||
require('vendor/autoload.php'); |
|||
|
|||
if (!isset($argv[1]) || !file_exists($argv[1])) |
|||
{ |
|||
die('Config file not specified or not found'.PHP_EOL); |
|||
} |
|||
|
|||
$config = json_decode(file_get_contents($argv[1]), true); |
|||
|
|||
if (!isset($config)) |
|||
{ |
|||
die('Invalid config file'.PHP_EOL); |
|||
} |
|||
|
|||
$source_driver = $config['source']['driver']; |
|||
$source_config = $config['source']['config']; |
|||
$class = '\\DbCopy\\Driver\\'.$source_driver; |
|||
$source = new $class(); |
|||
$source->open($source_config); |
|||
|
|||
$target_driver = $config['target']['driver']; |
|||
$target_config = $config['target']['config']; |
|||
$class = '\\DbCopy\\Driver\\'.$target_driver; |
|||
$target = new $class(); |
|||
$target->open($target_config); |
|||
|
|||
$transforms = []; |
|||
if (isset($config['transform'])) |
|||
{ |
|||
foreach ($config['transform'] as $transform) |
|||
{ |
|||
$transform_type = $transform['type']; |
|||
$transform_config = $transform['config']; |
|||
$class = '\\DbCopy\\Transform\\'.$transform_type; |
|||
$transforms[] = new $class($transform_config); |
|||
} |
|||
} |
|||
|
|||
$filter_column = $config['filter']['column']; |
|||
$filter_values = $config['filter']['values']; |
|||
|
|||
$copy_options = $config['options']; |
|||
|
|||
$results = []; |
|||
|
|||
foreach ($filter_values as $filter_value) |
|||
{ |
|||
echo "===== COPYING $filter_column=$filter_value =====".PHP_EOL; |
|||
|
|||
$copy = \DbCopy\Copy::create($copy_options); |
|||
// FIXME: This query thing is ES-specific |
|||
$copy->source($source, ['query'=>[$filter_column=>$filter_value]]); |
|||
$copy->destination($target); |
|||
if (!empty($transforms)) |
|||
$copy->transform(\DbCopy\Transform\Group::create($transforms)); |
|||
$count = $copy->execute(); |
|||
echo "Copied: $count records".PHP_EOL; |
|||
$results[$filter_value] = $count; |
|||
} |
|||
|
|||
var_dump($results); |
|||
|
@ -0,0 +1,117 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy; |
|||
use DbCopy\Driver\IDriver; |
|||
|
|||
class Copy |
|||
{ |
|||
private $_options; |
|||
|
|||
private $_src = null, $_srcParams = []; |
|||
private $_dst = null, $_dstParams = []; |
|||
private $_transform = null; |
|||
|
|||
private $_status_lastUpdate, $_status_lastCount; |
|||
|
|||
public function __construct(array $params = []) |
|||
{ |
|||
$this->_options = array_merge([ |
|||
'limit' => null, |
|||
'batch' => 2500, |
|||
'status_interval' => 5 |
|||
], $params); |
|||
} |
|||
|
|||
public static function create(array $params = []): Copy |
|||
{ |
|||
return new Copy($params); |
|||
} |
|||
|
|||
public function source(IDriver $source, array $params = []): Copy |
|||
{ |
|||
$this->_src = $source; |
|||
$this->_srcParams = $params; |
|||
return $this; |
|||
} |
|||
|
|||
public function destination(IDriver $destination, array $params = []): Copy |
|||
{ |
|||
$this->_dst = $destination; |
|||
$this->_dstParams = $params; |
|||
return $this; |
|||
} |
|||
|
|||
public function transform(callable $callback): Copy |
|||
{ |
|||
$this->_transform = $callback; |
|||
return $this; |
|||
} |
|||
|
|||
public function execute(): int |
|||
{ |
|||
$cursor = $this->_src->get($this->_srcParams); |
|||
$transform = $this->_transform; |
|||
$count = 0; |
|||
|
|||
for (;;) |
|||
{ |
|||
// Read records |
|||
$records = []; |
|||
$batchSize = 0; |
|||
while ($record = $cursor->get()) |
|||
{ |
|||
$records[] = $record; |
|||
$batchSize++; |
|||
if ($batchSize == $this->_options['batch'] || |
|||
$count + $batchSize == $this->_options['limit']) |
|||
break; |
|||
} |
|||
|
|||
if ($batchSize == 0) |
|||
{ |
|||
// Done copying! |
|||
return $count; |
|||
} |
|||
|
|||
// Transform records if applicable |
|||
if (is_callable($transform)) |
|||
{ |
|||
foreach ($records as &$record) |
|||
{ |
|||
$record = $transform($record); |
|||
} |
|||
unset($record); |
|||
$records = array_filter($records); |
|||
} |
|||
|
|||
// Write records |
|||
if (method_exists($this->_dst, 'putBatch') && $this->_options['batch'] > 1) |
|||
{ |
|||
$this->_dst->putBatch($records, $this->_dstParams); |
|||
} |
|||
else |
|||
{ |
|||
// Otherwise write one by one |
|||
foreach ($records as $record) |
|||
{ |
|||
$this->_dst->put($record, $this->_dstParams); |
|||
} |
|||
} |
|||
|
|||
$count += sizeof($records); |
|||
|
|||
if (isset($this->_options['status_interval']) && |
|||
$this->_status_lastUpdate + $this->_options['status_interval'] < time()) |
|||
{ |
|||
$processed = $count - $this->_status_lastCount; |
|||
$pps = round($processed / (time() - $this->_status_lastUpdate), 0); |
|||
echo "Complete: ".$count."; Records/sec: ".$pps.PHP_EOL; |
|||
$this->_status_lastUpdate = time(); |
|||
$this->_status_lastCount = $count; |
|||
} |
|||
} |
|||
|
|||
return $count; |
|||
} |
|||
|
|||
} |
@ -0,0 +1,44 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy; |
|||
|
|||
class Cursor implements ICursor |
|||
{ |
|||
private $_closed; |
|||
private $_buffer; |
|||
private $_callback; |
|||
|
|||
public function __construct(callable $callback) |
|||
{ |
|||
$this->_callback = $callback; |
|||
$this->_closed = false; |
|||
$this->_buffer = []; |
|||
} |
|||
|
|||
public function closed(): bool |
|||
{ |
|||
return $this->_closed; |
|||
} |
|||
|
|||
public function get(): ?array |
|||
{ |
|||
if ($this->_closed) |
|||
{ |
|||
return null; |
|||
} |
|||
|
|||
if (sizeof($this->_buffer) == 0) |
|||
{ |
|||
// Fill more data |
|||
$callback = $this->_callback; |
|||
$this->_buffer = $callback(); |
|||
if (sizeof($this->_buffer) == 0) |
|||
{ |
|||
$this->_closed = true; |
|||
return null; |
|||
} |
|||
} |
|||
|
|||
return array_shift($this->_buffer); |
|||
} |
|||
} |
@ -0,0 +1,114 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy\Driver; |
|||
|
|||
class Elasticsearch implements IDriver |
|||
{ |
|||
private $_options; |
|||
|
|||
public function open(array $params): void |
|||
{ |
|||
$params = array_merge([ |
|||
'endpoint' => null, |
|||
'index' => null, |
|||
'retries' => 10, |
|||
'size' => 5000, |
|||
'scroll_window' => '5m', |
|||
'query' => null, |
|||
'timeout' => 500000 |
|||
], $params); |
|||
|
|||
$params = filter_var_array($params, [ |
|||
'endpoint' => ['filter'=>FILTER_VALIDATE_URL, 'flags'=>FILTER_REQUIRE_SCALAR], |
|||
'index' => ['filter'=>FILTER_SANITIZE_ENCODED, 'flags'=>FILTER_REQUIRE_SCALAR], |
|||
'retries' => ['filter'=>FILTER_VALIDATE_INT, 'flags'=>FILTER_REQUIRE_SCALAR], |
|||
'size' => ['filter'=>FILTER_VALIDATE_INT, 'flags'=>FILTER_REQUIRE_SCALAR], |
|||
'scroll_window' => ['filter'=>FILTER_UNSAFE_RAW, 'flags'=>FILTER_REQUIRE_SCALAR], |
|||
'query' => ['filter'=>FILTER_UNSAFE_RAW, 'flags'=>FILTER_REQUIRE_ARRAY] |
|||
]); |
|||
|
|||
$this->_options = $params; |
|||
} |
|||
|
|||
public function close(): void |
|||
{ |
|||
// noop |
|||
} |
|||
|
|||
public function get(array $params = []): \DbCopy\ICursor |
|||
{ |
|||
$options = array_merge($this->_options, $params); |
|||
$state = (object)[ |
|||
'scroll_id' => null, |
|||
]; |
|||
|
|||
return new \DbCopy\Cursor(function() use ($options, $state) { |
|||
$retry_timeout = 500000; |
|||
for ($i=0; $i<$options['retries']; $i++) |
|||
{ |
|||
$ch = curl_init(); |
|||
|
|||
if (isset($state->scroll_id)) |
|||
{ |
|||
$body = json_encode(['scroll'=>$options['scroll_window'], 'scroll_id'=>$state->scroll_id]); |
|||
curl_setopt($ch, CURLOPT_URL, $options['endpoint'].'/_search/scroll'); |
|||
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST'); |
|||
curl_setopt($ch, CURLOPT_HTTPHEADER, [ |
|||
'Content-Type: application/json', |
|||
'Content-Length: '.strlen($body) |
|||
]); |
|||
curl_setopt($ch, CURLOPT_POSTFIELDS, $body); |
|||
} |
|||
else |
|||
{ |
|||
$query_string = []; |
|||
$query_string[] = 'scroll='.$options['scroll_window']; |
|||
$query_string[] = 'size='.$options['size']; |
|||
if (isset($options['query']) && $options['query'] !== false) |
|||
{ |
|||
$search_params = []; |
|||
foreach ($options['query'] as $k=>$v) |
|||
{ |
|||
$search_params[] = $k.':'.$v; |
|||
} |
|||
$query_string[] = 'q='.implode(' AND ', $search_params); |
|||
} |
|||
|
|||
curl_setopt($ch, CURLOPT_URL, $options['endpoint'].'/'.$options['index'].'/_search?'.implode('&', $query_string)); |
|||
} |
|||
|
|||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); |
|||
$response = curl_exec($ch); |
|||
curl_close($ch); |
|||
|
|||
if ($response === false) |
|||
{ |
|||
usleep($timeout); |
|||
$timeout = $timeout * 2; |
|||
continue; |
|||
} |
|||
|
|||
$response = json_decode($response, true); |
|||
|
|||
if (!isset($response['_scroll_id'])) |
|||
{ |
|||
var_dump($response); |
|||
throw new \Exception("Elasticsearch response did not contain scroll id"); |
|||
} |
|||
|
|||
$state->scroll_id = $response['_scroll_id']; |
|||
|
|||
return array_map(function($es_arr) { |
|||
return $es_arr['_source']; |
|||
}, $response['hits']['hits']); |
|||
} |
|||
throw new \Exception("Requested failed after ".$options['retries']." retries."); |
|||
}); |
|||
} |
|||
|
|||
public function put(array $record): void |
|||
{ |
|||
throw new \Exception("Not implemented"); |
|||
} |
|||
|
|||
} |
@ -0,0 +1,12 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy\Driver; |
|||
|
|||
interface IDriver |
|||
{ |
|||
public function open(array $params): void; |
|||
public function close(): void; |
|||
|
|||
public function get(array $params = []): \DbCopy\ICursor; |
|||
public function put(array $record): void; |
|||
} |
@ -0,0 +1,121 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy\Driver; |
|||
|
|||
class Postgres implements IDriver |
|||
{ |
|||
private $_options; |
|||
private $_pdo; |
|||
private $_insert_statement, $_insert_statement_size, $_insert_statement_fields; |
|||
|
|||
public function open(array $params): void |
|||
{ |
|||
$params = array_merge([ |
|||
'dsn' => null, |
|||
'username' => null, |
|||
'password' => null, |
|||
'query' => null, |
|||
'table' => null |
|||
], $params); |
|||
|
|||
$this->_options = $params; |
|||
|
|||
$this->_pdo = new \PDO($params['dsn'], $params['username'], $params['password']); |
|||
$this->_pdo->query('SET AUTOCOMMIT'); |
|||
} |
|||
|
|||
public function close(): void |
|||
{ |
|||
$this->_pdo = null; |
|||
} |
|||
|
|||
public function get(array $params = []): \DbCopy\ICursor |
|||
{ |
|||
$options = array_merge($this->_options, $params); |
|||
|
|||
$statement = $this->_pdo->prepare($params['query']); |
|||
$statement->execute(); |
|||
|
|||
return new \DbCopy\Cursor(function() use ($options, $statement) { |
|||
$record = $statement->fetch(\PDO::FETCH_ASSOC); |
|||
if (!$record) return []; |
|||
return [$record]; |
|||
}); |
|||
|
|||
|
|||
} |
|||
|
|||
public function put(array $record, array $params = []): void |
|||
{ |
|||
$params = array_merge($this->_options, $params); |
|||
|
|||
$sql = 'insert into "'.$params['table'].'"('; |
|||
$fields = array_keys($record); |
|||
$sql .= '"'.implode('", "', $fields).'"'; |
|||
$sql .= ') VALUES ('.implode(', ', str_split(str_repeat('?', sizeof($fields)))).')'; |
|||
$statement = $this->_pdo->prepare($sql); |
|||
$result = $statement->execute(array_values($record)); |
|||
if ($result === false) |
|||
{ |
|||
var_dump($record); |
|||
throw new \Exception("Error inserting record into Postgres: ".print_r($statement->errorInfo(), true)); |
|||
} |
|||
} |
|||
|
|||
public function putBatch(array $records, array $params = []): void |
|||
{ |
|||
$params = array_merge($this->_options, $params); |
|||
|
|||
// Find all possible fields in all records while sorting their keys |
|||
// so they're all consistent |
|||
$fields = []; |
|||
foreach ($records as &$record) |
|||
{ |
|||
$fields = array_merge($fields, array_keys($record)); |
|||
} |
|||
unset($record); |
|||
// Make unique |
|||
$fields = array_unique($fields); |
|||
// Sort so this is kinda stable |
|||
sort($fields); |
|||
|
|||
// Generate prepared statement if one doesn't exist or it doesn't match |
|||
// the current batch |
|||
if (!isset($this->_insert_statement) || |
|||
sizeof($records) != $this->_insert_statement_size || |
|||
$fields !== $this->_insert_statement_fields) |
|||
{ |
|||
echo 'new statement'.PHP_EOL; |
|||
$sql = 'insert into "'.$params['table'].'"('; |
|||
$sql .= '"'.implode('", "', $fields).'"'; |
|||
$sql .= ') VALUES '; |
|||
$sql .= substr(str_repeat('('.implode(', ', str_split(str_repeat('?', sizeof($fields)))).'), ', sizeof($records)), 0, -2); |
|||
$this->_insert_statement = $this->_pdo->prepare($sql); |
|||
$this->_insert_statement_size = sizeof($records); |
|||
$this->_insert_statement_fields = $fields; |
|||
} |
|||
|
|||
// Generate values from records |
|||
$values = []; |
|||
foreach ($records as $record) |
|||
{ |
|||
foreach ($fields as $field) |
|||
{ |
|||
if (isset($record[$field])) |
|||
$values[] = $record[$field]; |
|||
else |
|||
$values[] = null; |
|||
} |
|||
} |
|||
|
|||
// Execute statement |
|||
$result = $this->_insert_statement->execute($values); |
|||
|
|||
if ($result === false) |
|||
{ |
|||
throw new \Exception("Error inserting record into Postgres: ".print_r($this->_insert_statement->errorInfo(), true)); |
|||
} |
|||
|
|||
} |
|||
|
|||
} |
@ -0,0 +1,9 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy; |
|||
|
|||
interface ICursor |
|||
{ |
|||
public function closed(): bool; |
|||
public function get(): ?array; |
|||
} |
@ -0,0 +1,31 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy\Transform; |
|||
|
|||
class FieldMap extends Transform |
|||
{ |
|||
private $_fields; |
|||
|
|||
public function __construct(array $fields) |
|||
{ |
|||
$this->_fields = $fields; |
|||
} |
|||
|
|||
public static function create(array $fields): FieldMap |
|||
{ |
|||
return new FieldMap($fields); |
|||
} |
|||
|
|||
public function __invoke($record): ?array |
|||
{ |
|||
$new_record = []; |
|||
foreach ($this->_fields as $src=>$dst) |
|||
{ |
|||
if (isset($record[$src])) |
|||
{ |
|||
$new_record[$dst] = $record[$src]; |
|||
} |
|||
} |
|||
return $new_record; |
|||
} |
|||
} |
@ -0,0 +1,29 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy\Transform; |
|||
|
|||
class Group extends Transform |
|||
{ |
|||
private $_transforms; |
|||
|
|||
public function __construct(array $transforms) |
|||
{ |
|||
$this->_transforms = $transforms; |
|||
} |
|||
|
|||
public static function create(array $transforms): Group |
|||
{ |
|||
return new Group($transforms); |
|||
} |
|||
|
|||
public function __invoke($record): ?array |
|||
{ |
|||
foreach ($this->_transforms as $transform) |
|||
{ |
|||
$record = $transform($record); |
|||
if (!isset($record)) |
|||
return null; |
|||
} |
|||
return $record; |
|||
} |
|||
} |
@ -0,0 +1,8 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy\Transform; |
|||
|
|||
abstract class Transform |
|||
{ |
|||
public abstract function __invoke($record): ?array; |
|||
} |
@ -0,0 +1,30 @@ |
|||
<?php |
|||
|
|||
namespace DbCopy\Transform; |
|||
|
|||
class ValueMap extends Transform |
|||
{ |
|||
private $_map; |
|||
|
|||
public function __construct(array $map) |
|||
{ |
|||
$this->_map = $map; |
|||
} |
|||
|
|||
public static function create(array $map): ValueMap |
|||
{ |
|||
return new ValueMap($map); |
|||
} |
|||
|
|||
public function __invoke($record): ?array |
|||
{ |
|||
foreach ($this->_map as $field=>$values) |
|||
{ |
|||
if (isset($record[$field]) && isset($values[$record[$field]])) |
|||
{ |
|||
$record[$field] = $values[$record[$field]]; |
|||
} |
|||
} |
|||
return $record; |
|||
} |
|||
} |
Loading…
Reference in new issue