Changes from feedback
This commit is contained in:
parent
a35654a071
commit
4f7d180d2c
2 changed files with 303 additions and 313 deletions
|
@ -1,19 +1,41 @@
|
||||||
<?php
|
<?php
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class DataConsistencyChecker
|
||||||
|
*
|
||||||
|
* The DataConsistencyChecker class is responsible for checking the consistency of data.
|
||||||
|
*/
|
||||||
|
|
||||||
class DataConsistencyChecker
|
class DataConsistencyChecker
|
||||||
{
|
{
|
||||||
const DEFAULT_PAGE_SIZE = 30;
|
|
||||||
private $_cluster;
|
private $_cluster;
|
||||||
private $session;
|
private $session;
|
||||||
private $cassandra;
|
private $cassandra;
|
||||||
private $directory;
|
private $directory;
|
||||||
private $structured_directory;
|
private $structured_directory;
|
||||||
private $retrived_csv;
|
private $retrived_csv;
|
||||||
static $clientId;
|
private static $clientId;
|
||||||
private static $schemaVersion = 1;
|
private static $schemaVersion = 1;
|
||||||
|
private static $bucketMagic = 4;
|
||||||
|
|
||||||
static $bucketMagic = 4;
|
private const DEFAULT_PAGE_SIZE = 30;
|
||||||
static $cassandraHost = 'cassandra';
|
private const CASSANDRA_RESULT_CSV = 'result_from_cassandra_entries.csv';
|
||||||
|
private const CASSANDRA_HTML_REPORT = 'cassandra.html';
|
||||||
|
private const PHYSICAL_RESULT_CSV = 'result_from_physical_files.csv';
|
||||||
|
private const PHYSICAL_HTML_REPORT = 'physical.html';
|
||||||
|
private const CSV_COLUMN_FILE_ATTACHMENT = 'File/Attachment';
|
||||||
|
private const CSV_COLUMN_FILE_PATH = 'File path';
|
||||||
|
private const CSV_COLUMN_FILE_NAME = 'File name';
|
||||||
|
private const CSV_COLUMN_THUMB1 = 'Thumb 1';
|
||||||
|
private const CSV_COLUMN_THUMB2 = 'Thumb 2';
|
||||||
|
private const CSV_COLUMN_SIZE = 'Size';
|
||||||
|
private const CSV_COLUMN_CREATION_TIME = 'Creation Time';
|
||||||
|
private const CSV_COLUMN_CLIENT_ID = 'ClientId';
|
||||||
|
private const CSV_COLUMN_BUCKET = 'Bucket';
|
||||||
|
private const CSV_COLUMN_ID = 'Id';
|
||||||
|
private const CSV_EXTENSION = '.csv';
|
||||||
|
private const NUM_FIELDS = 3;
|
||||||
|
private const DELIMITER = '-';
|
||||||
|
|
||||||
public function __construct($directory = null)
|
public function __construct($directory = null)
|
||||||
{
|
{
|
||||||
|
@ -29,26 +51,26 @@ class DataConsistencyChecker
|
||||||
*
|
*
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function init(): void
|
public function init(): void
|
||||||
{
|
{
|
||||||
$config = parse_ini_file("config.ini", true);
|
$config = parse_ini_file('config.ini', true);
|
||||||
|
$cassandraConfig = $config['CASSANDRA'];
|
||||||
$this->_cluster = Cassandra::cluster()
|
$this->_cluster = Cassandra::cluster()
|
||||||
->withContactPoints($config['CASSANDRA']['host'])
|
->withContactPoints($cassandraConfig['host'])
|
||||||
->withPort(9042)
|
->withPort($cassandraConfig['port'])
|
||||||
->withCredentials(
|
->withCredentials(
|
||||||
$config['CASSANDRA']['user'],
|
$cassandraConfig['user'],
|
||||||
$config['CASSANDRA']['password']
|
$cassandraConfig['password']
|
||||||
)
|
)
|
||||||
->build();
|
->build();
|
||||||
if ($this->_cluster) {
|
if ($this->_cluster) {
|
||||||
try {
|
try {
|
||||||
$this->cassandra = $this->_cluster->connect($config['CASSANDRA']['keyspace']);
|
$this->cassandra = $this->_cluster->connect($cassandraConfig['keyspace']);
|
||||||
} catch (Exception $e) {
|
} catch (Exception $e) {
|
||||||
echo "err\n";
|
echo "An error occurred: " . $e->getMessage() . "\n";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -57,45 +79,54 @@ class DataConsistencyChecker
|
||||||
* @param array $arguments The command line arguments.
|
* @param array $arguments The command line arguments.
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function runFromCommandLine($arguments)
|
|
||||||
|
public function runFromCommandLine(array $arguments): void
|
||||||
{
|
{
|
||||||
$shortOptions = "hd:v:o:r:s:";
|
$shortOptions = "hd:v:o:r:s:";
|
||||||
$longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"];
|
$longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"];
|
||||||
|
|
||||||
$options = getopt($shortOptions, $longOptions);
|
$options = getopt($shortOptions, $longOptions);
|
||||||
|
|
||||||
if (count($options) == 0 || isset($options['h']) || isset($options['help'])) {
|
if (count($options) === 0 || isset($options['h']) || isset($options['help'])) {
|
||||||
$this->displayHelpMessage();
|
$this->displayHelpMessage();
|
||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
$directory = isset($options['directory']) ? $options['directory'] : (isset($options['d']) ? $options['d'] : null);
|
$directory = $options['directory'] ?? $options['d'] ?? null;
|
||||||
$schemaVersion = isset($options['version']) ? $options['version'] : (isset($options['v']) ? $options['v'] : null);
|
$schemaVersion = $options['version'] ?? $options['v'] ?? null;
|
||||||
|
$source = $options['source'] ?? $options['s'] ?? null;
|
||||||
|
$remove = $options['remove'] ?? $options['r'] ?? null;
|
||||||
|
$structured_directory = $options['output'] ?? $options['o'] ?? null;
|
||||||
|
|
||||||
$source = isset($options['source']) ? $options['source'] : (isset($options['s']) ? $options['s'] : null);
|
|
||||||
$remove = isset($options['remove']) ? $options['remove'] : (isset($options['r']) ? $options['r'] : null);
|
|
||||||
|
|
||||||
$structured_directory = isset($options['output']) ? $options['output'] : (isset($options['o']) ? $options['o'] : null);
|
|
||||||
if (!file_exists($structured_directory)) {
|
if (!file_exists($structured_directory)) {
|
||||||
|
if ($structured_directory === null) {
|
||||||
|
echo "Output directory is required. Please specify the --output option.\n";
|
||||||
|
exit;
|
||||||
|
}
|
||||||
mkdir($structured_directory, 0777, true);
|
mkdir($structured_directory, 0777, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
$this->structured_directory = $structured_directory;
|
$this->structured_directory = $structured_directory;
|
||||||
|
|
||||||
if (($directory === null || $schemaVersion === null) && $remove === null && $source === null) {
|
if (($directory === null || $schemaVersion === null) && $remove === null && $source === null) {
|
||||||
echo "Missing Attachment directory or schema version.\n";
|
echo "Missing Attachment directory or schema version.\n";
|
||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($schemaVersion && !in_array($schemaVersion, [1, 2])) {
|
if ($schemaVersion && !in_array($schemaVersion, [1, 2])) {
|
||||||
echo "Invalid schema version. Only versions 1 and 2 are supported.\n";
|
echo "Invalid schema version. Only versions 1 and 2 are supported.\n";
|
||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
if ($this->structured_directory == null) {
|
|
||||||
$this->structured_directory = './';
|
if ($structured_directory === null) {
|
||||||
|
echo "Output directory is required. Please specify the --output option.\n";
|
||||||
|
exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
static::$schemaVersion = (int) $schemaVersion;
|
static::$schemaVersion = (int) $schemaVersion;
|
||||||
$this->directory = $directory;
|
$this->directory = $directory;
|
||||||
$this->retrived_csv = './result_from_physical_files.csv';
|
$this->retrived_csv = './result_from_physical_files.csv';
|
||||||
|
|
||||||
if ($remove && $source) {
|
if ($remove && $source) {
|
||||||
$this->processAttachmentDeletionCSV($remove, $source);
|
$this->processAttachmentDeletionCSV($remove, $source);
|
||||||
} else {
|
} else {
|
||||||
|
@ -107,11 +138,13 @@ class DataConsistencyChecker
|
||||||
exit;
|
exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Displays the help message with instructions on how to use the script.
|
* Displays the help message with instructions on how to use the script.
|
||||||
*
|
*
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function displayHelpMessage(): void
|
private function displayHelpMessage(): void
|
||||||
{
|
{
|
||||||
$helpMessage = <<<EOT
|
$helpMessage = <<<EOT
|
||||||
|
@ -140,13 +173,17 @@ class DataConsistencyChecker
|
||||||
* @param string $directory The directory path to be removed.
|
* @param string $directory The directory path to be removed.
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
private function removeDirectory($directory): void
|
|
||||||
|
private function removeDirectory(string $directory): void
|
||||||
{
|
{
|
||||||
if (!is_dir($directory)) {
|
if (!is_dir($directory)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
$files = array_diff(scandir($directory), ['.', '..']);
|
$files = array_diff(
|
||||||
|
scandir($directory),
|
||||||
|
['.', '..']
|
||||||
|
);
|
||||||
|
|
||||||
foreach ($files as $file) {
|
foreach ($files as $file) {
|
||||||
$path = $directory . '/' . $file;
|
$path = $directory . '/' . $file;
|
||||||
|
@ -157,7 +194,6 @@ class DataConsistencyChecker
|
||||||
unlink($path);
|
unlink($path);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
rmdir($directory);
|
rmdir($directory);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -167,7 +203,8 @@ class DataConsistencyChecker
|
||||||
* @param string $tableName The name of the table in the database to check consistency for.
|
* @param string $tableName The name of the table in the database to check consistency for.
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
public function checkConsistency($tableName)
|
|
||||||
|
public function checkConsistency(string $tableName): void
|
||||||
{
|
{
|
||||||
$this->init();
|
$this->init();
|
||||||
$dbEntries = $this->getDbEntries($tableName);
|
$dbEntries = $this->getDbEntries($tableName);
|
||||||
|
@ -182,7 +219,7 @@ class DataConsistencyChecker
|
||||||
* @return array An array containing the file entries organized by dynamic values.
|
* @return array An array containing the file entries organized by dynamic values.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function getFileEntries($directory): array
|
private function getFileEntries(string $directory): array
|
||||||
{
|
{
|
||||||
$files = glob($directory . '/*');
|
$files = glob($directory . '/*');
|
||||||
$entries = [];
|
$entries = [];
|
||||||
|
@ -190,19 +227,20 @@ class DataConsistencyChecker
|
||||||
foreach ($files as $file) {
|
foreach ($files as $file) {
|
||||||
if (is_file($file)) {
|
if (is_file($file)) {
|
||||||
$fileName = basename($file);
|
$fileName = basename($file);
|
||||||
|
|
||||||
if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) {
|
if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$dashParts = explode("-", $fileName, 2);
|
$dashParts = explode(self::DELIMITER, $fileName, 2);
|
||||||
$dotParts = explode(".", $fileName);
|
$dotParts = explode('.', $fileName);
|
||||||
if (count($dashParts) === 2) {
|
if (count($dashParts) === 2) {
|
||||||
$clientId = $dashParts[0];
|
$clientId = $dashParts[0];
|
||||||
$id = $dashParts[1];
|
$id = $dashParts[1];
|
||||||
$fileParts = $dashParts;
|
$fileParts = $dashParts;
|
||||||
$delimiter = '-';
|
$delimiter = self::DELIMITER;
|
||||||
} elseif (count($dotParts) >= 2) {
|
} elseif (count($dotParts) >= 2) {
|
||||||
$clientId = $dotParts[0];
|
$clientId = $dotParts[0];
|
||||||
$id = implode(".", array_slice($dotParts, 1));
|
$id = implode('.', array_slice($dotParts, 1));
|
||||||
$fileParts = $dotParts;
|
$fileParts = $dotParts;
|
||||||
$delimiter = '.';
|
$delimiter = '.';
|
||||||
} else {
|
} else {
|
||||||
|
@ -228,7 +266,6 @@ class DataConsistencyChecker
|
||||||
foreach ($entries as $clientId => $clientEntries) {
|
foreach ($entries as $clientId => $clientEntries) {
|
||||||
$this->createPhysicalFileCSV($clientId, $clientEntries);
|
$this->createPhysicalFileCSV($clientId, $clientEntries);
|
||||||
}
|
}
|
||||||
|
|
||||||
return $entries;
|
return $entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -241,9 +278,9 @@ class DataConsistencyChecker
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function createPhysicalFileCSV($clientId, $entries)
|
private function createPhysicalFileCSV(string $clientId, array $entries): void
|
||||||
{
|
{
|
||||||
$fileName = $this->structured_directory . "physical_" . $clientId . ".csv";
|
$fileName = $this->structured_directory . 'physical_' . $clientId . self::CSV_EXTENSION;
|
||||||
$csvFile = fopen($fileName, 'w');
|
$csvFile = fopen($fileName, 'w');
|
||||||
fputcsv($csvFile, ['id', 'size', 'creation_time']);
|
fputcsv($csvFile, ['id', 'size', 'creation_time']);
|
||||||
foreach ($entries as $entry) {
|
foreach ($entries as $entry) {
|
||||||
|
@ -265,9 +302,9 @@ class DataConsistencyChecker
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function createDBFileCSV($clientId, $entries): void
|
private function createDBFileCSV(string $clientId, array $entries): void
|
||||||
{
|
{
|
||||||
$fileName = $this->structured_directory . "cassandra_" . (string) $clientId . ".csv";
|
$fileName = $this->structured_directory . 'cassandra_' . (string) $clientId . self::CSV_EXTENSION;
|
||||||
$csvFile = fopen($fileName, 'w');
|
$csvFile = fopen($fileName, 'w');
|
||||||
|
|
||||||
$headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id'];
|
$headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id'];
|
||||||
|
@ -283,10 +320,8 @@ class DataConsistencyChecker
|
||||||
$entry['client_id'],
|
$entry['client_id'],
|
||||||
$entry['attachment_id'],
|
$entry['attachment_id'],
|
||||||
];
|
];
|
||||||
|
|
||||||
fputcsv($csvFile, $rowData);
|
fputcsv($csvFile, $rowData);
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose($csvFile);
|
fclose($csvFile);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -297,15 +332,13 @@ class DataConsistencyChecker
|
||||||
* @return array An array containing the retrieved entries.
|
* @return array An array containing the retrieved entries.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
private function getDbEntries(string $tableName): array
|
||||||
private function getDbEntries($tableName)
|
|
||||||
{
|
{
|
||||||
|
$query = sprintf(
|
||||||
if ($this->schema_version() === 1) {
|
'SELECT client_id, id, size, filename, created_on%s FROM %s',
|
||||||
$query = "SELECT client_id, id, size, filename, created_on FROM $tableName";
|
$this->schema_version() === 1 ? '' : ', bucket',
|
||||||
} else {
|
$tableName
|
||||||
$query = "SELECT client_id, id, size, filename, created_on, bucket FROM $tableName";
|
);
|
||||||
}
|
|
||||||
$arguments = [];
|
$arguments = [];
|
||||||
$result = $this->cassandra->execute(
|
$result = $this->cassandra->execute(
|
||||||
$query,
|
$query,
|
||||||
|
@ -317,7 +350,6 @@ class DataConsistencyChecker
|
||||||
$entries = [];
|
$entries = [];
|
||||||
while ($result) {
|
while ($result) {
|
||||||
foreach ($result as $row) {
|
foreach ($result as $row) {
|
||||||
|
|
||||||
if (preg_match('/^[0-9]+$/', $row['id'])) {
|
if (preg_match('/^[0-9]+$/', $row['id'])) {
|
||||||
$dotParts = explode(".", $row['filename'], 2);
|
$dotParts = explode(".", $row['filename'], 2);
|
||||||
$dynamicValue = substr($dotParts[1], 0, 2);
|
$dynamicValue = substr($dotParts[1], 0, 2);
|
||||||
|
@ -336,7 +368,7 @@ class DataConsistencyChecker
|
||||||
$entries[$dynamicValue][$row['filename']] = $entry;
|
$entries[$dynamicValue][$row['filename']] = $entry;
|
||||||
} else {
|
} else {
|
||||||
$clientId = substr($row['id'], 0, 2);
|
$clientId = substr($row['id'], 0, 2);
|
||||||
$file = $row['client_id'] . '-' . $row['id'];
|
$file = $row['client_id'] . self::DELIMITER . $row['id'];
|
||||||
$timestamp = (int) $row['created_on'];
|
$timestamp = (int) $row['created_on'];
|
||||||
$date = date('Y-m-d H:i:s', $timestamp);
|
$date = date('Y-m-d H:i:s', $timestamp);
|
||||||
$creationTime = str_replace('"', '', $date);
|
$creationTime = str_replace('"', '', $date);
|
||||||
|
@ -357,7 +389,6 @@ class DataConsistencyChecker
|
||||||
}
|
}
|
||||||
$result = $result->nextPage();
|
$result = $result->nextPage();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Merge all entries into a single array
|
// Merge all entries into a single array
|
||||||
$allEntries = [];
|
$allEntries = [];
|
||||||
foreach ($entries as $clientEntries) {
|
foreach ($entries as $clientEntries) {
|
||||||
|
@ -381,7 +412,7 @@ class DataConsistencyChecker
|
||||||
* @return array|null An array containing mismatched entries, or null if the entries match.
|
* @return array|null An array containing mismatched entries, or null if the entries match.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function compareFileEntries($id, $data, $cassandra_file_assoc)
|
private function compareFileEntries(string $id, array $data, array $cassandra_file_assoc)
|
||||||
{
|
{
|
||||||
if (!isset($cassandra_file_assoc[$id])) {
|
if (!isset($cassandra_file_assoc[$id])) {
|
||||||
return [
|
return [
|
||||||
|
@ -412,7 +443,7 @@ class DataConsistencyChecker
|
||||||
* @return array An array containing the lines of the file.
|
* @return array An array containing the lines of the file.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function getFileLines($file): array
|
private function getFileLines(string $file): array
|
||||||
{
|
{
|
||||||
$file_contents = file_get_contents($file);
|
$file_contents = file_get_contents($file);
|
||||||
return explode("\n", $file_contents);
|
return explode("\n", $file_contents);
|
||||||
|
@ -425,7 +456,7 @@ class DataConsistencyChecker
|
||||||
* @return array An array of filtered and mapped entries.
|
* @return array An array of filtered and mapped entries.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function filterAndMapEntries($entries): array
|
private function filterAndMapEntries(array $entries): array
|
||||||
{
|
{
|
||||||
$filtered_entries = array_filter($entries);
|
$filtered_entries = array_filter($entries);
|
||||||
$mapped_entries = array_map(function ($entry) {
|
$mapped_entries = array_map(function ($entry) {
|
||||||
|
@ -440,18 +471,27 @@ class DataConsistencyChecker
|
||||||
* @param string $dir The directory path containing the files to be processed.
|
* @param string $dir The directory path containing the files to be processed.
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
private function process_files_in_directory($dir): void
|
|
||||||
|
private function process_files_in_directory(string $dir): void
|
||||||
{
|
{
|
||||||
$files = glob($dir . '/*.csv');
|
$files = glob($dir . '/*.csv');
|
||||||
$physical_files = array();
|
$physical_files = array();
|
||||||
$cassandra_files = array();
|
$cassandra_files = array();
|
||||||
foreach ($files as $file) {
|
foreach ($files as $file) {
|
||||||
$filename = basename($file, '.csv');
|
$filename = basename($file, static::CSV_EXTENSION);
|
||||||
$csv_type = substr($filename, 0, strpos($filename, '_'));
|
$csv_type = substr(
|
||||||
$file_num = substr($filename, strpos($filename, '_') + 1);
|
$filename,
|
||||||
if ($csv_type == 'physical') {
|
0,
|
||||||
|
strpos($filename, '_')
|
||||||
|
);
|
||||||
|
$file_num = substr(
|
||||||
|
$filename,
|
||||||
|
strpos($filename, '_') + 1
|
||||||
|
);
|
||||||
|
|
||||||
|
if ($csv_type === 'physical') {
|
||||||
$physical_files[$file_num] = $file;
|
$physical_files[$file_num] = $file;
|
||||||
} elseif ($csv_type == 'cassandra') {
|
} elseif ($csv_type === 'cassandra') {
|
||||||
$cassandra_files[$file_num] = $file;
|
$cassandra_files[$file_num] = $file;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -465,7 +505,7 @@ class DataConsistencyChecker
|
||||||
$physical_entries = [];
|
$physical_entries = [];
|
||||||
|
|
||||||
foreach ($physical_files as $file_num => $physical_file) {
|
foreach ($physical_files as $file_num => $physical_file) {
|
||||||
$cassandra_file = $dir . '/cassandra_' . $file_num . '.csv';
|
$cassandra_file = $dir . '/cassandra_' . $file_num . self::CSV_EXTENSION;
|
||||||
|
|
||||||
if (file_exists($physical_file) && file_exists($cassandra_file)) {
|
if (file_exists($physical_file) && file_exists($cassandra_file)) {
|
||||||
$compared_physical[] = $this->compare_csv_files($physical_file, $cassandra_file);
|
$compared_physical[] = $this->compare_csv_files($physical_file, $cassandra_file);
|
||||||
|
@ -476,9 +516,6 @@ class DataConsistencyChecker
|
||||||
} else {
|
} else {
|
||||||
if (!file_exists($physical_file)) {
|
if (!file_exists($physical_file)) {
|
||||||
$missing_physical_files[] = $physical_file;
|
$missing_physical_files[] = $physical_file;
|
||||||
|
|
||||||
echo "cfl: " . var_dump($missing_physical_files) . PHP_EOL;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
if (!file_exists($cassandra_file)) {
|
if (!file_exists($cassandra_file)) {
|
||||||
$missing_cassandra_files[] = $cassandra_file;
|
$missing_cassandra_files[] = $cassandra_file;
|
||||||
|
@ -493,8 +530,8 @@ class DataConsistencyChecker
|
||||||
if ($line === reset($physical_file_lines)) {
|
if ($line === reset($physical_file_lines)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$values = explode(",", $line);
|
$values = explode(',', $line);
|
||||||
if (count($values) == 3) {
|
if (count($values) === self::NUM_FIELDS) {
|
||||||
$values[2] = str_replace('"', '', $values[2]);
|
$values[2] = str_replace('"', '', $values[2]);
|
||||||
$file_assoc[$values[0]] = [$values[1], $values[2]];
|
$file_assoc[$values[0]] = [$values[1], $values[2]];
|
||||||
}
|
}
|
||||||
|
@ -513,7 +550,7 @@ class DataConsistencyChecker
|
||||||
}
|
}
|
||||||
|
|
||||||
foreach ($cassandra_files as $file_num => $cassandra_file) {
|
foreach ($cassandra_files as $file_num => $cassandra_file) {
|
||||||
$physical_file = $dir . '/physical_' . $file_num . '.csv';
|
$physical_file = $dir . '/physical_' . $file_num . self::CSV_EXTENSION;
|
||||||
|
|
||||||
if (!file_exists($physical_file)) {
|
if (!file_exists($physical_file)) {
|
||||||
$cassandra_file_lines = $this->getFileLines($cassandra_file);
|
$cassandra_file_lines = $this->getFileLines($cassandra_file);
|
||||||
|
@ -532,12 +569,14 @@ class DataConsistencyChecker
|
||||||
|
|
||||||
$result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR);
|
$result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR);
|
||||||
$result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR);
|
$result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR);
|
||||||
$this->generateCsvReportForDbEntries($result_from_cassandra_entries, 'result_from_cassandra_entries.csv');
|
|
||||||
$this->generateHtmlReport($result_from_cassandra_entries, 'cassandra.html');
|
$this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV);
|
||||||
$this->generateCsvReportForPhysicalFiles($result_from_physical_files, 'result_from_physical_files.csv');
|
$this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT);
|
||||||
$this->generateHtmlReport($result_from_physical_files, 'physical.html');
|
$this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV);
|
||||||
|
$this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Builds an associative array from file lines.
|
* Builds an associative array from file lines.
|
||||||
*
|
*
|
||||||
|
@ -545,26 +584,23 @@ class DataConsistencyChecker
|
||||||
* @return array An associative array representing the file association.
|
* @return array An associative array representing the file association.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function buildFileAssociation($file_lines): array
|
private function buildFileAssociation(array $file_lines): array
|
||||||
{
|
{
|
||||||
$file_assoc = [];
|
$file_assoc = [];
|
||||||
|
|
||||||
foreach ($file_lines as $line) {
|
foreach ($file_lines as $line) {
|
||||||
if ($line === reset($file_lines)) {
|
if ($line === reset($file_lines)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
$values = explode(",", $line);
|
$values = explode(',', $line);
|
||||||
if (count($values) == 3) {
|
if (count($values) === self::NUM_FIELDS) {
|
||||||
|
|
||||||
$file_assoc[$values[0]] = [$values[1], $values[2]];
|
$file_assoc[$values[0]] = [$values[1], $values[2]];
|
||||||
}
|
}
|
||||||
if (count($values) > 3) {
|
|
||||||
|
|
||||||
|
if (count($values) > 3) {
|
||||||
$file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[6]];
|
$file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[6]];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return $file_assoc;
|
return $file_assoc;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -576,7 +612,7 @@ class DataConsistencyChecker
|
||||||
* @return array An array containing missing entries or entries with mismatched data.
|
* @return array An array containing missing entries or entries with mismatched data.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function compare_csv_files($file1_path, $file2_path): array
|
private function compare_csv_files(string $file1_path, string $file2_path): array
|
||||||
{
|
{
|
||||||
$file1_data = array_map('str_getcsv', file($file1_path));
|
$file1_data = array_map('str_getcsv', file($file1_path));
|
||||||
$file2_data = array_map('str_getcsv', file($file2_path));
|
$file2_data = array_map('str_getcsv', file($file2_path));
|
||||||
|
@ -597,8 +633,16 @@ class DataConsistencyChecker
|
||||||
$clientid_index_1 = array_search('client_id', $file1_headers);
|
$clientid_index_1 = array_search('client_id', $file1_headers);
|
||||||
$bucket_index_1 = array_search('bucket', $file1_headers);
|
$bucket_index_1 = array_search('bucket', $file1_headers);
|
||||||
$attachment_id_index_1 = array_search('attachment_id', $file1_headers);
|
$attachment_id_index_1 = array_search('attachment_id', $file1_headers);
|
||||||
|
$file1_assoc = array_reduce(
|
||||||
$file1_assoc = array_reduce($file1_data, function ($result, $row) use ($id_index_1, $size_index_1, $time_index_1, $filename_index_1, $clientid_index_1, $bucket_index_1, $attachment_id_index_1) {
|
$file1_data, function ($result, $row) use (
|
||||||
|
$id_index_1,
|
||||||
|
$size_index_1,
|
||||||
|
$time_index_1,
|
||||||
|
$filename_index_1,
|
||||||
|
$clientid_index_1,
|
||||||
|
$bucket_index_1,
|
||||||
|
$attachment_id_index_1
|
||||||
|
) {
|
||||||
$result[$row[$id_index_1]] = [
|
$result[$row[$id_index_1]] = [
|
||||||
'id' => $row[$id_index_1],
|
'id' => $row[$id_index_1],
|
||||||
'file1' => [
|
'file1' => [
|
||||||
|
@ -613,7 +657,13 @@ class DataConsistencyChecker
|
||||||
];
|
];
|
||||||
return $result;
|
return $result;
|
||||||
}, []);
|
}, []);
|
||||||
$file2_assoc = array_reduce($file2_data, function ($result, $row) use ($id_index_2, $size_index_2, $time_index_2, $filename_index_2) {
|
$file2_assoc = array_reduce(
|
||||||
|
$file2_data, function ($result, $row) use (
|
||||||
|
$id_index_2,
|
||||||
|
$size_index_2,
|
||||||
|
$time_index_2,
|
||||||
|
$filename_index_2
|
||||||
|
) {
|
||||||
$result[$row[$id_index_2]] = [
|
$result[$row[$id_index_2]] = [
|
||||||
'id' => $row[$id_index_2],
|
'id' => $row[$id_index_2],
|
||||||
'file2' => [
|
'file2' => [
|
||||||
|
@ -648,19 +698,28 @@ class DataConsistencyChecker
|
||||||
return $missing_entries;
|
return $missing_entries;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Generates a CSV report of inconsistent files.
|
* Generates a CSV report of inconsistent files.
|
||||||
*
|
*
|
||||||
* @param array $inconsistentFiles An array of inconsistent files.
|
* @param array $inconsistentFiles An array of inconsistent files.
|
||||||
* @param string $name The filename to use for the report.
|
* @param string $name The filename to use for the report.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void
|
private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void
|
||||||
{
|
{
|
||||||
$fp = fopen($name, 'w');
|
$fp = fopen($name, 'w');
|
||||||
fputcsv($fp, ['File/Attachment', 'File path', 'File name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time', 'ClientId', 'Bucket', 'Id']);
|
fputcsv($fp, [
|
||||||
|
self::CSV_COLUMN_FILE_ATTACHMENT,
|
||||||
|
self::CSV_COLUMN_FILE_PATH,
|
||||||
|
self::CSV_COLUMN_FILE_NAME,
|
||||||
|
self::CSV_COLUMN_THUMB1,
|
||||||
|
self::CSV_COLUMN_THUMB2,
|
||||||
|
self::CSV_COLUMN_SIZE,
|
||||||
|
self::CSV_COLUMN_CREATION_TIME,
|
||||||
|
self::CSV_COLUMN_CLIENT_ID,
|
||||||
|
self::CSV_COLUMN_BUCKET,
|
||||||
|
self::CSV_COLUMN_ID
|
||||||
|
]);
|
||||||
|
|
||||||
foreach ($inconsistentFiles as $row) {
|
foreach ($inconsistentFiles as $row) {
|
||||||
$check_value = $row['id'];
|
$check_value = $row['id'];
|
||||||
|
@ -691,7 +750,6 @@ class DataConsistencyChecker
|
||||||
$attachmentId,
|
$attachmentId,
|
||||||
]);
|
]);
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose($fp);
|
fclose($fp);
|
||||||
chmod($name, 0666);
|
chmod($name, 0666);
|
||||||
}
|
}
|
||||||
|
@ -703,11 +761,19 @@ class DataConsistencyChecker
|
||||||
* @param string $name The name of the CSV report file to be generated.
|
* @param string $name The name of the CSV report file to be generated.
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void
|
private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void
|
||||||
{
|
{
|
||||||
|
|
||||||
$fp = fopen($name, 'w');
|
$fp = fopen($name, 'w');
|
||||||
fputcsv($fp, ['File/Attachment', 'Entry Path', 'Entry Name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time']);
|
fputcsv($fp, [
|
||||||
|
self::CSV_COLUMN_FILE_ATTACHMENT,
|
||||||
|
'Entry Path',
|
||||||
|
'Entry Name',
|
||||||
|
self::CSV_COLUMN_THUMB1,
|
||||||
|
self::CSV_COLUMN_THUMB2,
|
||||||
|
self::CSV_COLUMN_SIZE,
|
||||||
|
self::CSV_COLUMN_CREATION_TIME
|
||||||
|
]);
|
||||||
|
|
||||||
foreach ($inconsistentFiles as $row) {
|
foreach ($inconsistentFiles as $row) {
|
||||||
$filePath = $this->directory . '/' . $row['file1'][0] ? $this->directory . '/' . $row['file1'][0] : $this->directory . '/' . $row['file2'][0];
|
$filePath = $this->directory . '/' . $row['file1'][0] ? $this->directory . '/' . $row['file1'][0] : $this->directory . '/' . $row['file2'][0];
|
||||||
|
@ -742,170 +808,52 @@ class DataConsistencyChecker
|
||||||
* @param array $inconsistentFiles An array of inconsistent files.
|
* @param array $inconsistentFiles An array of inconsistent files.
|
||||||
* @param string $name The filename to use for the report.
|
* @param string $name The filename to use for the report.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function generateHtmlReport(array $inconsistentFiles, string $name): void
|
private function generateHtmlReport(array $inconsistentFiles, string $name): void
|
||||||
{
|
{
|
||||||
|
$templateFile = 'report_template.html';
|
||||||
|
$templateContent = file_get_contents($templateFile);
|
||||||
|
|
||||||
|
if ($templateContent === false) {
|
||||||
|
throw new Exception('Failed to read the HTML template file.');
|
||||||
|
}
|
||||||
|
|
||||||
|
$tableRows = '';
|
||||||
|
foreach ($inconsistentFiles as $row) {
|
||||||
|
$filename = $row['file1'][0];
|
||||||
|
$filePath = $this->directory . '/' . $filename;
|
||||||
|
$size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath);
|
||||||
|
$creationTime = isset($row['file1'][2]) ? str_replace('"', '', $row['file1'][2]) : date('Y-m-d H:i:s', filectime($filePath));
|
||||||
|
$thumb1 = $row['file1'][0] . '-thumb1';
|
||||||
|
$thumb2 = $row['file1'][0] . '-thumb2';
|
||||||
|
|
||||||
|
if (isset($row['file1'][3]) && preg_match('/^[0-9]+\./', $row['file1'][3])) {
|
||||||
|
$filename = $row['file1'][3];
|
||||||
|
$filePath = $this->directory . '/' . $filename;
|
||||||
|
$thumb1 = '';
|
||||||
|
$thumb2 = '';
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_string($row['id']) && strpos($row['id'], '.') !== false) {
|
||||||
|
$filename = $row['id'];
|
||||||
|
$filePath = $this->directory . '/' . $filename;
|
||||||
|
$thumb1 = '';
|
||||||
|
$thumb2 = '';
|
||||||
|
}
|
||||||
|
|
||||||
|
$tableRows .= '<tr><td>Attachment</td><td>' . htmlspecialchars($filePath) . '</td><td>' . htmlspecialchars($filename) . '</td><td>' . htmlspecialchars($thumb1) . '</td><td>' . htmlspecialchars($thumb2) . '</td><td>' . htmlspecialchars($size) . '</td><td>' . htmlspecialchars($creationTime) . '</td></tr>';
|
||||||
|
}
|
||||||
|
$html = str_replace('{{table_rows}}', $tableRows, $templateContent);
|
||||||
$file = fopen($name, 'w');
|
$file = fopen($name, 'w');
|
||||||
|
|
||||||
if (!$file) {
|
if (!$file) {
|
||||||
throw new Exception('Failed to open the file for writing.');
|
throw new Exception('Failed to open the file for writing.');
|
||||||
}
|
}
|
||||||
$html = '<html><head><style>
|
|
||||||
table {
|
|
||||||
font-family: arial, sans-serif;
|
|
||||||
border-collapse: collapse;
|
|
||||||
width: 100%;
|
|
||||||
}
|
|
||||||
td, th {
|
|
||||||
border: 1px solid #dddddd;
|
|
||||||
text-align: left;
|
|
||||||
padding: 8px;
|
|
||||||
}
|
|
||||||
tr:nth-child(even) {
|
|
||||||
background-color: #dddddd;
|
|
||||||
}
|
|
||||||
</style></head><body><table><thead><tr><th>File/Attachment</th><th>File path</th><th>File name</th><th>Thumb 1</th><th>Thumb 2</th><th>Size</th><th>Creation Time</th></tr></thead><tbody>';
|
|
||||||
|
|
||||||
foreach ($inconsistentFiles as $row) {
|
|
||||||
|
|
||||||
$filePath = $this->directory . '/' . $row['file1'][0];
|
|
||||||
$size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath);
|
|
||||||
$creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath));
|
|
||||||
$thumb1 = isset($row['thumb1']) ? $row['thumb1'] : '';
|
|
||||||
$thumb2 = isset($row['thumb2']) ? $row['thumb2'] : '';
|
|
||||||
|
|
||||||
// Write the properties to the HTML table
|
|
||||||
$html .= '<tr><td>Attachment</td><td>' . htmlspecialchars($filePath) . '</td><td>' . htmlspecialchars($row['file1'][0]) . '</td><td>' . htmlspecialchars($thumb1) . '</td><td>' . htmlspecialchars($thumb2) . '</td><td>' . htmlspecialchars($size) . '</td><td>' . htmlspecialchars($creationTime) . '</td></tr>';
|
|
||||||
}
|
|
||||||
|
|
||||||
$html .= '</tbody></table></body></html>';
|
|
||||||
|
|
||||||
fwrite($file, $html);
|
fwrite($file, $html);
|
||||||
fclose($file);
|
fclose($file);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Compares two CSV files and creates a new CSV file containing the matching entries.
|
|
||||||
*
|
|
||||||
* @param string $firstFile The path to the first CSV file.
|
|
||||||
* @param string $secondFile The path to the second CSV file.
|
|
||||||
* @param string $finalFile The path to the final CSV file to be created.
|
|
||||||
* @return void
|
|
||||||
*/
|
|
||||||
|
|
||||||
function compareCSVFilesTransform($firstFile, $secondFile, $finalFile): void
|
|
||||||
{
|
|
||||||
// Read the first CSV file
|
|
||||||
$firstData = array_map('str_getcsv', file($firstFile));
|
|
||||||
$firstHeaders = array_shift($firstData);
|
|
||||||
|
|
||||||
// Read the second CSV file
|
|
||||||
$secondData = array_map('str_getcsv', file($secondFile));
|
|
||||||
$secondHeaders = array_shift($secondData);
|
|
||||||
|
|
||||||
// Find the indexes of the columns to compare in both files
|
|
||||||
$firstIdIndex = array_search('id', $firstHeaders);
|
|
||||||
$firstSizeIndex = array_search('size', $firstHeaders);
|
|
||||||
$firstCreationTimeIndex = array_search('creation_time', $firstHeaders);
|
|
||||||
|
|
||||||
$secondIdIndex = array_search('Entry Path', $secondHeaders);
|
|
||||||
$secondSizeIndex = array_search('size', $secondHeaders);
|
|
||||||
$secondCreationTimeIndex = array_search('creation_time', $secondHeaders);
|
|
||||||
|
|
||||||
// Get the entries from the first file
|
|
||||||
$firstEntries = [];
|
|
||||||
foreach ($firstData as $row) {
|
|
||||||
$firstEntries[$row[$firstIdIndex]] = [
|
|
||||||
'id' => $row[$firstIdIndex],
|
|
||||||
'size' => $row[$firstSizeIndex],
|
|
||||||
'creation_time' => $row[$firstCreationTimeIndex]
|
|
||||||
];
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create the final result CSV file
|
|
||||||
$finalCsvFile = fopen($finalFile, 'w');
|
|
||||||
fputcsv($finalCsvFile, ['id', 'size', 'creation_time']);
|
|
||||||
|
|
||||||
// Compare the entries from the second file with the entries in the first file
|
|
||||||
foreach ($secondData as $row) {
|
|
||||||
$secondId = $row[$secondIdIndex];
|
|
||||||
$secondSize = $row[$secondSizeIndex];
|
|
||||||
$secondCreationTime = $row[$secondCreationTimeIndex];
|
|
||||||
|
|
||||||
// Check if there is a matching entry in the first file
|
|
||||||
if (
|
|
||||||
isset($firstEntries[$secondId]) &&
|
|
||||||
$firstEntries[$secondId]['size'] === $secondSize &&
|
|
||||||
$firstEntries[$secondId]['creation_time'] === $secondCreationTime
|
|
||||||
) {
|
|
||||||
fputcsv($finalCsvFile, [
|
|
||||||
$secondId,
|
|
||||||
$secondSize,
|
|
||||||
$secondCreationTime
|
|
||||||
]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose($finalCsvFile);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Deletes physical files based on the entries listed in a CSV file.
|
|
||||||
*
|
|
||||||
* @param string $csvFile The path to the CSV file containing the list of files to delete.
|
|
||||||
* @return void
|
|
||||||
*/
|
|
||||||
|
|
||||||
private function deletePhysicalFilesFromCsv(string $csvFile): void
|
|
||||||
{
|
|
||||||
$directory = $this->directory . '/';
|
|
||||||
$entries = [];
|
|
||||||
if (($handle = fopen($csvFile, 'r')) !== false) {
|
|
||||||
// Collect the file paths to be deleted
|
|
||||||
while (($data = fgetcsv($handle)) !== false) {
|
|
||||||
// Skip the header row
|
|
||||||
if ($data[0] === 'File/Attachment') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
$filePath = $directory . $data[1];
|
|
||||||
$entries[] = $filePath;
|
|
||||||
}
|
|
||||||
fclose($handle);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Prompt the user for confirmation before deleting the files
|
|
||||||
echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL;
|
|
||||||
echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL;
|
|
||||||
echo "Do you want to delete the physical files listed in the CSV report? (yes/no): ";
|
|
||||||
$confirmation = trim(fgets(STDIN));
|
|
||||||
|
|
||||||
if (strtolower($confirmation) === 'yes') {
|
|
||||||
// Create a log file to record the deleted files
|
|
||||||
$logFile = 'deleted_files.log';
|
|
||||||
$logHandle = fopen($logFile, 'a');
|
|
||||||
|
|
||||||
// Delete the physical files
|
|
||||||
foreach ($entries as $filePath) {
|
|
||||||
if (file_exists($filePath)) {
|
|
||||||
// unlink($filePath);
|
|
||||||
echo "File deleted: $filePath" . PHP_EOL;
|
|
||||||
|
|
||||||
// Write the deleted file path to the log file
|
|
||||||
fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL);
|
|
||||||
} else {
|
|
||||||
echo "File not found: $filePath" . PHP_EOL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fclose($logHandle);
|
|
||||||
|
|
||||||
echo "Deletion completed. The list of deleted files is saved in '$logFile'." . PHP_EOL;
|
|
||||||
} else {
|
|
||||||
echo "Deletion of physical files aborted." . PHP_EOL;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the Cassandra schema version
|
* Returns the Cassandra schema version
|
||||||
*
|
*
|
||||||
|
@ -913,20 +861,21 @@ class DataConsistencyChecker
|
||||||
* @return int
|
* @return int
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function schema_version()
|
private function schema_version(): int
|
||||||
{
|
{
|
||||||
return static::$schemaVersion;
|
return static::$schemaVersion;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns info (id, size) for an attachment file
|
* Returns info (id, size) for an attachment file
|
||||||
*
|
*
|
||||||
* @param int $clientId
|
* @param int $clientId
|
||||||
* @param string|null $bucketId
|
* @param string|null $bucketId
|
||||||
* @param string $id
|
* @param string $id
|
||||||
*
|
* @return object|null
|
||||||
* @return object
|
|
||||||
*/
|
*/
|
||||||
private function get_info(int $clientId, ?string $bucketId, string $id): ?object
|
|
||||||
|
private function get_info(int $clientId, ?string $bucketId, string $id): object
|
||||||
{
|
{
|
||||||
$attachment = null;
|
$attachment = null;
|
||||||
$args = [
|
$args = [
|
||||||
|
@ -965,6 +914,7 @@ class DataConsistencyChecker
|
||||||
* @param string $id
|
* @param string $id
|
||||||
* @return string
|
* @return string
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function _set_bucket(string $id): string
|
private function _set_bucket(string $id): string
|
||||||
{
|
{
|
||||||
return substr($id, 0, static::$bucketMagic);
|
return substr($id, 0, static::$bucketMagic);
|
||||||
|
@ -978,9 +928,10 @@ class DataConsistencyChecker
|
||||||
*
|
*
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function _update_attachment_stats(bool $add, int $size): void
|
private function _update_attachment_stats(bool $add, int $size): void
|
||||||
{
|
{
|
||||||
$op = $add ? '+' : '-';
|
$op = $add ? '+' : self::DELIMITER;
|
||||||
$query = $this->cassandra->prepare('UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?');
|
$query = $this->cassandra->prepare('UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?');
|
||||||
$this->cassandra->execute($query, ['arguments' => ['client_id' => (int) static::$clientId]]);
|
$this->cassandra->execute($query, ['arguments' => ['client_id' => (int) static::$clientId]]);
|
||||||
$query = $this->cassandra->prepare(
|
$query = $this->cassandra->prepare(
|
||||||
|
@ -996,6 +947,7 @@ class DataConsistencyChecker
|
||||||
*
|
*
|
||||||
* @return object|null
|
* @return object|null
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function _get_attachment_key(string $id): ?object
|
private function _get_attachment_key(string $id): ?object
|
||||||
{
|
{
|
||||||
$result = null;
|
$result = null;
|
||||||
|
@ -1009,7 +961,6 @@ class DataConsistencyChecker
|
||||||
if ($data && $data->valid()) {
|
if ($data && $data->valid()) {
|
||||||
$result = (object) $data->current();
|
$result = (object) $data->current();
|
||||||
}
|
}
|
||||||
;
|
|
||||||
return $result;
|
return $result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1018,14 +969,14 @@ class DataConsistencyChecker
|
||||||
*
|
*
|
||||||
* @param string $attachmentId
|
* @param string $attachmentId
|
||||||
*
|
*
|
||||||
* @return array
|
* @return object|null
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function _get_attachment_by_id(string $attachmentId): ?object
|
private function _get_attachment_by_id(string $attachmentId): ?object
|
||||||
{
|
{
|
||||||
$attachment = null;
|
$attachment = null;
|
||||||
try {
|
try {
|
||||||
$properties = '*';
|
$properties = '*';
|
||||||
echo "attid: " . $attachmentId . PHP_EOL;
|
|
||||||
$key = $this->_get_attachment_key($attachmentId);
|
$key = $this->_get_attachment_key($attachmentId);
|
||||||
if ($key) {
|
if ($key) {
|
||||||
if ($this->schema_version() === 1) {
|
if ($this->schema_version() === 1) {
|
||||||
|
@ -1050,8 +1001,6 @@ class DataConsistencyChecker
|
||||||
}
|
}
|
||||||
} catch (Cassandra\Exception\InvalidArgumentException $e) {
|
} catch (Cassandra\Exception\InvalidArgumentException $e) {
|
||||||
}
|
}
|
||||||
//echo "ATTA: \n";
|
|
||||||
//var_dump($attachment);
|
|
||||||
|
|
||||||
return $attachment != null && $attachment->valid() ? $this->_convert_to_object($attachment->current()) : null;
|
return $attachment != null && $attachment->valid() ? $this->_convert_to_object($attachment->current()) : null;
|
||||||
}
|
}
|
||||||
|
@ -1061,16 +1010,19 @@ class DataConsistencyChecker
|
||||||
*
|
*
|
||||||
* @param string $id
|
* @param string $id
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function _delete_attachment_key(string $id): void
|
private function _delete_attachment_key(string $id): void
|
||||||
{
|
{
|
||||||
$result = null;
|
$this->cassandra->executeAsync(
|
||||||
$query = $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?');
|
$this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?'),
|
||||||
$arguments = [
|
[
|
||||||
|
'arguments' => [
|
||||||
'client_id' => static::$clientId,
|
'client_id' => static::$clientId,
|
||||||
'bucket' => $this->_set_bucket($id),
|
'bucket' => $this->_set_bucket($id),
|
||||||
'id' => $id,
|
'id' => $id
|
||||||
];
|
]
|
||||||
$this->cassandra->executeAsync($query, ['arguments' => $arguments]);
|
]
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1082,6 +1034,7 @@ class DataConsistencyChecker
|
||||||
*
|
*
|
||||||
* @return void
|
* @return void
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void
|
private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void
|
||||||
{
|
{
|
||||||
$queryArguments = [
|
$queryArguments = [
|
||||||
|
@ -1089,7 +1042,7 @@ class DataConsistencyChecker
|
||||||
'bucket' => $this->_set_bucket($data_id),
|
'bucket' => $this->_set_bucket($data_id),
|
||||||
'id' => $data_id,
|
'id' => $data_id,
|
||||||
];
|
];
|
||||||
$query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . ($add ? '+' : '-') . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?');
|
$query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . ($add ? '+' : self::DELIMITER) . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?');
|
||||||
$this->cassandra->executeAsync($query, ['arguments' => $queryArguments]);
|
$this->cassandra->executeAsync($query, ['arguments' => $queryArguments]);
|
||||||
|
|
||||||
if (!empty($attachmentId)) {
|
if (!empty($attachmentId)) {
|
||||||
|
@ -1115,6 +1068,7 @@ class DataConsistencyChecker
|
||||||
*
|
*
|
||||||
* @return bool
|
* @return bool
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool
|
public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool
|
||||||
{
|
{
|
||||||
static::$clientId = $clientId;
|
static::$clientId = $clientId;
|
||||||
|
@ -1135,9 +1089,6 @@ class DataConsistencyChecker
|
||||||
}
|
}
|
||||||
$query = $this->cassandra->prepare($q);
|
$query = $this->cassandra->prepare($q);
|
||||||
$this->cassandra->execute($query, ['arguments' => $refData]);
|
$this->cassandra->execute($query, ['arguments' => $refData]);
|
||||||
|
|
||||||
|
|
||||||
echo "DEL REFS\n";
|
|
||||||
$refData['bucket'] = $this->_set_bucket($id);
|
$refData['bucket'] = $this->_set_bucket($id);
|
||||||
$query = $this->cassandra->prepare(
|
$query = $this->cassandra->prepare(
|
||||||
'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?'
|
'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?'
|
||||||
|
@ -1150,7 +1101,6 @@ class DataConsistencyChecker
|
||||||
);
|
);
|
||||||
$result = $this->cassandra->execute($query, ['arguments' => $refData]);
|
$result = $this->cassandra->execute($query, ['arguments' => $refData]);
|
||||||
|
|
||||||
echo "DEL ATTACHMENTS\n";
|
|
||||||
if ($this->schema_version() === 1) {
|
if ($this->schema_version() === 1) {
|
||||||
$delQuery = $this->cassandra->prepare(
|
$delQuery = $this->cassandra->prepare(
|
||||||
'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?'
|
'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?'
|
||||||
|
@ -1258,6 +1208,7 @@ class DataConsistencyChecker
|
||||||
* file,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30,,,
|
* file,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30,,,
|
||||||
* cassandra,/test1/testx3,testx3,testx3-thumb1,testx3-thumb2,4343,20-02-22 13:30,1,3,3abc-def
|
* cassandra,/test1/testx3,testx3,testx3-thumb1,testx3-thumb2,4343,20-02-22 13:30,1,3,3abc-def
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public function processAttachmentDeletionCSV(string $file, string $src): void
|
public function processAttachmentDeletionCSV(string $file, string $src): void
|
||||||
{
|
{
|
||||||
|
|
||||||
|
@ -1278,7 +1229,7 @@ class DataConsistencyChecker
|
||||||
if ($line === reset($file_lines)) {
|
if ($line === reset($file_lines)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
$values = explode(",", $line);
|
$values = explode(',', $line);
|
||||||
|
|
||||||
if ($values[0] === 'File/Attachment') {
|
if ($values[0] === 'File/Attachment') {
|
||||||
continue;
|
continue;
|
||||||
|
@ -1293,11 +1244,13 @@ class DataConsistencyChecker
|
||||||
$data->size = $values[5];
|
$data->size = $values[5];
|
||||||
$data->created = $values[6];
|
$data->created = $values[6];
|
||||||
$path = dirname($data->path);
|
$path = dirname($data->path);
|
||||||
|
|
||||||
if (count($values) >= 10) {
|
if (count($values) >= 10) {
|
||||||
$data->clientId = (int) $values[7];
|
$data->clientId = (int) $values[7];
|
||||||
$data->bucket = $values[8];
|
$data->bucket = $values[8];
|
||||||
$data->id = $values[9];
|
$data->id = $values[9];
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($data->source === 'Attachment' && $src === 'cassandra') {
|
if ($data->source === 'Attachment' && $src === 'cassandra') {
|
||||||
echo "will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL;
|
echo "will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL;
|
||||||
$this->deleteAttachment($data->clientId, $data->bucket, $data->id);
|
$this->deleteAttachment($data->clientId, $data->bucket, $data->id);
|
||||||
|
@ -1306,12 +1259,14 @@ class DataConsistencyChecker
|
||||||
$filePath = $values[1];
|
$filePath = $values[1];
|
||||||
$thumb1Path = $path . '/' . $values[3];
|
$thumb1Path = $path . '/' . $values[3];
|
||||||
$thumb2Path = $path . '/' . $values[4];
|
$thumb2Path = $path . '/' . $values[4];
|
||||||
|
|
||||||
if (file_exists($thumb1Path) && file_exists($thumb2Path)) {
|
if (file_exists($thumb1Path) && file_exists($thumb2Path)) {
|
||||||
files::delete($thumb1Path);
|
files::delete($thumb1Path);
|
||||||
files::delete($thumb2Path);
|
files::delete($thumb2Path);
|
||||||
fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL);
|
fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL);
|
||||||
fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL);
|
fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (file_exists($filePath)) {
|
if (file_exists($filePath)) {
|
||||||
unlink($filePath);
|
unlink($filePath);
|
||||||
unlink($thumb1Path);
|
unlink($thumb1Path);
|
||||||
|
@ -1325,9 +1280,7 @@ class DataConsistencyChecker
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fclose($logHandle);
|
fclose($logHandle);
|
||||||
|
|
||||||
echo "Deletion completed. The list of attachments is saved in '$logFile'." . PHP_EOL;
|
echo "Deletion completed. The list of attachments is saved in '$logFile'." . PHP_EOL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
37
report_template.html
Normal file
37
report_template.html
Normal file
|
@ -0,0 +1,37 @@
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<style>
|
||||||
|
table {
|
||||||
|
font-family: arial, sans-serif;
|
||||||
|
border-collapse: collapse;
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
td, th {
|
||||||
|
border: 1px solid #dddddd;
|
||||||
|
text-align: left;
|
||||||
|
padding: 8px;
|
||||||
|
}
|
||||||
|
tr:nth-child(even) {
|
||||||
|
background-color: #dddddd;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<table>
|
||||||
|
<thead>
|
||||||
|
<tr>
|
||||||
|
<th>File/Attachment</th>
|
||||||
|
<th>File path</th>
|
||||||
|
<th>File name</th>
|
||||||
|
<th>Thumb 1</th>
|
||||||
|
<th>Thumb 2</th>
|
||||||
|
<th>Size</th>
|
||||||
|
<th>Creation Time</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>
|
||||||
|
{{table_rows}}
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
Add table
Add a link
Reference in a new issue