Fixed a few bugs reported from testing

This commit is contained in:
Spase 2023-05-30 18:39:44 +02:00
parent 4f7d180d2c
commit b0acef47a1

View file

@ -58,7 +58,7 @@ class DataConsistencyChecker
$cassandraConfig = $config['CASSANDRA']; $cassandraConfig = $config['CASSANDRA'];
$this->_cluster = Cassandra::cluster() $this->_cluster = Cassandra::cluster()
->withContactPoints($cassandraConfig['host']) ->withContactPoints($cassandraConfig['host'])
->withPort($cassandraConfig['port']) ->withPort((int)$cassandraConfig['port'])
->withCredentials( ->withCredentials(
$cassandraConfig['user'], $cassandraConfig['user'],
$cassandraConfig['password'] $cassandraConfig['password']
@ -99,11 +99,15 @@ class DataConsistencyChecker
$structured_directory = $options['output'] ?? $options['o'] ?? null; $structured_directory = $options['output'] ?? $options['o'] ?? null;
if (!file_exists($structured_directory)) { if (!file_exists($structured_directory)) {
if ($structured_directory === null) {
if ($structured_directory === null && $source === null) {
echo "Output directory is required. Please specify the --output option.\n"; echo "Output directory is required. Please specify the --output option.\n";
exit; exit;
} }
mkdir($structured_directory, 0777, true);
if ($structured_directory !== null) {
mkdir($structured_directory, 0777, true);
}
} }
$this->structured_directory = $structured_directory; $this->structured_directory = $structured_directory;
@ -118,7 +122,7 @@ class DataConsistencyChecker
exit; exit;
} }
if ($structured_directory === null) { if ($structured_directory === null && $source === null) {
echo "Output directory is required. Please specify the --output option.\n"; echo "Output directory is required. Please specify the --output option.\n";
exit; exit;
} }
@ -131,6 +135,7 @@ class DataConsistencyChecker
$this->processAttachmentDeletionCSV($remove, $source); $this->processAttachmentDeletionCSV($remove, $source);
} else { } else {
$this->checkConsistency('attachment_file_info'); $this->checkConsistency('attachment_file_info');
if (is_dir($this->structured_directory)) { if (is_dir($this->structured_directory)) {
$this->removeDirectory($this->structured_directory); $this->removeDirectory($this->structured_directory);
} }
@ -153,16 +158,16 @@ class DataConsistencyChecker
Options: Options:
-h, --help Display this help screen. -h, --help Display this help screen.
--version Set the schema version. --version Set the schema version (default: v1).
--directory Set the directory path for attachments. --directory Set the directory path for attachments.
--output Set the folder for temp files --output Set the folder for temp files - note that this folder will be deleted after script completes
--remove We need to choose between file and cassandra what we want to remove --remove We need to choose between file and cassandra what we want to remove
Example: Example:
php script_name --version schema_version --directory=/path/to/directory --output ./out/ php script_name --version schema_version --directory=/path/to/directory --output ./out/
For Delete: For Delete:
php script_name --remove result_from_cassandra_entries.csv --source file - to remove missing physical files php script_name --remove result_from_cassandra_entries.csv --source file - to remove missing physical files
php script_name --remove result_from_physical_files.csv --source cassandra - to remove missing cassandra entries php script_name --remove result_from_physical_files.csv --source cassandra --version 2 - to remove missing cassandra entries
EOT; EOT;
echo $helpMessage; echo $helpMessage;
} }
@ -186,7 +191,7 @@ class DataConsistencyChecker
); );
foreach ($files as $file) { foreach ($files as $file) {
$path = $directory . '/' . $file; $path = $directory . DIRECTORY_SEPARATOR . $file;
if (is_dir($path)) { if (is_dir($path)) {
$this->removeDirectory($path); $this->removeDirectory($path);
@ -231,8 +236,10 @@ class DataConsistencyChecker
if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) { if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) {
continue; continue;
} }
$dashParts = explode(self::DELIMITER, $fileName, 2); $dashParts = explode(self::DELIMITER, $fileName, 2);
$dotParts = explode('.', $fileName); $dotParts = explode('.', $fileName);
if (count($dashParts) === 2) { if (count($dashParts) === 2) {
$clientId = $dashParts[0]; $clientId = $dashParts[0];
$id = $dashParts[1]; $id = $dashParts[1];
@ -278,21 +285,24 @@ class DataConsistencyChecker
* @return void * @return void
*/ */
private function createPhysicalFileCSV(string $clientId, array $entries): void private function createPhysicalFileCSV(string $clientId, array $entries): void
{ {
$fileName = $this->structured_directory . 'physical_' . $clientId . self::CSV_EXTENSION; $fileName = $this->structured_directory . 'physical_' . $clientId . self::CSV_EXTENSION;
$csvFile = fopen($fileName, 'w'); $csvFile = fopen($fileName, 'w');
fputcsv($csvFile, ['id', 'size', 'creation_time']); fputcsv($csvFile, ['id', 'size', 'creation_time', 'filename']);
foreach ($entries as $entry) { foreach ($entries as $entry) {
fputcsv($csvFile, [ $idx = strpos($entry['file_name'],'.');
$entry['file_name'], $id = $idx === false ? $entry['file_name'] : substr($entry['file_name'],0,$idx);
$entry['size'], fputcsv($csvFile, [
$entry['creation_time'] $id,
]); $entry['size'],
$entry['creation_time'],
$entry['file_name']
]);
} }
fclose($csvFile); fclose($csvFile);
} }
/** /**
* Creates a CSV file containing the entries for a specific client ID. * Creates a CSV file containing the entries for a specific client ID.
@ -350,6 +360,11 @@ class DataConsistencyChecker
$entries = []; $entries = [];
while ($result) { while ($result) {
foreach ($result as $row) { foreach ($result as $row) {
if ($row['bucket'] === 'x') {
continue; // Skip the current iteration if bucket is 'x'
}
if (preg_match('/^[0-9]+$/', $row['id'])) { if (preg_match('/^[0-9]+$/', $row['id'])) {
$dotParts = explode(".", $row['filename'], 2); $dotParts = explode(".", $row['filename'], 2);
$dynamicValue = substr($dotParts[1], 0, 2); $dynamicValue = substr($dotParts[1], 0, 2);
@ -365,7 +380,11 @@ class DataConsistencyChecker
'client_id' => (string) $row['client_id'], 'client_id' => (string) $row['client_id'],
'attachment_id' => (string) $row['id'], 'attachment_id' => (string) $row['id'],
]; ];
$entries[$dynamicValue][$row['filename']] = $entry;
if ($row['bucket'] !== 'x') {
$entries[$dynamicValue][$row['filename']] = $entry;
}
} else { } else {
$clientId = substr($row['id'], 0, 2); $clientId = substr($row['id'], 0, 2);
$file = $row['client_id'] . self::DELIMITER . $row['id']; $file = $row['client_id'] . self::DELIMITER . $row['id'];
@ -384,7 +403,9 @@ class DataConsistencyChecker
if (!isset($entries[$clientId])) { if (!isset($entries[$clientId])) {
$entries[$clientId] = []; $entries[$clientId] = [];
} }
$entries[$clientId][$file] = $entry; if ($row['bucket'] !== 'x') {
$entries[$clientId][$file] = $entry;
}
} }
} }
$result = $result->nextPage(); $result = $result->nextPage();
@ -569,12 +590,10 @@ class DataConsistencyChecker
$result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR); $result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR);
$result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR); $result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR);
$this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV); $this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV);
$this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT); $this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT);
$this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV); $this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV);
$this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT); $this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT);
} }
/** /**
@ -616,10 +635,8 @@ class DataConsistencyChecker
{ {
$file1_data = array_map('str_getcsv', file($file1_path)); $file1_data = array_map('str_getcsv', file($file1_path));
$file2_data = array_map('str_getcsv', file($file2_path)); $file2_data = array_map('str_getcsv', file($file2_path));
$file1_headers = array_shift($file1_data); $file1_headers = array_shift($file1_data);
$file2_headers = array_shift($file2_data); $file2_headers = array_shift($file2_data);
// find indexes of columns in each file // find indexes of columns in each file
$id_index_1 = array_search('id', $file1_headers); $id_index_1 = array_search('id', $file1_headers);
$id_index_2 = array_search('id', $file2_headers); $id_index_2 = array_search('id', $file2_headers);
@ -629,7 +646,6 @@ class DataConsistencyChecker
$time_index_2 = array_search('creation_time', $file2_headers); $time_index_2 = array_search('creation_time', $file2_headers);
$filename_index_1 = array_search('filename', $file1_headers); $filename_index_1 = array_search('filename', $file1_headers);
$filename_index_2 = array_search('filename', $file2_headers); $filename_index_2 = array_search('filename', $file2_headers);
$clientid_index_1 = array_search('client_id', $file1_headers); $clientid_index_1 = array_search('client_id', $file1_headers);
$bucket_index_1 = array_search('bucket', $file1_headers); $bucket_index_1 = array_search('bucket', $file1_headers);
$attachment_id_index_1 = array_search('attachment_id', $file1_headers); $attachment_id_index_1 = array_search('attachment_id', $file1_headers);
@ -675,7 +691,6 @@ class DataConsistencyChecker
]; ];
return $result; return $result;
}, []); }, []);
$missing_entries = []; $missing_entries = [];
foreach ($file1_assoc as $id => $data) { foreach ($file1_assoc as $id => $data) {
if (!isset($file2_assoc[$id])) { if (!isset($file2_assoc[$id])) {
@ -694,7 +709,6 @@ class DataConsistencyChecker
} }
} }
} }
return $missing_entries; return $missing_entries;
} }
@ -733,9 +747,9 @@ class DataConsistencyChecker
$creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath));
$thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : '';
$thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : '';
$clientId = isset($row['file1'][4]) ? $row['file1'][4] : ''; $bucket = isset($row['file1'][4]) ? $row['file1'][4] : '';
$bucket = isset($row['file1'][5]) ? $row['file1'][5] : ''; $clientId = isset($row['file1'][5]) ? $row['file1'][5] : '';
$attachmentId = isset($row['file1'][5]) ? $row['file1'][6] : ''; $attachmentId = isset($row['file1'][6]) ? $row['file1'][6] : '';
fputcsv($fp, [ fputcsv($fp, [
'Attachment', 'Attachment',
@ -776,11 +790,11 @@ class DataConsistencyChecker
]); ]);
foreach ($inconsistentFiles as $row) { foreach ($inconsistentFiles as $row) {
$filePath = $this->directory . '/' . $row['file1'][0] ? $this->directory . '/' . $row['file1'][0] : $this->directory . '/' . $row['file2'][0]; $filePath = $this->directory . DIRECTORY_SEPARATOR . $row['file1'][3] ? $this->directory . DIRECTORY_SEPARATOR . $row['file1'][3] : $this->directory . DIRECTORY_SEPARATOR . $row['file2'][3];
$size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath);
$creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath));
$thumb1 = $row['file1'][0] . '-thumb1'; $thumb1 = $row['file1'][3] . '-thumb1';
$thumb2 = $row['file1'][0] . '-thumb2'; $thumb2 = $row['file1'][3] . '-thumb2';
if (is_string($row['id']) && strpos($row['id'], '.') !== false) { if (is_string($row['id']) && strpos($row['id'], '.') !== false) {
$old_attachment = explode('.', $row['id'])[0]; $old_attachment = explode('.', $row['id'])[0];
$thumb1 = ''; $thumb1 = '';
@ -796,7 +810,6 @@ class DataConsistencyChecker
$creationTime $creationTime
]); ]);
} }
fclose($fp); fclose($fp);
chmod($name, 0666); chmod($name, 0666);
} }
@ -821,7 +834,7 @@ class DataConsistencyChecker
$tableRows = ''; $tableRows = '';
foreach ($inconsistentFiles as $row) { foreach ($inconsistentFiles as $row) {
$filename = $row['file1'][0]; $filename = $row['file1'][0];
$filePath = $this->directory . '/' . $filename; $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename;
$size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath);
$creationTime = isset($row['file1'][2]) ? str_replace('"', '', $row['file1'][2]) : date('Y-m-d H:i:s', filectime($filePath)); $creationTime = isset($row['file1'][2]) ? str_replace('"', '', $row['file1'][2]) : date('Y-m-d H:i:s', filectime($filePath));
$thumb1 = $row['file1'][0] . '-thumb1'; $thumb1 = $row['file1'][0] . '-thumb1';
@ -829,14 +842,14 @@ class DataConsistencyChecker
if (isset($row['file1'][3]) && preg_match('/^[0-9]+\./', $row['file1'][3])) { if (isset($row['file1'][3]) && preg_match('/^[0-9]+\./', $row['file1'][3])) {
$filename = $row['file1'][3]; $filename = $row['file1'][3];
$filePath = $this->directory . '/' . $filename; $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename;
$thumb1 = ''; $thumb1 = '';
$thumb2 = ''; $thumb2 = '';
} }
if (is_string($row['id']) && strpos($row['id'], '.') !== false) { if (is_string($row['id']) && strpos($row['id'], '.') !== false) {
$filename = $row['id']; $filename = $row['id'];
$filePath = $this->directory . '/' . $filename; $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename;
$thumb1 = ''; $thumb1 = '';
$thumb2 = ''; $thumb2 = '';
} }
@ -875,7 +888,7 @@ class DataConsistencyChecker
* @return object|null * @return object|null
*/ */
private function get_info(int $clientId, ?string $bucketId, string $id): object private function get_info(int $clientId, ?string $bucketId, string $id): ?object
{ {
$attachment = null; $attachment = null;
$args = [ $args = [
@ -887,7 +900,6 @@ class DataConsistencyChecker
$query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?'); $query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?');
} else { } else {
$q = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? AND bucket = \'' . $bucketId . '\''; $q = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? AND bucket = \'' . $bucketId . '\'';
echo "Q: [" . $q . "]\n";
$query = $this->cassandra->prepare($q); $query = $this->cassandra->prepare($q);
} }
$res = $this->cassandra->execute( $res = $this->cassandra->execute(
@ -958,6 +970,7 @@ class DataConsistencyChecker
'id' => $id, 'id' => $id,
]; ];
$data = $this->cassandra->execute($query, ['arguments' => $arguments]); $data = $this->cassandra->execute($query, ['arguments' => $arguments]);
if ($data && $data->valid()) { if ($data && $data->valid()) {
$result = (object) $data->current(); $result = (object) $data->current();
} }
@ -1220,12 +1233,15 @@ class DataConsistencyChecker
if (strtolower($confirmation) !== 'yes') { if (strtolower($confirmation) !== 'yes') {
return; return;
} }
$this->init();
$file_contents = file_get_contents($file); $file_contents = file_get_contents($file);
$file_lines = explode("\n", $file_contents); $file_lines = explode("\n", $file_contents);
$logFile = 'deleted_files.log'; $logFile = 'deleted_files.log';
$logHandle = fopen($logFile, 'a'); $logHandle = fopen($logFile, 'a');
foreach ($file_lines as $line) { foreach ($file_lines as $line) {
if ($line === reset($file_lines)) { if ($line === reset($file_lines)) {
continue; continue;
} }
@ -1234,6 +1250,7 @@ class DataConsistencyChecker
if ($values[0] === 'File/Attachment') { if ($values[0] === 'File/Attachment') {
continue; continue;
} }
if (count($values) >= 7) { if (count($values) >= 7) {
$data = (object) array(); $data = (object) array();
$data->source = $values[0]; $data->source = $values[0];
@ -1257,12 +1274,15 @@ class DataConsistencyChecker
fwrite($logHandle, "Deleted attachment: $data->id" . PHP_EOL); fwrite($logHandle, "Deleted attachment: $data->id" . PHP_EOL);
} else if ($data->source === 'File' && $src === 'file') { } else if ($data->source === 'File' && $src === 'file') {
$filePath = $values[1]; $filePath = $values[1];
$thumb1Path = $path . '/' . $values[3]; $thumb1Path = $path . DIRECTORY_SEPARATOR . $values[3];
$thumb2Path = $path . '/' . $values[4]; $thumb2Path = $path . DIRECTORY_SEPARATOR . $values[4];
$filePath = str_replace('"', '', $filePath);
$thumb1Path = str_replace('"', '', $thumb1Path);
$thumb2Path = str_replace('"', '', $thumb2Path);
if (file_exists($thumb1Path) && file_exists($thumb2Path)) { if (file_exists($thumb1Path) && file_exists($thumb2Path)) {
files::delete($thumb1Path); unlink($thumb1Path);
files::delete($thumb2Path); unlink($thumb2Path);
fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL); fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL);
fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL); fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL);
} }
@ -1286,4 +1306,4 @@ class DataConsistencyChecker
} }
$checker = new DataConsistencyChecker(); $checker = new DataConsistencyChecker();
$checker->checkConsistency("attachment_file_info", true); $checker->checkConsistency("attachment_file_info", true);