From 4f7d180d2c20a704cdaabc68c87c6a3cadff368d Mon Sep 17 00:00:00 2001 From: Spase Date: Mon, 29 May 2023 08:23:39 +0200 Subject: [PATCH] Changes from feedback --- cassandra_consistency_script.php | 579 ++++++++++++++----------------- report_template.html | 37 ++ 2 files changed, 303 insertions(+), 313 deletions(-) create mode 100644 report_template.html diff --git a/cassandra_consistency_script.php b/cassandra_consistency_script.php index 5a6522d..24fa4e0 100644 --- a/cassandra_consistency_script.php +++ b/cassandra_consistency_script.php @@ -1,19 +1,41 @@ _cluster = Cassandra::cluster() - ->withContactPoints($config['CASSANDRA']['host']) - ->withPort(9042) + ->withContactPoints($cassandraConfig['host']) + ->withPort($cassandraConfig['port']) ->withCredentials( - $config['CASSANDRA']['user'], - $config['CASSANDRA']['password'] + $cassandraConfig['user'], + $cassandraConfig['password'] ) ->build(); if ($this->_cluster) { try { - $this->cassandra = $this->_cluster->connect($config['CASSANDRA']['keyspace']); + $this->cassandra = $this->_cluster->connect($cassandraConfig['keyspace']); } catch (Exception $e) { - echo "err\n"; + echo "An error occurred: " . $e->getMessage() . "\n"; } } - - } /** @@ -57,61 +79,72 @@ class DataConsistencyChecker * @param array $arguments The command line arguments. * @return void */ - public function runFromCommandLine($arguments) - { - $shortOptions = "hd:v:o:r:s:"; - $longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"]; + + public function runFromCommandLine(array $arguments): void + { + $shortOptions = "hd:v:o:r:s:"; + $longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"]; + + $options = getopt($shortOptions, $longOptions); + + if (count($options) === 0 || isset($options['h']) || isset($options['help'])) { + $this->displayHelpMessage(); + exit; + } + + $directory = $options['directory'] ?? $options['d'] ?? null; + $schemaVersion = $options['version'] ?? $options['v'] ?? null; + $source = $options['source'] ?? $options['s'] ?? null; + $remove = $options['remove'] ?? $options['r'] ?? null; + $structured_directory = $options['output'] ?? $options['o'] ?? null; + + if (!file_exists($structured_directory)) { + if ($structured_directory === null) { + echo "Output directory is required. Please specify the --output option.\n"; + exit; + } + mkdir($structured_directory, 0777, true); + } + + $this->structured_directory = $structured_directory; + + if (($directory === null || $schemaVersion === null) && $remove === null && $source === null) { + echo "Missing Attachment directory or schema version.\n"; + exit; + } + + if ($schemaVersion && !in_array($schemaVersion, [1, 2])) { + echo "Invalid schema version. Only versions 1 and 2 are supported.\n"; + exit; + } - $options = getopt($shortOptions, $longOptions); - - if (count($options) == 0 || isset($options['h']) || isset($options['help'])) { - $this->displayHelpMessage(); + if ($structured_directory === null) { + echo "Output directory is required. Please specify the --output option.\n"; exit; - } - - $directory = isset($options['directory']) ? $options['directory'] : (isset($options['d']) ? $options['d'] : null); - $schemaVersion = isset($options['version']) ? $options['version'] : (isset($options['v']) ? $options['v'] : null); - - $source = isset($options['source']) ? $options['source'] : (isset($options['s']) ? $options['s'] : null); - $remove = isset($options['remove']) ? $options['remove'] : (isset($options['r']) ? $options['r'] : null); - - $structured_directory = isset($options['output']) ? $options['output'] : (isset($options['o']) ? $options['o'] : null); - if (!file_exists($structured_directory)) { - mkdir($structured_directory, 0777, true); - } - $this->structured_directory = $structured_directory; - - if (($directory === null || $schemaVersion === null) && $remove === null && $source === null) { - echo "Missing Attachment directory or schema version.\n"; - exit; - } - if ($schemaVersion && !in_array($schemaVersion, [1, 2])) { - echo "Invalid schema version. Only versions 1 and 2 are supported.\n"; - exit; - } - if ($this->structured_directory == null) { - $this->structured_directory = './'; - } - - static::$schemaVersion = (int) $schemaVersion; - $this->directory = $directory; - $this->retrived_csv = './result_from_physical_files.csv'; - if ($remove && $source) { - $this->processAttachmentDeletionCSV($remove, $source); - } else { - $this->checkConsistency('attachment_file_info'); - if (is_dir($this->structured_directory)) { - $this->removeDirectory($this->structured_directory); - } - } - exit; - } + } + + static::$schemaVersion = (int) $schemaVersion; + $this->directory = $directory; + $this->retrived_csv = './result_from_physical_files.csv'; + + if ($remove && $source) { + $this->processAttachmentDeletionCSV($remove, $source); + } else { + $this->checkConsistency('attachment_file_info'); + if (is_dir($this->structured_directory)) { + $this->removeDirectory($this->structured_directory); + } + } + exit; + } + /** * Displays the help message with instructions on how to use the script. * * @return void */ + private function displayHelpMessage(): void { $helpMessage = <<init(); $dbEntries = $this->getDbEntries($tableName); @@ -182,7 +219,7 @@ class DataConsistencyChecker * @return array An array containing the file entries organized by dynamic values. */ - private function getFileEntries($directory): array + private function getFileEntries(string $directory): array { $files = glob($directory . '/*'); $entries = []; @@ -190,19 +227,20 @@ class DataConsistencyChecker foreach ($files as $file) { if (is_file($file)) { $fileName = basename($file); + if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) { continue; } - $dashParts = explode("-", $fileName, 2); - $dotParts = explode(".", $fileName); + $dashParts = explode(self::DELIMITER, $fileName, 2); + $dotParts = explode('.', $fileName); if (count($dashParts) === 2) { $clientId = $dashParts[0]; $id = $dashParts[1]; $fileParts = $dashParts; - $delimiter = '-'; + $delimiter = self::DELIMITER; } elseif (count($dotParts) >= 2) { $clientId = $dotParts[0]; - $id = implode(".", array_slice($dotParts, 1)); + $id = implode('.', array_slice($dotParts, 1)); $fileParts = $dotParts; $delimiter = '.'; } else { @@ -228,7 +266,6 @@ class DataConsistencyChecker foreach ($entries as $clientId => $clientEntries) { $this->createPhysicalFileCSV($clientId, $clientEntries); } - return $entries; } @@ -241,9 +278,9 @@ class DataConsistencyChecker * @return void */ - private function createPhysicalFileCSV($clientId, $entries) + private function createPhysicalFileCSV(string $clientId, array $entries): void { - $fileName = $this->structured_directory . "physical_" . $clientId . ".csv"; + $fileName = $this->structured_directory . 'physical_' . $clientId . self::CSV_EXTENSION; $csvFile = fopen($fileName, 'w'); fputcsv($csvFile, ['id', 'size', 'creation_time']); foreach ($entries as $entry) { @@ -265,9 +302,9 @@ class DataConsistencyChecker * @return void */ - private function createDBFileCSV($clientId, $entries): void + private function createDBFileCSV(string $clientId, array $entries): void { - $fileName = $this->structured_directory . "cassandra_" . (string) $clientId . ".csv"; + $fileName = $this->structured_directory . 'cassandra_' . (string) $clientId . self::CSV_EXTENSION; $csvFile = fopen($fileName, 'w'); $headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id']; @@ -283,10 +320,8 @@ class DataConsistencyChecker $entry['client_id'], $entry['attachment_id'], ]; - fputcsv($csvFile, $rowData); } - fclose($csvFile); } @@ -297,15 +332,13 @@ class DataConsistencyChecker * @return array An array containing the retrieved entries. */ - - private function getDbEntries($tableName) + private function getDbEntries(string $tableName): array { - - if ($this->schema_version() === 1) { - $query = "SELECT client_id, id, size, filename, created_on FROM $tableName"; - } else { - $query = "SELECT client_id, id, size, filename, created_on, bucket FROM $tableName"; - } + $query = sprintf( + 'SELECT client_id, id, size, filename, created_on%s FROM %s', + $this->schema_version() === 1 ? '' : ', bucket', + $tableName + ); $arguments = []; $result = $this->cassandra->execute( $query, @@ -317,7 +350,6 @@ class DataConsistencyChecker $entries = []; while ($result) { foreach ($result as $row) { - if (preg_match('/^[0-9]+$/', $row['id'])) { $dotParts = explode(".", $row['filename'], 2); $dynamicValue = substr($dotParts[1], 0, 2); @@ -336,7 +368,7 @@ class DataConsistencyChecker $entries[$dynamicValue][$row['filename']] = $entry; } else { $clientId = substr($row['id'], 0, 2); - $file = $row['client_id'] . '-' . $row['id']; + $file = $row['client_id'] . self::DELIMITER . $row['id']; $timestamp = (int) $row['created_on']; $date = date('Y-m-d H:i:s', $timestamp); $creationTime = str_replace('"', '', $date); @@ -357,7 +389,6 @@ class DataConsistencyChecker } $result = $result->nextPage(); } - // Merge all entries into a single array $allEntries = []; foreach ($entries as $clientEntries) { @@ -381,7 +412,7 @@ class DataConsistencyChecker * @return array|null An array containing mismatched entries, or null if the entries match. */ - private function compareFileEntries($id, $data, $cassandra_file_assoc) + private function compareFileEntries(string $id, array $data, array $cassandra_file_assoc) { if (!isset($cassandra_file_assoc[$id])) { return [ @@ -412,7 +443,7 @@ class DataConsistencyChecker * @return array An array containing the lines of the file. */ - private function getFileLines($file): array + private function getFileLines(string $file): array { $file_contents = file_get_contents($file); return explode("\n", $file_contents); @@ -425,7 +456,7 @@ class DataConsistencyChecker * @return array An array of filtered and mapped entries. */ - private function filterAndMapEntries($entries): array + private function filterAndMapEntries(array $entries): array { $filtered_entries = array_filter($entries); $mapped_entries = array_map(function ($entry) { @@ -440,18 +471,27 @@ class DataConsistencyChecker * @param string $dir The directory path containing the files to be processed. * @return void */ - private function process_files_in_directory($dir): void + + private function process_files_in_directory(string $dir): void { $files = glob($dir . '/*.csv'); $physical_files = array(); $cassandra_files = array(); foreach ($files as $file) { - $filename = basename($file, '.csv'); - $csv_type = substr($filename, 0, strpos($filename, '_')); - $file_num = substr($filename, strpos($filename, '_') + 1); - if ($csv_type == 'physical') { + $filename = basename($file, static::CSV_EXTENSION); + $csv_type = substr( + $filename, + 0, + strpos($filename, '_') + ); + $file_num = substr( + $filename, + strpos($filename, '_') + 1 + ); + + if ($csv_type === 'physical') { $physical_files[$file_num] = $file; - } elseif ($csv_type == 'cassandra') { + } elseif ($csv_type === 'cassandra') { $cassandra_files[$file_num] = $file; } } @@ -465,7 +505,7 @@ class DataConsistencyChecker $physical_entries = []; foreach ($physical_files as $file_num => $physical_file) { - $cassandra_file = $dir . '/cassandra_' . $file_num . '.csv'; + $cassandra_file = $dir . '/cassandra_' . $file_num . self::CSV_EXTENSION; if (file_exists($physical_file) && file_exists($cassandra_file)) { $compared_physical[] = $this->compare_csv_files($physical_file, $cassandra_file); @@ -476,9 +516,6 @@ class DataConsistencyChecker } else { if (!file_exists($physical_file)) { $missing_physical_files[] = $physical_file; - - echo "cfl: " . var_dump($missing_physical_files) . PHP_EOL; - } if (!file_exists($cassandra_file)) { $missing_cassandra_files[] = $cassandra_file; @@ -493,8 +530,8 @@ class DataConsistencyChecker if ($line === reset($physical_file_lines)) { continue; } - $values = explode(",", $line); - if (count($values) == 3) { + $values = explode(',', $line); + if (count($values) === self::NUM_FIELDS) { $values[2] = str_replace('"', '', $values[2]); $file_assoc[$values[0]] = [$values[1], $values[2]]; } @@ -513,7 +550,7 @@ class DataConsistencyChecker } foreach ($cassandra_files as $file_num => $cassandra_file) { - $physical_file = $dir . '/physical_' . $file_num . '.csv'; + $physical_file = $dir . '/physical_' . $file_num . self::CSV_EXTENSION; if (!file_exists($physical_file)) { $cassandra_file_lines = $this->getFileLines($cassandra_file); @@ -532,12 +569,14 @@ class DataConsistencyChecker $result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR); $result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR); - $this->generateCsvReportForDbEntries($result_from_cassandra_entries, 'result_from_cassandra_entries.csv'); - $this->generateHtmlReport($result_from_cassandra_entries, 'cassandra.html'); - $this->generateCsvReportForPhysicalFiles($result_from_physical_files, 'result_from_physical_files.csv'); - $this->generateHtmlReport($result_from_physical_files, 'physical.html'); + + $this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV); + $this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT); + $this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV); + $this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT); } + /** * Builds an associative array from file lines. * @@ -545,26 +584,23 @@ class DataConsistencyChecker * @return array An associative array representing the file association. */ - private function buildFileAssociation($file_lines): array + private function buildFileAssociation(array $file_lines): array { $file_assoc = []; - foreach ($file_lines as $line) { if ($line === reset($file_lines)) { continue; } - $values = explode(",", $line); - if (count($values) == 3) { - + $values = explode(',', $line); + if (count($values) === self::NUM_FIELDS) { $file_assoc[$values[0]] = [$values[1], $values[2]]; } - if (count($values) > 3) { + if (count($values) > 3) { $file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[6]]; } } - return $file_assoc; } @@ -576,7 +612,7 @@ class DataConsistencyChecker * @return array An array containing missing entries or entries with mismatched data. */ - private function compare_csv_files($file1_path, $file2_path): array + private function compare_csv_files(string $file1_path, string $file2_path): array { $file1_data = array_map('str_getcsv', file($file1_path)); $file2_data = array_map('str_getcsv', file($file2_path)); @@ -597,8 +633,16 @@ class DataConsistencyChecker $clientid_index_1 = array_search('client_id', $file1_headers); $bucket_index_1 = array_search('bucket', $file1_headers); $attachment_id_index_1 = array_search('attachment_id', $file1_headers); - - $file1_assoc = array_reduce($file1_data, function ($result, $row) use ($id_index_1, $size_index_1, $time_index_1, $filename_index_1, $clientid_index_1, $bucket_index_1, $attachment_id_index_1) { + $file1_assoc = array_reduce( + $file1_data, function ($result, $row) use ( + $id_index_1, + $size_index_1, + $time_index_1, + $filename_index_1, + $clientid_index_1, + $bucket_index_1, + $attachment_id_index_1 + ) { $result[$row[$id_index_1]] = [ 'id' => $row[$id_index_1], 'file1' => [ @@ -613,7 +657,13 @@ class DataConsistencyChecker ]; return $result; }, []); - $file2_assoc = array_reduce($file2_data, function ($result, $row) use ($id_index_2, $size_index_2, $time_index_2, $filename_index_2) { + $file2_assoc = array_reduce( + $file2_data, function ($result, $row) use ( + $id_index_2, + $size_index_2, + $time_index_2, + $filename_index_2 + ) { $result[$row[$id_index_2]] = [ 'id' => $row[$id_index_2], 'file2' => [ @@ -648,19 +698,28 @@ class DataConsistencyChecker return $missing_entries; } - - - /** * Generates a CSV report of inconsistent files. * * @param array $inconsistentFiles An array of inconsistent files. * @param string $filename The filename to use for the report. */ + private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void { $fp = fopen($name, 'w'); - fputcsv($fp, ['File/Attachment', 'File path', 'File name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time', 'ClientId', 'Bucket', 'Id']); + fputcsv($fp, [ + self::CSV_COLUMN_FILE_ATTACHMENT, + self::CSV_COLUMN_FILE_PATH, + self::CSV_COLUMN_FILE_NAME, + self::CSV_COLUMN_THUMB1, + self::CSV_COLUMN_THUMB2, + self::CSV_COLUMN_SIZE, + self::CSV_COLUMN_CREATION_TIME, + self::CSV_COLUMN_CLIENT_ID, + self::CSV_COLUMN_BUCKET, + self::CSV_COLUMN_ID + ]); foreach ($inconsistentFiles as $row) { $check_value = $row['id']; @@ -691,7 +750,6 @@ class DataConsistencyChecker $attachmentId, ]); } - fclose($fp); chmod($name, 0666); } @@ -703,11 +761,19 @@ class DataConsistencyChecker * @param string $name The name of the CSV report file to be generated. * @return void */ + private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void { - $fp = fopen($name, 'w'); - fputcsv($fp, ['File/Attachment', 'Entry Path', 'Entry Name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time']); + fputcsv($fp, [ + self::CSV_COLUMN_FILE_ATTACHMENT, + 'Entry Path', + 'Entry Name', + self::CSV_COLUMN_THUMB1, + self::CSV_COLUMN_THUMB2, + self::CSV_COLUMN_SIZE, + self::CSV_COLUMN_CREATION_TIME + ]); foreach ($inconsistentFiles as $row) { $filePath = $this->directory . '/' . $row['file1'][0] ? $this->directory . '/' . $row['file1'][0] : $this->directory . '/' . $row['file2'][0]; @@ -742,169 +808,51 @@ class DataConsistencyChecker * @param array $inconsistentFiles An array of inconsistent files. * @param string $filename The filename to use for the report. */ - private function generateHtmlReport(array $inconsistentFiles, string $name): void - { - $file = fopen($name, 'w'); - if (!$file) { - throw new Exception('Failed to open the file for writing.'); - } - $html = ''; - - foreach ($inconsistentFiles as $row) { - - $filePath = $this->directory . '/' . $row['file1'][0]; - $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); - $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); - $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; - $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; - - // Write the properties to the HTML table - $html .= ''; - } - - $html .= '
File/AttachmentFile pathFile nameThumb 1Thumb 2SizeCreation Time
Attachment' . htmlspecialchars($filePath) . '' . htmlspecialchars($row['file1'][0]) . '' . htmlspecialchars($thumb1) . '' . htmlspecialchars($thumb2) . '' . htmlspecialchars($size) . '' . htmlspecialchars($creationTime) . '
'; - - fwrite($file, $html); - fclose($file); - } - - /** - * Compares two CSV files and creates a new CSV file containing the matching entries. - * - * @param string $firstFile The path to the first CSV file. - * @param string $secondFile The path to the second CSV file. - * @param string $finalFile The path to the final CSV file to be created. - * @return void - */ - - function compareCSVFilesTransform($firstFile, $secondFile, $finalFile): void - { - // Read the first CSV file - $firstData = array_map('str_getcsv', file($firstFile)); - $firstHeaders = array_shift($firstData); - - // Read the second CSV file - $secondData = array_map('str_getcsv', file($secondFile)); - $secondHeaders = array_shift($secondData); - - // Find the indexes of the columns to compare in both files - $firstIdIndex = array_search('id', $firstHeaders); - $firstSizeIndex = array_search('size', $firstHeaders); - $firstCreationTimeIndex = array_search('creation_time', $firstHeaders); - - $secondIdIndex = array_search('Entry Path', $secondHeaders); - $secondSizeIndex = array_search('size', $secondHeaders); - $secondCreationTimeIndex = array_search('creation_time', $secondHeaders); - - // Get the entries from the first file - $firstEntries = []; - foreach ($firstData as $row) { - $firstEntries[$row[$firstIdIndex]] = [ - 'id' => $row[$firstIdIndex], - 'size' => $row[$firstSizeIndex], - 'creation_time' => $row[$firstCreationTimeIndex] - ]; - } - - // Create the final result CSV file - $finalCsvFile = fopen($finalFile, 'w'); - fputcsv($finalCsvFile, ['id', 'size', 'creation_time']); - - // Compare the entries from the second file with the entries in the first file - foreach ($secondData as $row) { - $secondId = $row[$secondIdIndex]; - $secondSize = $row[$secondSizeIndex]; - $secondCreationTime = $row[$secondCreationTimeIndex]; - - // Check if there is a matching entry in the first file - if ( - isset($firstEntries[$secondId]) && - $firstEntries[$secondId]['size'] === $secondSize && - $firstEntries[$secondId]['creation_time'] === $secondCreationTime - ) { - fputcsv($finalCsvFile, [ - $secondId, - $secondSize, - $secondCreationTime - ]); + private function generateHtmlReport(array $inconsistentFiles, string $name): void + { + $templateFile = 'report_template.html'; + $templateContent = file_get_contents($templateFile); + + if ($templateContent === false) { + throw new Exception('Failed to read the HTML template file.'); + } + + $tableRows = ''; + foreach ($inconsistentFiles as $row) { + $filename = $row['file1'][0]; + $filePath = $this->directory . '/' . $filename; + $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); + $creationTime = isset($row['file1'][2]) ? str_replace('"', '', $row['file1'][2]) : date('Y-m-d H:i:s', filectime($filePath)); + $thumb1 = $row['file1'][0] . '-thumb1'; + $thumb2 = $row['file1'][0] . '-thumb2'; + + if (isset($row['file1'][3]) && preg_match('/^[0-9]+\./', $row['file1'][3])) { + $filename = $row['file1'][3]; + $filePath = $this->directory . '/' . $filename; + $thumb1 = ''; + $thumb2 = ''; } - } - - fclose($finalCsvFile); - } - - /** - * Deletes physical files based on the entries listed in a CSV file. - * - * @param string $csvFile The path to the CSV file containing the list of files to delete. - * @return void - */ - - private function deletePhysicalFilesFromCsv(string $csvFile): void - { - $directory = $this->directory . '/'; - $entries = []; - if (($handle = fopen($csvFile, 'r')) !== false) { - // Collect the file paths to be deleted - while (($data = fgetcsv($handle)) !== false) { - // Skip the header row - if ($data[0] === 'File/Attachment') { - continue; - } - - $filePath = $directory . $data[1]; - $entries[] = $filePath; - } - fclose($handle); - } - - // Prompt the user for confirmation before deleting the files - echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL; - echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL; - echo "Do you want to delete the physical files listed in the CSV report? (yes/no): "; - $confirmation = trim(fgets(STDIN)); - - if (strtolower($confirmation) === 'yes') { - // Create a log file to record the deleted files - $logFile = 'deleted_files.log'; - $logHandle = fopen($logFile, 'a'); - - // Delete the physical files - foreach ($entries as $filePath) { - if (file_exists($filePath)) { - // unlink($filePath); - echo "File deleted: $filePath" . PHP_EOL; - - // Write the deleted file path to the log file - fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL); - } else { - echo "File not found: $filePath" . PHP_EOL; - } - } - - fclose($logHandle); - - echo "Deletion completed. The list of deleted files is saved in '$logFile'." . PHP_EOL; - } else { - echo "Deletion of physical files aborted." . PHP_EOL; - } - } - + + if (is_string($row['id']) && strpos($row['id'], '.') !== false) { + $filename = $row['id']; + $filePath = $this->directory . '/' . $filename; + $thumb1 = ''; + $thumb2 = ''; + } + + $tableRows .= 'Attachment' . htmlspecialchars($filePath) . '' . htmlspecialchars($filename) . '' . htmlspecialchars($thumb1) . '' . htmlspecialchars($thumb2) . '' . htmlspecialchars($size) . '' . htmlspecialchars($creationTime) . ''; + } + $html = str_replace('{{table_rows}}', $tableRows, $templateContent); + $file = fopen($name, 'w'); + + if (!$file) { + throw new Exception('Failed to open the file for writing.'); + } + + fwrite($file, $html); + fclose($file); + } /** * Returns cassandra schema version @@ -912,21 +860,22 @@ class DataConsistencyChecker * * @return int */ - - private function schema_version() + + private function schema_version(): int { return static::$schemaVersion; } + /** * Returns info(id,size) for attachment file * * @param int $clientId * @param string|null $bucketId * @param string $id - * - * @return object + * @return object|null */ - private function get_info(int $clientId, ?string $bucketId, string $id): ?object + + private function get_info(int $clientId, ?string $bucketId, string $id): object { $attachment = null; $args = [ @@ -965,6 +914,7 @@ class DataConsistencyChecker * @param string $id * @return string */ + private function _set_bucket(string $id): string { return substr($id, 0, static::$bucketMagic); @@ -978,9 +928,10 @@ class DataConsistencyChecker * * @return void */ + private function _update_attachment_stats(bool $add, int $size): void { - $op = $add ? '+' : '-'; + $op = $add ? '+' : self::DELIMITER; $query = $this->cassandra->prepare('UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?'); $this->cassandra->execute($query, ['arguments' => ['client_id' => (int) static::$clientId]]); $query = $this->cassandra->prepare( @@ -996,6 +947,7 @@ class DataConsistencyChecker * * @return object|null */ + private function _get_attachment_key(string $id): ?object { $result = null; @@ -1009,7 +961,6 @@ class DataConsistencyChecker if ($data && $data->valid()) { $result = (object) $data->current(); } - ; return $result; } @@ -1018,14 +969,14 @@ class DataConsistencyChecker * * @param string $attachmentId * - * @return array + * @return object|null */ + private function _get_attachment_by_id(string $attachmentId): ?object { $attachment = null; try { $properties = '*'; - echo "attid: " . $attachmentId . PHP_EOL; $key = $this->_get_attachment_key($attachmentId); if ($key) { if ($this->schema_version() === 1) { @@ -1050,8 +1001,6 @@ class DataConsistencyChecker } } catch (Cassandra\Exception\InvalidArgumentException $e) { } - //echo "ATTA: \n"; - //var_dump($attachment); return $attachment != null && $attachment->valid() ? $this->_convert_to_object($attachment->current()) : null; } @@ -1061,16 +1010,19 @@ class DataConsistencyChecker * * @param string $id */ + private function _delete_attachment_key(string $id): void { - $result = null; - $query = $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?'); - $arguments = [ - 'client_id' => static::$clientId, - 'bucket' => $this->_set_bucket($id), - 'id' => $id, - ]; - $this->cassandra->executeAsync($query, ['arguments' => $arguments]); + $this->cassandra->executeAsync( + $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?'), + [ + 'arguments' => [ + 'client_id' => static::$clientId, + 'bucket' => $this->_set_bucket($id), + 'id' => $id + ] + ] + ); } /** @@ -1082,6 +1034,7 @@ class DataConsistencyChecker * * @return void */ + private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void { $queryArguments = [ @@ -1089,7 +1042,7 @@ class DataConsistencyChecker 'bucket' => $this->_set_bucket($data_id), 'id' => $data_id, ]; - $query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . ($add ? '+' : '-') . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?'); + $query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . ($add ? '+' : self::DELIMITER) . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?'); $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]); if (!empty($attachmentId)) { @@ -1115,6 +1068,7 @@ class DataConsistencyChecker * * @return bool */ + public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool { static::$clientId = $clientId; @@ -1135,9 +1089,6 @@ class DataConsistencyChecker } $query = $this->cassandra->prepare($q); $this->cassandra->execute($query, ['arguments' => $refData]); - - - echo "DEL REFS\n"; $refData['bucket'] = $this->_set_bucket($id); $query = $this->cassandra->prepare( 'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?' @@ -1150,7 +1101,6 @@ class DataConsistencyChecker ); $result = $this->cassandra->execute($query, ['arguments' => $refData]); - echo "DEL ATTACHMENTS\n"; if ($this->schema_version() === 1) { $delQuery = $this->cassandra->prepare( 'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?' @@ -1258,6 +1208,7 @@ class DataConsistencyChecker * file,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30,,, * cassandra,/test1/testx3,testx3,testx3-thumb1,testx3-thumb2,4343,20-02-22 13:30,1,3,3abc-def */ + public function processAttachmentDeletionCSV(string $file, string $src): void { @@ -1278,7 +1229,7 @@ class DataConsistencyChecker if ($line === reset($file_lines)) { continue; } - $values = explode(",", $line); + $values = explode(',', $line); if ($values[0] === 'File/Attachment') { continue; @@ -1293,11 +1244,13 @@ class DataConsistencyChecker $data->size = $values[5]; $data->created = $values[6]; $path = dirname($data->path); + if (count($values) >= 10) { $data->clientId = (int) $values[7]; $data->bucket = $values[8]; $data->id = $values[9]; } + if ($data->source === 'Attachment' && $src === 'cassandra') { echo "will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL; $this->deleteAttachment($data->clientId, $data->bucket, $data->id); @@ -1306,12 +1259,14 @@ class DataConsistencyChecker $filePath = $values[1]; $thumb1Path = $path . '/' . $values[3]; $thumb2Path = $path . '/' . $values[4]; + if (file_exists($thumb1Path) && file_exists($thumb2Path)) { files::delete($thumb1Path); files::delete($thumb2Path); fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL); fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL); } + if (file_exists($filePath)) { unlink($filePath); unlink($thumb1Path); @@ -1325,9 +1280,7 @@ class DataConsistencyChecker } } } - fclose($logHandle); - echo "Deletion completed. The list of attachments is saved in '$logFile'." . PHP_EOL; } } diff --git a/report_template.html b/report_template.html new file mode 100644 index 0000000..8e3d699 --- /dev/null +++ b/report_template.html @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + {{table_rows}} + +
File/AttachmentFile pathFile nameThumb 1Thumb 2SizeCreation Time
+ +