From bd683608665115206300ff18da44e8ad13c09989 Mon Sep 17 00:00:00 2001 From: GbArc Date: Fri, 19 May 2023 13:41:26 +0200 Subject: [PATCH 1/4] params to remove, udpated dirs --- cassandra_consistency_script.php | 69 +++++++++++++++++++++----------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/cassandra_consistency_script.php b/cassandra_consistency_script.php index 68e0ef9..d7f253a 100644 --- a/cassandra_consistency_script.php +++ b/cassandra_consistency_script.php @@ -12,6 +12,8 @@ class DataConsistencyChecker const DEFAULT_PAGE_SIZE = 3; const CASSANDRA_USERNAME = 'cassandra'; const CASSANDRA_PASSWORD = 'cassandra'; + const CASSANDRA_KEYSPACE = 'tr_key'; + private $_cluster; private $session; private $cassandra; @@ -19,7 +21,7 @@ class DataConsistencyChecker private $directory; private $structured_directory = '/usr/share/nginx/html/testrail/db/cassandra'; - private $retrived_csv = '/usr/share/nginx/html/testrail/db/cassandra/result_from_physical_files.csv'; + private $retrived_csv; static $clientId; private static $schemaVersion = 1; static $fileBucketCounter = 1; @@ -39,12 +41,16 @@ class DataConsistencyChecker public function init(): void { $this->_cluster = Cassandra::cluster() - ->withContactPoints('cassandra') - ->withPort(9042) + ->withContactPoints(static::$cassandraHost) + ->withPort(9042) + ->withCredentials( + static::CASSANDRA_USERNAME, + static::CASSANDRA_PASSWORD + ) ->build(); if ($this->_cluster) { try { - $this->cassandra = $this->_cluster->connect("tr_key"); + $this->cassandra = $this->_cluster->connect(static::CASSANDRA_KEYSPACE); } catch (Exception $e) { echo "err\n"; } @@ -55,8 +61,8 @@ class DataConsistencyChecker public function runFromCommandLine($arguments) { - $shortOptions = "hd:v"; - $longOptions = ["help", "directory:", "version:", "v"]; + $shortOptions = "hd:v:o:r:s:"; + $longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"]; $options = getopt($shortOptions, $longOptions); @@ -68,18 +74,32 @@ class DataConsistencyChecker $directory = isset($options['directory']) ? $options['directory'] : (isset($options['d']) ? $options['d'] : null); $schemaVersion = isset($options['version']) ? $options['version'] : (isset($options['v']) ? $options['v'] : null); - if ($directory === null || $schemaVersion === null) { + $source = isset($options['source']) ? $options['source'] : (isset($options['s']) ? $options['s'] : null); + $remove = isset($options['remove']) ? $options['remove'] : (isset($options['r']) ? $options['r'] : null); + + $this->structured_directory = isset($options['output']) ? $options['output'] : (isset($options['o']) ? $options['o'] : null); + + if (($directory === null || $schemaVersion === null) && $remove === null && $source === null) { echo "Missing Attachment directory or schema version.\n"; exit; } - if (!in_array($schemaVersion, [1, 2])) { + if ($schemaVersion && !in_array($schemaVersion, [1, 2])) { echo "Invalid schema version. Only versions 1 and 2 are supported.\n"; exit; - } + } + if ($this->structured_directory == null) { + $this->structured_directory = './'; + } - static::$schemaVersion = (int)$schemaVersion; - - $this->checkConsistency('attachment_file_info'); + static::$schemaVersion = (int)$schemaVersion; + $this->directory = $directory; + $this->retrived_csv = './result_from_physical_files.csv'; + if ($remove && $source) { + $this->processAttachmentDeletionCSV($remove, $source); + } + else { + $this->checkConsistency('attachment_file_info'); + } exit; } @@ -109,7 +129,7 @@ class DataConsistencyChecker $dbEntries = $this->getDbEntries($tableName); $fileEntries = $this->getFileEntries($this->directory); $this->process_files_in_directory($this->structured_directory); - $this->createCSVById($this->retrived_csv); + //$this->createCSVById($this->retrived_csv); } private function getFileEntries($directory) @@ -177,7 +197,7 @@ class DataConsistencyChecker private function createPhysicalFileCSV($clientId, $entries) { - $fileName = "physical_" . $clientId . ".csv"; + $fileName = $this->structured_directory . "physical_" . $clientId . ".csv"; $csvFile = fopen($fileName, 'w'); fputcsv($csvFile, ['id', 'size', 'creation_time']); foreach ($entries as $entry) { @@ -194,7 +214,7 @@ class DataConsistencyChecker private function createDBFileCSV($clientId, $entries) { - $fileName = "cassandra_" . (string) $clientId . ".csv"; + $fileName = $this->structured_directory . "cassandra_" . (string) $clientId . ".csv"; $csvFile = fopen($fileName, 'w'); $headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id']; @@ -305,7 +325,10 @@ class DataConsistencyChecker $old_migrated_data = array_search('migrated_files_id', $headers); - $query = "SELECT * FROM attachment_file_info WHERE id = ? ALLOW FILTERING"; + $query = "SELECT * FROM attachment_file_info WHERE id = ? and client_id = ?"; + if ($this->schema_version() == 2) { + $query = $query . ' and bucket = ?'; + } $statement = $this->cassandra->prepare($query); foreach ($fileData as $row) { @@ -492,7 +515,7 @@ class DataConsistencyChecker if (!isset($physical_files_assoc[$id])) { $missing_physical_files[] = [ 'id' => $id, - 'file1' => [$id, $data[0], $data[1], $data[2], $data[3], $data[4], $data[5], $data[6]], + 'file1' => [$id, $data[0], $data[1], $data[2], $data[3], $data[4], $data[5]], ]; } } @@ -527,7 +550,7 @@ class DataConsistencyChecker } if (count($values) > 3) { - $file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[0]]; + $file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[6]]; } } @@ -893,7 +916,7 @@ class DataConsistencyChecker ]; if ($this->schema_version() == 1) { - $query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? ALLOW FILTERING'); + $query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?'); } else { $q = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? AND bucket = \'' . $bucketId . '\''; echo "Q: [" . $q . "]\n"; @@ -991,7 +1014,7 @@ class DataConsistencyChecker $key = $this->_get_attachment_key($attachmentId); if ($key) { if ($this->schema_version() === 1) { - $query = $this->cassandra->prepare('SELECT ' . $properties . ' FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ? ALLOW FILTERING'); + $query = $this->cassandra->prepare('SELECT ' . $properties . ' FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?'); $arguments = [ 'client_id' => static::$clientId, 'id' => $key->id, @@ -1261,11 +1284,10 @@ class DataConsistencyChecker $data->bucket = $values[8]; $data->id = $values[9]; } - if ($data->source == $src) { - if ($src === 'cassandra') { + if ($data->source === 'Attachment' && $src === 'cassandra') { echo "will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL; fwrite($logHandle, "Deleted attachment: $data->id" . PHP_EOL); - } else { + } else if ($src === 'File' && $source === 'file'){ $filePath = $directory . $values[1]; $thumb1Path = $directory . $values[1]; $thumb2Path = $directory . $values[1]; @@ -1280,7 +1302,6 @@ class DataConsistencyChecker echo "File not found: $filePath" . PHP_EOL; } } - } } } From a35654a07152b64f5d316f99ed8562177f10f22f Mon Sep 17 00:00:00 2001 From: Spase Date: Fri, 19 May 2023 19:01:57 +0200 Subject: [PATCH 2/4] Added additional changes and documentation --- cassandra_consistency_script.php | 647 ++++++++++++++++--------------- config.ini | 15 + 2 files changed, 357 insertions(+), 305 deletions(-) create mode 100644 config.ini diff --git a/cassandra_consistency_script.php b/cassandra_consistency_script.php index 68e0ef9..5a6522d 100644 --- a/cassandra_consistency_script.php +++ b/cassandra_consistency_script.php @@ -1,28 +1,17 @@ runFromCommandLine($_SERVER['argv']); } - + /** + * Initializes the Cassandra connection based on the configuration settings. + * + * @return void + */ public function init(): void { + $config = parse_ini_file("config.ini", true); $this->_cluster = Cassandra::cluster() - ->withContactPoints('cassandra') + ->withContactPoints($config['CASSANDRA']['host']) ->withPort(9042) + ->withCredentials( + $config['CASSANDRA']['user'], + $config['CASSANDRA']['password'] + ) ->build(); if ($this->_cluster) { try { - $this->cassandra = $this->_cluster->connect("tr_key"); + $this->cassandra = $this->_cluster->connect($config['CASSANDRA']['keyspace']); } catch (Exception $e) { echo "err\n"; } } - // static::$bucketMagic = defined('CASSANDRA_BUCKET_MAGIC') ? (int) CASSANDRA_BUCKET_MAGIC : 4; + } + /** + * Runs the script from the command line with the provided arguments. + * + * @param array $arguments The command line arguments. + * @return void + */ public function runFromCommandLine($arguments) { - $shortOptions = "hd:v"; - $longOptions = ["help", "directory:", "version:", "v"]; + $shortOptions = "hd:v:o:r:s:"; + $longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"]; $options = getopt($shortOptions, $longOptions); @@ -68,22 +72,47 @@ class DataConsistencyChecker $directory = isset($options['directory']) ? $options['directory'] : (isset($options['d']) ? $options['d'] : null); $schemaVersion = isset($options['version']) ? $options['version'] : (isset($options['v']) ? $options['v'] : null); - if ($directory === null || $schemaVersion === null) { + $source = isset($options['source']) ? $options['source'] : (isset($options['s']) ? $options['s'] : null); + $remove = isset($options['remove']) ? $options['remove'] : (isset($options['r']) ? $options['r'] : null); + + $structured_directory = isset($options['output']) ? $options['output'] : (isset($options['o']) ? $options['o'] : null); + if (!file_exists($structured_directory)) { + mkdir($structured_directory, 0777, true); + } + $this->structured_directory = $structured_directory; + + if (($directory === null || $schemaVersion === null) && $remove === null && $source === null) { echo "Missing Attachment directory or schema version.\n"; exit; } - if (!in_array($schemaVersion, [1, 2])) { + if ($schemaVersion && !in_array($schemaVersion, [1, 2])) { echo "Invalid schema version. Only versions 1 and 2 are supported.\n"; exit; } + if ($this->structured_directory == null) { + $this->structured_directory = './'; + } - static::$schemaVersion = (int)$schemaVersion; - - $this->checkConsistency('attachment_file_info'); + static::$schemaVersion = (int) $schemaVersion; + $this->directory = $directory; + $this->retrived_csv = './result_from_physical_files.csv'; + if ($remove && $source) { + $this->processAttachmentDeletionCSV($remove, $source); + } else { + $this->checkConsistency('attachment_file_info'); + if (is_dir($this->structured_directory)) { + $this->removeDirectory($this->structured_directory); + } + } exit; - } + } - private function displayHelpMessage() + /** + * Displays the help message with instructions on how to use the script. + * + * @return void + */ + private function displayHelpMessage(): void { $helpMessage = <<removeDirectory($path); + } else { + unlink($path); + } + } + + rmdir($directory); + } + + /** + * Checks the consistency between database entries and file entries. + * + * @param string $tableName The name of the table in the database to check consistency for. + * @return void + */ public function checkConsistency($tableName) { - - //initialize the cassandra connection $this->init(); $dbEntries = $this->getDbEntries($tableName); $fileEntries = $this->getFileEntries($this->directory); $this->process_files_in_directory($this->structured_directory); - $this->createCSVById($this->retrived_csv); } - private function getFileEntries($directory) + /** + * Retrieves file entries from a directory and organizes them based on dynamic values. + * + * @param string $directory The directory path to retrieve file entries from. + * @return array An array containing the file entries organized by dynamic values. + */ + + private function getFileEntries($directory): array { $files = glob($directory . '/*'); $entries = []; @@ -120,15 +190,11 @@ class DataConsistencyChecker foreach ($files as $file) { if (is_file($file)) { $fileName = basename($file); - - // Skip files with specific suffixes if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) { continue; } - $dashParts = explode("-", $fileName, 2); $dotParts = explode(".", $fileName); - if (count($dashParts) === 2) { $clientId = $dashParts[0]; $id = $dashParts[1]; @@ -140,16 +206,12 @@ class DataConsistencyChecker $fileParts = $dotParts; $delimiter = '.'; } else { - // Handle cases where the file name does not contain either a dash or a dot continue; } $filePath = $file; $size = filesize($filePath); - - // Use file modification time (filemtime()) instead of file creation time (filectime()) $creationTime = date('Y-m-d H:i:s', filemtime($filePath)); - $dynamicValue = substr($id, 0, 2); $creationTime = str_replace('"', '', $creationTime); @@ -162,7 +224,6 @@ class DataConsistencyChecker ]; } } - // create CSV file for each client ID's physical file entries foreach ($entries as $clientId => $clientEntries) { $this->createPhysicalFileCSV($clientId, $clientEntries); @@ -172,29 +233,41 @@ class DataConsistencyChecker } - - + /** + * Creates a CSV file containing the physical file entries for a specific client ID. + * + * @param string $clientId The client ID. + * @param array $entries An array containing the physical file entries for the client. + * @return void + */ private function createPhysicalFileCSV($clientId, $entries) { - $fileName = "physical_" . $clientId . ".csv"; + $fileName = $this->structured_directory . "physical_" . $clientId . ".csv"; $csvFile = fopen($fileName, 'w'); fputcsv($csvFile, ['id', 'size', 'creation_time']); foreach ($entries as $entry) { fputcsv($csvFile, [ $entry['file_name'], $entry['size'], - $entry['creation_time'] + $entry['creation_time'] ]); } fclose($csvFile); } + /** + * Creates a CSV file containing the entries for a specific client ID. + * + * @param string $clientId The client ID. + * @param array $entries An array containing the entries for the client. + * @return void + */ - private function createDBFileCSV($clientId, $entries) + private function createDBFileCSV($clientId, $entries): void { - $fileName = "cassandra_" . (string) $clientId . ".csv"; + $fileName = $this->structured_directory . "cassandra_" . (string) $clientId . ".csv"; $csvFile = fopen($fileName, 'w'); $headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id']; @@ -207,7 +280,7 @@ class DataConsistencyChecker $entry['creation_time'], $entry['filename'], $entry['bucket'], - $entry['client_id'], + $entry['client_id'], $entry['attachment_id'], ]; @@ -217,12 +290,18 @@ class DataConsistencyChecker fclose($csvFile); } + /** + * Retrieves entries from a database table. + * + * @param string $tableName The name of the database table. + * @return array An array containing the retrieved entries. + */ private function getDbEntries($tableName) - { - - if($this->schema_version() === 1) { + { + + if ($this->schema_version() === 1) { $query = "SELECT client_id, id, size, filename, created_on FROM $tableName"; } else { $query = "SELECT client_id, id, size, filename, created_on, bucket FROM $tableName"; @@ -238,6 +317,7 @@ class DataConsistencyChecker $entries = []; while ($result) { foreach ($result as $row) { + if (preg_match('/^[0-9]+$/', $row['id'])) { $dotParts = explode(".", $row['filename'], 2); $dynamicValue = substr($dotParts[1], 0, 2); @@ -245,7 +325,6 @@ class DataConsistencyChecker $date = date('Y-m-d H:i:s', $timestamp); $creationTime = str_replace('"', '', $date); $entry = [ - "id" => $row['id'], "size" => (string) $row['size'], "creation_time" => $creationTime, @@ -291,82 +370,17 @@ class DataConsistencyChecker return $entries; } - - private function createCSVById($csvFile) - { - $data = []; - $fileData = array_map('str_getcsv', file($csvFile)); - $headers = array_shift($fileData); + /** + * Compares a file entry with its corresponding entry in the Cassandra file association. + * + * @param string $id The ID of the file entry. + * @param array $data An array containing data of the file entry. + * @param array $cassandra_file_assoc The Cassandra file association. + * @return array|null An array containing mismatched entries, or null if the entries match. + */ - - $idIndex = array_search('File path', $headers); - $old_migrated_data = array_search('migrated_files_id', $headers); - - - $query = "SELECT * FROM attachment_file_info WHERE id = ? ALLOW FILTERING"; - $statement = $this->cassandra->prepare($query); - - foreach ($fileData as $row) { - if ($old_migrated_data !== false && isset($row[$old_migrated_data])) { - $id = $row[$old_migrated_data]; - } else { - $id = $row[2]; - } - - $options = ['arguments' => [$id]]; - - - $result = $this->cassandra->execute($statement, $options); - - - foreach ($result as $row) { - $data[] = $row; - } - } - - - $fileName = "data_by_id.csv"; - $csvFile = fopen($fileName, 'w'); - - fputcsv($csvFile, $headers); - - foreach ($data as $entry) { - $rowData = []; - - foreach ($headers as $field) { - $value = isset($entry[$field]) ? $entry[$field] : ''; - $rowData[] = $value; - } - - fputcsv($csvFile, $rowData); - } - - fclose($csvFile); - } - - private function parseCSVFile($file) - { - $file_contents = file_get_contents($file); - $file_lines = explode("\n", $file_contents); - $file_assoc = array(); - - foreach ($file_lines as $line) { - if ($line === reset($file_lines)) { - continue; - } - $values = explode(",", $line); - if (count($values) == 3) { - $values[2] = str_replace('"', '', $values[2]); - $file_assoc[$values[0]] = [$values[1], $values[2]]; - } - } - - return $file_assoc; - } - - // Helper function to read file contents and parse CSV lines private function compareFileEntries($id, $data, $cassandra_file_assoc) { if (!isset($cassandra_file_assoc[$id])) { @@ -391,13 +405,27 @@ class DataConsistencyChecker return null; } - private function getFileLines($file) + /** + * Retrieves the lines of a file and returns them as an array. + * + * @param string $file The path to the file. + * @return array An array containing the lines of the file. + */ + + private function getFileLines($file): array { $file_contents = file_get_contents($file); return explode("\n", $file_contents); } - private function filterAndMapEntries($entries) + /** + * Filters out null entries and maps the remaining entries to their first element. + * + * @param array $entries An array containing entries to be filtered and mapped. + * @return array An array of filtered and mapped entries. + */ + + private function filterAndMapEntries($entries): array { $filtered_entries = array_filter($entries); $mapped_entries = array_map(function ($entry) { @@ -405,7 +433,14 @@ class DataConsistencyChecker }, $filtered_entries); return array_values($mapped_entries); } - private function process_files_in_directory($dir) + + /** + * Processes files in a directory, performs comparisons, and generates CSV and HTML reports. + * + * @param string $dir The directory path containing the files to be processed. + * @return void + */ + private function process_files_in_directory($dir): void { $files = glob($dir . '/*.csv'); $physical_files = array(); @@ -436,15 +471,13 @@ class DataConsistencyChecker $compared_physical[] = $this->compare_csv_files($physical_file, $cassandra_file); $compared_cassandra[] = $this->compare_csv_files($cassandra_file, $physical_file); $physical_entries = $this->filterAndMapEntries($compared_cassandra); - - //echo "phe: " . var_dump($physical_entries) . PHP_EOL; $cassandra_entries = $this->filterAndMapEntries($compared_physical); } else { - if (!file_exists($physical_file)) { - $missing_physical_files[] = $physical_file; + if (!file_exists($physical_file)) { + $missing_physical_files[] = $physical_file; - echo "cfl: " . var_dump($missing_physical_files) . PHP_EOL; + echo "cfl: " . var_dump($missing_physical_files) . PHP_EOL; } if (!file_exists($cassandra_file)) { @@ -460,13 +493,13 @@ class DataConsistencyChecker if ($line === reset($physical_file_lines)) { continue; } - $values = explode(",", $line); + $values = explode(",", $line); if (count($values) == 3) { $values[2] = str_replace('"', '', $values[2]); $file_assoc[$values[0]] = [$values[1], $values[2]]; } } - foreach ($file_assoc as $id => $data) { + foreach ($file_assoc as $id => $data) { if (!isset($cassandra_files_assoc[$id])) { $missing_cassandra_entries[] = [ 'id' => $id, @@ -475,7 +508,7 @@ class DataConsistencyChecker } } } else { - + } } @@ -483,34 +516,36 @@ class DataConsistencyChecker $physical_file = $dir . '/physical_' . $file_num . '.csv'; if (!file_exists($physical_file)) { - $cassandra_file_lines = $this->getFileLines($cassandra_file); - // echo "cfl: " . var_dump($cassandra_file_lines) . PHP_EOL; + $cassandra_file_lines = $this->getFileLines($cassandra_file); $file_assoc = $this->buildFileAssociation($cassandra_file_lines); - foreach ($file_assoc as $id => $data) { - + foreach ($file_assoc as $id => $data) { if (!isset($physical_files_assoc[$id])) { $missing_physical_files[] = [ 'id' => $id, - 'file1' => [$id, $data[0], $data[1], $data[2], $data[3], $data[4], $data[5], $data[6]], + 'file1' => [$id, $data[0], $data[1], $data[2], $data[3], $data[4], $data[5]], ]; } } } } + $result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR); - - // var_dump($missing_physical_files); - $result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR); - + $result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR); $this->generateCsvReportForDbEntries($result_from_cassandra_entries, 'result_from_cassandra_entries.csv'); $this->generateHtmlReport($result_from_cassandra_entries, 'cassandra.html'); $this->generateCsvReportForPhysicalFiles($result_from_physical_files, 'result_from_physical_files.csv'); $this->generateHtmlReport($result_from_physical_files, 'physical.html'); - // $this->compareCSVFilesTransform($this->structured_directory . 'cassandra_all_entries.csv', $this->structured_directory . 'result_from_cassandra_entries.csv', $this->structured_directory . 'final_file.csv'); - // $this->deletePhysicalFilesFromCsv('result_from_cassandra_entries.csv'); + } - private function buildFileAssociation($file_lines) + /** + * Builds an associative array from file lines. + * + * @param array $file_lines An array containing lines of a file. + * @return array An associative array representing the file association. + */ + + private function buildFileAssociation($file_lines): array { $file_assoc = []; @@ -520,22 +555,28 @@ class DataConsistencyChecker } $values = explode(",", $line); - $values[2] = str_replace('"', '', $values[2]); if (count($values) == 3) { $file_assoc[$values[0]] = [$values[1], $values[2]]; } if (count($values) > 3) { - $file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[0]]; + $file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[6]]; } } return $file_assoc; } + /** + * Compares two CSV files and returns missing entries or entries with mismatched data. + * + * @param string $file1_path The file path of the first CSV file. + * @param string $file2_path The file path of the second CSV file. + * @return array An array containing missing entries or entries with mismatched data. + */ - private function compare_csv_files($file1_path, $file2_path) + private function compare_csv_files($file1_path, $file2_path): array { $file1_data = array_map('str_getcsv', file($file1_path)); $file2_data = array_map('str_getcsv', file($file2_path)); @@ -566,7 +607,7 @@ class DataConsistencyChecker $row[$time_index_1], isset($row[$filename_index_1]) ? $row[$filename_index_1] : null, isset($row[$clientid_index_1]) ? $row[$clientid_index_1] : null, - isset($row[$bucket_index_1]) ? $row[$bucket_index_1] : null, + isset($row[$bucket_index_1]) ? $row[$bucket_index_1] : null, isset($row[$attachment_id_index_1]) ? $row[$attachment_id_index_1] : null ], ]; @@ -594,7 +635,7 @@ class DataConsistencyChecker ]; } else { $file2_data = $file2_assoc[$id]['file2']; - if ($data['file1'][1] !== $file2_data[1] || $data['file1'][2] !== $file2_data[2]) { + if ($data['file1'][0] !== $file2_data[0] || $data['file1'][1] !== $file2_data[1]) { $missing_entries[] = [ 'id' => $id, 'file1' => $data['file1'], @@ -619,19 +660,16 @@ class DataConsistencyChecker private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void { $fp = fopen($name, 'w'); - fputcsv($fp, ['File/Attachment', 'File path', 'File name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time', 'ClientId', 'Bucket' , 'Id']); + fputcsv($fp, ['File/Attachment', 'File path', 'File name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time', 'ClientId', 'Bucket', 'Id']); foreach ($inconsistentFiles as $row) { - - $file1Value = $row['file1'][0]; $check_value = $row['id']; - //$check_value = is_numeric($row['id']) ? $row['file1'][3] : $file1Value; - $filePath = $this->directory . '/' . $check_value ? $this->directory . '/' . $check_value : $this->directory . '/' . $row['file2'][0]; + $filePath = $check_value ? $check_value : $row['file2'][0]; if (is_numeric($row['id'])) { - $filePath = $this->directory . '/' . $row['file1'][3]; + $filePath = $row['file1'][3]; $check_value = $row['file1'][3]; - } - + } + $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; @@ -658,7 +696,13 @@ class DataConsistencyChecker chmod($name, 0666); } - + /** + * Generates a CSV report for inconsistent database entries. + * + * @param array $inconsistentFiles An array containing inconsistent file data. + * @param string $name The name of the CSV report file to be generated. + * @return void + */ private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void { @@ -666,20 +710,24 @@ class DataConsistencyChecker fputcsv($fp, ['File/Attachment', 'Entry Path', 'Entry Name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time']); foreach ($inconsistentFiles as $row) { - - $filePath = $row['file1'][0] ? $row['file1'][0] : $row['file2'][0]; + $filePath = $this->directory . '/' . $row['file1'][0] ? $this->directory . '/' . $row['file1'][0] : $this->directory . '/' . $row['file2'][0]; $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); - $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; - $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; + $thumb1 = $row['file1'][0] . '-thumb1'; + $thumb2 = $row['file1'][0] . '-thumb2'; + if (is_string($row['id']) && strpos($row['id'], '.') !== false) { + $old_attachment = explode('.', $row['id'])[0]; + $thumb1 = ''; + $thumb2 = ''; + } fputcsv($fp, [ - 'Attachment', + 'File', $filePath, $row['file1'][0], $thumb1, $thumb2, $size, - $creationTime + $creationTime ]); } @@ -696,6 +744,7 @@ class DataConsistencyChecker */ private function generateHtmlReport(array $inconsistentFiles, string $name): void { + $file = fopen($name, 'w'); if (!$file) { throw new Exception('Failed to open the file for writing.'); @@ -734,7 +783,16 @@ class DataConsistencyChecker fclose($file); } - function compareCSVFilesTransform($firstFile, $secondFile, $finalFile) + /** + * Compares two CSV files and creates a new CSV file containing the matching entries. + * + * @param string $firstFile The path to the first CSV file. + * @param string $secondFile The path to the second CSV file. + * @param string $finalFile The path to the final CSV file to be created. + * @return void + */ + + function compareCSVFilesTransform($firstFile, $secondFile, $finalFile): void { // Read the first CSV file $firstData = array_map('str_getcsv', file($firstFile)); @@ -790,6 +848,12 @@ class DataConsistencyChecker fclose($finalCsvFile); } + /** + * Deletes physical files based on the entries listed in a CSV file. + * + * @param string $csvFile The path to the CSV file containing the list of files to delete. + * @return void + */ private function deletePhysicalFilesFromCsv(string $csvFile): void { @@ -842,35 +906,13 @@ class DataConsistencyChecker } - private static function _is_uuid(string $id): bool - { - $regex = '/^[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}$/'; - return preg_match($regex, $id); - } - - /** - * Check if the attachment string is a valid migrated attachment - * - * @param string $attachmentId - * - * @return bool - */ - private static function _isValidMigratedAttachment(string $attachmentId = ''): bool - { - return str::sub($attachmentId, 0, 2) === OLD_ATTACHMENTS_PREFIX; - } - - - - /* ATTACHMENT DELETION ===================================================================================================================== */ - /** * Returns cassandra schema version * * * @return int */ - + private function schema_version() { return static::$schemaVersion; @@ -893,10 +935,10 @@ class DataConsistencyChecker ]; if ($this->schema_version() == 1) { - $query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? ALLOW FILTERING'); - } else { - $q = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? AND bucket = \'' . $bucketId . '\''; - echo "Q: [" . $q . "]\n"; + $query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?'); + } else { + $q = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? AND bucket = \'' . $bucketId . '\''; + echo "Q: [" . $q . "]\n"; $query = $this->cassandra->prepare($q); } $res = $this->cassandra->execute( @@ -906,14 +948,14 @@ class DataConsistencyChecker ] ); if ($res && $res->valid()) { - $tmp = $res->current(); + $tmp = $res->current(); - $attachment = (object) $tmp; - $attachment->id = (string) $tmp['id']; - if (array_key_exists('size', $tmp)) { - $attachment->size = (int) $tmp['size']; - } - } + $attachment = (object) $tmp; + $attachment->id = (string) $tmp['id']; + if (array_key_exists('size', $tmp)) { + $attachment->size = (int) $tmp['size']; + } + } return $attachment; } @@ -927,7 +969,7 @@ class DataConsistencyChecker { return substr($id, 0, static::$bucketMagic); } - + /** * updates attachment count and size * @@ -956,22 +998,18 @@ class DataConsistencyChecker */ private function _get_attachment_key(string $id): ?object { - //echo "GET KEY!\n"; $result = null; $query = $this->cassandra->prepare('SELECT * from attachment_ids where id = ? AND bucket = ? AND client_id = ?'); $arguments = [ 'client_id' => static::$clientId, 'bucket' => $this->_set_bucket($id), 'id' => $id, - ]; - //echo "args: " . var_dump($arguments, true) . PHP_EOL; - $data = $this->cassandra->execute($query, ['arguments' => $arguments]); - echo "GK2!!!!"; - //var_dump($data); + ]; + $data = $this->cassandra->execute($query, ['arguments' => $arguments]); if ($data && $data->valid()) { $result = (object) $data->current(); - } - //var_dump($result); + } + ; return $result; } @@ -986,12 +1024,12 @@ class DataConsistencyChecker { $attachment = null; try { - $properties = '*'; - echo "attid: ". $attachmentId . PHP_EOL; - $key = $this->_get_attachment_key($attachmentId); + $properties = '*'; + echo "attid: " . $attachmentId . PHP_EOL; + $key = $this->_get_attachment_key($attachmentId); if ($key) { if ($this->schema_version() === 1) { - $query = $this->cassandra->prepare('SELECT ' . $properties . ' FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ? ALLOW FILTERING'); + $query = $this->cassandra->prepare('SELECT ' . $properties . ' FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?'); $arguments = [ 'client_id' => static::$clientId, 'id' => $key->id, @@ -1011,9 +1049,9 @@ class DataConsistencyChecker $attachment = $this->cassandra->execute($query, ['arguments' => $arguments]); } } catch (Cassandra\Exception\InvalidArgumentException $e) { - } - //echo "ATTA: \n"; - //var_dump($attachment); + } + //echo "ATTA: \n"; + //var_dump($attachment); return $attachment != null && $attachment->valid() ? $this->_convert_to_object($attachment->current()) : null; } @@ -1079,28 +1117,27 @@ class DataConsistencyChecker */ public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool { - static::$clientId = $clientId; + static::$clientId = $clientId; $refData = [ 'client_id' => $clientId, 'id' => $id, ]; - $fileInfo = $this->get_info($clientId, $bucketId, $id); - //var_dump($fileInfo); + $fileInfo = $this->get_info($clientId, $bucketId, $id); if ($fileInfo) { $this->_update_attachment_stats(false, $fileInfo->size); - } else { - return false; - } - - $q = 'DELETE FROM attachment_file_info WHERE id = ? AND client_id = ? '; - if ($this->schema_version() === 2) { - $q = $q . ' AND bucket = \'' . $bucketId . '\''; - } - $query = $this->cassandra->prepare($q); - $this->cassandra->execute($query, ['arguments' => $refData]); - + } else { + return false; + } - echo "DEL REFS\n"; + $q = 'DELETE FROM attachment_file_info WHERE id = ? AND client_id = ? '; + if ($this->schema_version() === 2) { + $q = $q . ' AND bucket = \'' . $bucketId . '\''; + } + $query = $this->cassandra->prepare($q); + $this->cassandra->execute($query, ['arguments' => $refData]); + + + echo "DEL REFS\n"; $refData['bucket'] = $this->_set_bucket($id); $query = $this->cassandra->prepare( 'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?' @@ -1113,7 +1150,7 @@ class DataConsistencyChecker ); $result = $this->cassandra->execute($query, ['arguments' => $refData]); - echo "DEL ATTACHMENTS\n"; + echo "DEL ATTACHMENTS\n"; if ($this->schema_version() === 1) { $delQuery = $this->cassandra->prepare( 'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?' @@ -1122,15 +1159,15 @@ class DataConsistencyChecker $delQuery = $this->cassandra->prepare( 'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ? AND entity_id = ?' ); - } + } while ($result && $result->valid()) { $attachmentId = $result->current()['attachment_id']; - $key = $this->_get_attachment_key((string) $attachmentId); - if ($key == null) { - $result->next(); - continue; - } - if ($this->schema_version() === 1) { + $key = $this->_get_attachment_key((string) $attachmentId); + if ($key == null) { + $result->next(); + continue; + } + if ($this->schema_version() === 1) { $selectQuery = $this->cassandra->prepare( 'SELECT entity_id FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?' ); @@ -1166,40 +1203,40 @@ class DataConsistencyChecker $query = $this->cassandra->prepare( 'DELETE FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?' ); - $this->cassandra->execute($query, ['arguments' => $refData]); - $result = false; - $attachment = $this->_get_attachment_by_id($id); - if ($attachment) { - if ($this->schema_version() === 1) { - $query = $this->cassandra->prepare("DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_type = ? AND client_id = ?"); - $arguments = [ - 'arguments' => [ - 'client_id' => static::$clientId, - 'id' => $id, - 'project_id' => $attachment->project_id, - 'entity_type' => $attachment->entity_type - ], - ]; - } else { - $query = $this->cassandra->prepare("DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_id = ? AND entity_type = ? AND client_id = ?"); - $arguments = [ - 'arguments' => [ - 'client_id' => static::$clientId, - 'id' => $id, - 'project_id' => $attachment->project_id, - 'entity_type' => $attachment->entity_type, - 'entity_id' => $attachment->entity_id - ], - ]; - } - $queryResult = $this->cassandra->execute($query, $arguments) != null; + $this->cassandra->execute($query, ['arguments' => $refData]); + $result = false; + $attachment = $this->_get_attachment_by_id($id); + if ($attachment) { + if ($this->schema_version() === 1) { + $query = $this->cassandra->prepare("DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_type = ? AND client_id = ?"); + $arguments = [ + 'arguments' => [ + 'client_id' => static::$clientId, + 'id' => $id, + 'project_id' => $attachment->project_id, + 'entity_type' => $attachment->entity_type + ], + ]; + } else { + $query = $this->cassandra->prepare("DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_id = ? AND entity_type = ? AND client_id = ?"); + $arguments = [ + 'arguments' => [ + 'client_id' => static::$clientId, + 'id' => $id, + 'project_id' => $attachment->project_id, + 'entity_type' => $attachment->entity_type, + 'entity_id' => $attachment->entity_id + ], + ]; + } + $queryResult = $this->cassandra->execute($query, $arguments) != null; - if ($queryResult) { - $result = true; - $this->_update_file_refs($attachment->data_id, false, $attachment->id); - $this->_delete_attachment_key($id); - } - } + if ($queryResult) { + $result = true; + $this->_update_file_refs($attachment->data_id, false, $attachment->id); + $this->_delete_attachment_key($id); + } + } return $result; } @@ -1221,7 +1258,7 @@ class DataConsistencyChecker * file,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30,,, * cassandra,/test1/testx3,testx3,testx3-thumb1,testx3-thumb2,4343,20-02-22 13:30,1,3,3abc-def */ - public function processAttachmentDeletionCSV(string $file, string $src) : void + public function processAttachmentDeletionCSV(string $file, string $src): void { echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL; @@ -1236,7 +1273,6 @@ class DataConsistencyChecker $file_lines = explode("\n", $file_contents); $logFile = 'deleted_files.log'; $logHandle = fopen($logFile, 'a'); - $directory = $this->directory . '/'; foreach ($file_lines as $line) { if ($line === reset($file_lines)) { @@ -1245,40 +1281,46 @@ class DataConsistencyChecker $values = explode(",", $line); if ($values[0] === 'File/Attachment') { - continue; + continue; } if (count($values) >= 7) { $data = (object) array(); - $data->source = $values[0]; + $data->source = $values[0]; $data->path = $values[1]; $data->name = $values[2]; $data->thumb1 = $values[3]; $data->thumb2 = $values[4]; $data->size = $values[5]; $data->created = $values[6]; - if(count($values) >= 10) { + $path = dirname($data->path); + if (count($values) >= 10) { $data->clientId = (int) $values[7]; $data->bucket = $values[8]; $data->id = $values[9]; } - if ($data->source == $src) { - if ($src === 'cassandra') { - echo "will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL; - fwrite($logHandle, "Deleted attachment: $data->id" . PHP_EOL); - } else { - $filePath = $directory . $values[1]; - $thumb1Path = $directory . $values[1]; - $thumb2Path = $directory . $values[1]; - if (file_exists($filePath)) { - // unlink($filePath); - // unlink($thumb1Path); - // unlink($thumb2Path); - echo "File deleted: $filePath" . PHP_EOL; + if ($data->source === 'Attachment' && $src === 'cassandra') { + echo "will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL; + $this->deleteAttachment($data->clientId, $data->bucket, $data->id); + fwrite($logHandle, "Deleted attachment: $data->id" . PHP_EOL); + } else if ($data->source === 'File' && $src === 'file') { + $filePath = $values[1]; + $thumb1Path = $path . '/' . $values[3]; + $thumb2Path = $path . '/' . $values[4]; + if (file_exists($thumb1Path) && file_exists($thumb2Path)) { + files::delete($thumb1Path); + files::delete($thumb2Path); + fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL); + fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL); + } + if (file_exists($filePath)) { + unlink($filePath); + unlink($thumb1Path); + unlink($thumb2Path); + echo "File deleted: $filePath" . PHP_EOL; // Write the deleted file path to the log file - fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL); - } else { - echo "File not found: $filePath" . PHP_EOL; - } + fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL); + } else { + echo "File not found: $filePath" . PHP_EOL; } } } @@ -1290,10 +1332,5 @@ class DataConsistencyChecker } } -#$options = getopt('', ['directory:']); $checker = new DataConsistencyChecker(); -$checker->checkConsistency("attachment_file_info", true); -#$checker::$cassandraHost = 'localhost'; -#$checker->init($options); -#$checker->deleteAttachment("1", "f", "ff29ead0-8696-4ef1-8120-538d6dd7efd1"); -#$checker->processAttachmentDeletionCSV("todelete.csv", "file"); +$checker->checkConsistency("attachment_file_info", true); \ No newline at end of file diff --git a/config.ini b/config.ini new file mode 100644 index 0000000..c3f6a72 --- /dev/null +++ b/config.ini @@ -0,0 +1,15 @@ +[SQL] +driver = mysql +host = 127.0.0.1 +databaseName = testrail +user = testrail +password = 123456789 +port = 6666 + +[CASSANDRA] +host = cassandra +databaseName = testrail +user = casandra +password = cassandra +port = 9042 +keyspace = testrail From 4f7d180d2c20a704cdaabc68c87c6a3cadff368d Mon Sep 17 00:00:00 2001 From: Spase Date: Mon, 29 May 2023 08:23:39 +0200 Subject: [PATCH 3/4] Changes from feedback --- cassandra_consistency_script.php | 579 ++++++++++++++----------------- report_template.html | 37 ++ 2 files changed, 303 insertions(+), 313 deletions(-) create mode 100644 report_template.html diff --git a/cassandra_consistency_script.php b/cassandra_consistency_script.php index 5a6522d..24fa4e0 100644 --- a/cassandra_consistency_script.php +++ b/cassandra_consistency_script.php @@ -1,19 +1,41 @@ _cluster = Cassandra::cluster() - ->withContactPoints($config['CASSANDRA']['host']) - ->withPort(9042) + ->withContactPoints($cassandraConfig['host']) + ->withPort($cassandraConfig['port']) ->withCredentials( - $config['CASSANDRA']['user'], - $config['CASSANDRA']['password'] + $cassandraConfig['user'], + $cassandraConfig['password'] ) ->build(); if ($this->_cluster) { try { - $this->cassandra = $this->_cluster->connect($config['CASSANDRA']['keyspace']); + $this->cassandra = $this->_cluster->connect($cassandraConfig['keyspace']); } catch (Exception $e) { - echo "err\n"; + echo "An error occurred: " . $e->getMessage() . "\n"; } } - - } /** @@ -57,61 +79,72 @@ class DataConsistencyChecker * @param array $arguments The command line arguments. * @return void */ - public function runFromCommandLine($arguments) - { - $shortOptions = "hd:v:o:r:s:"; - $longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"]; + + public function runFromCommandLine(array $arguments): void + { + $shortOptions = "hd:v:o:r:s:"; + $longOptions = ["help", "directory:", "version:", "v", "output:", "o", "remove:", "r", "source:", "s"]; + + $options = getopt($shortOptions, $longOptions); + + if (count($options) === 0 || isset($options['h']) || isset($options['help'])) { + $this->displayHelpMessage(); + exit; + } + + $directory = $options['directory'] ?? $options['d'] ?? null; + $schemaVersion = $options['version'] ?? $options['v'] ?? null; + $source = $options['source'] ?? $options['s'] ?? null; + $remove = $options['remove'] ?? $options['r'] ?? null; + $structured_directory = $options['output'] ?? $options['o'] ?? null; + + if (!file_exists($structured_directory)) { + if ($structured_directory === null) { + echo "Output directory is required. Please specify the --output option.\n"; + exit; + } + mkdir($structured_directory, 0777, true); + } + + $this->structured_directory = $structured_directory; + + if (($directory === null || $schemaVersion === null) && $remove === null && $source === null) { + echo "Missing Attachment directory or schema version.\n"; + exit; + } + + if ($schemaVersion && !in_array($schemaVersion, [1, 2])) { + echo "Invalid schema version. Only versions 1 and 2 are supported.\n"; + exit; + } - $options = getopt($shortOptions, $longOptions); - - if (count($options) == 0 || isset($options['h']) || isset($options['help'])) { - $this->displayHelpMessage(); + if ($structured_directory === null) { + echo "Output directory is required. Please specify the --output option.\n"; exit; - } - - $directory = isset($options['directory']) ? $options['directory'] : (isset($options['d']) ? $options['d'] : null); - $schemaVersion = isset($options['version']) ? $options['version'] : (isset($options['v']) ? $options['v'] : null); - - $source = isset($options['source']) ? $options['source'] : (isset($options['s']) ? $options['s'] : null); - $remove = isset($options['remove']) ? $options['remove'] : (isset($options['r']) ? $options['r'] : null); - - $structured_directory = isset($options['output']) ? $options['output'] : (isset($options['o']) ? $options['o'] : null); - if (!file_exists($structured_directory)) { - mkdir($structured_directory, 0777, true); - } - $this->structured_directory = $structured_directory; - - if (($directory === null || $schemaVersion === null) && $remove === null && $source === null) { - echo "Missing Attachment directory or schema version.\n"; - exit; - } - if ($schemaVersion && !in_array($schemaVersion, [1, 2])) { - echo "Invalid schema version. Only versions 1 and 2 are supported.\n"; - exit; - } - if ($this->structured_directory == null) { - $this->structured_directory = './'; - } - - static::$schemaVersion = (int) $schemaVersion; - $this->directory = $directory; - $this->retrived_csv = './result_from_physical_files.csv'; - if ($remove && $source) { - $this->processAttachmentDeletionCSV($remove, $source); - } else { - $this->checkConsistency('attachment_file_info'); - if (is_dir($this->structured_directory)) { - $this->removeDirectory($this->structured_directory); - } - } - exit; - } + } + + static::$schemaVersion = (int) $schemaVersion; + $this->directory = $directory; + $this->retrived_csv = './result_from_physical_files.csv'; + + if ($remove && $source) { + $this->processAttachmentDeletionCSV($remove, $source); + } else { + $this->checkConsistency('attachment_file_info'); + if (is_dir($this->structured_directory)) { + $this->removeDirectory($this->structured_directory); + } + } + exit; + } + /** * Displays the help message with instructions on how to use the script. * * @return void */ + private function displayHelpMessage(): void { $helpMessage = <<init(); $dbEntries = $this->getDbEntries($tableName); @@ -182,7 +219,7 @@ class DataConsistencyChecker * @return array An array containing the file entries organized by dynamic values. */ - private function getFileEntries($directory): array + private function getFileEntries(string $directory): array { $files = glob($directory . '/*'); $entries = []; @@ -190,19 +227,20 @@ class DataConsistencyChecker foreach ($files as $file) { if (is_file($file)) { $fileName = basename($file); + if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) { continue; } - $dashParts = explode("-", $fileName, 2); - $dotParts = explode(".", $fileName); + $dashParts = explode(self::DELIMITER, $fileName, 2); + $dotParts = explode('.', $fileName); if (count($dashParts) === 2) { $clientId = $dashParts[0]; $id = $dashParts[1]; $fileParts = $dashParts; - $delimiter = '-'; + $delimiter = self::DELIMITER; } elseif (count($dotParts) >= 2) { $clientId = $dotParts[0]; - $id = implode(".", array_slice($dotParts, 1)); + $id = implode('.', array_slice($dotParts, 1)); $fileParts = $dotParts; $delimiter = '.'; } else { @@ -228,7 +266,6 @@ class DataConsistencyChecker foreach ($entries as $clientId => $clientEntries) { $this->createPhysicalFileCSV($clientId, $clientEntries); } - return $entries; } @@ -241,9 +278,9 @@ class DataConsistencyChecker * @return void */ - private function createPhysicalFileCSV($clientId, $entries) + private function createPhysicalFileCSV(string $clientId, array $entries): void { - $fileName = $this->structured_directory . "physical_" . $clientId . ".csv"; + $fileName = $this->structured_directory . 'physical_' . $clientId . self::CSV_EXTENSION; $csvFile = fopen($fileName, 'w'); fputcsv($csvFile, ['id', 'size', 'creation_time']); foreach ($entries as $entry) { @@ -265,9 +302,9 @@ class DataConsistencyChecker * @return void */ - private function createDBFileCSV($clientId, $entries): void + private function createDBFileCSV(string $clientId, array $entries): void { - $fileName = $this->structured_directory . "cassandra_" . (string) $clientId . ".csv"; + $fileName = $this->structured_directory . 'cassandra_' . (string) $clientId . self::CSV_EXTENSION; $csvFile = fopen($fileName, 'w'); $headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id']; @@ -283,10 +320,8 @@ class DataConsistencyChecker $entry['client_id'], $entry['attachment_id'], ]; - fputcsv($csvFile, $rowData); } - fclose($csvFile); } @@ -297,15 +332,13 @@ class DataConsistencyChecker * @return array An array containing the retrieved entries. */ - - private function getDbEntries($tableName) + private function getDbEntries(string $tableName): array { - - if ($this->schema_version() === 1) { - $query = "SELECT client_id, id, size, filename, created_on FROM $tableName"; - } else { - $query = "SELECT client_id, id, size, filename, created_on, bucket FROM $tableName"; - } + $query = sprintf( + 'SELECT client_id, id, size, filename, created_on%s FROM %s', + $this->schema_version() === 1 ? '' : ', bucket', + $tableName + ); $arguments = []; $result = $this->cassandra->execute( $query, @@ -317,7 +350,6 @@ class DataConsistencyChecker $entries = []; while ($result) { foreach ($result as $row) { - if (preg_match('/^[0-9]+$/', $row['id'])) { $dotParts = explode(".", $row['filename'], 2); $dynamicValue = substr($dotParts[1], 0, 2); @@ -336,7 +368,7 @@ class DataConsistencyChecker $entries[$dynamicValue][$row['filename']] = $entry; } else { $clientId = substr($row['id'], 0, 2); - $file = $row['client_id'] . '-' . $row['id']; + $file = $row['client_id'] . self::DELIMITER . $row['id']; $timestamp = (int) $row['created_on']; $date = date('Y-m-d H:i:s', $timestamp); $creationTime = str_replace('"', '', $date); @@ -357,7 +389,6 @@ class DataConsistencyChecker } $result = $result->nextPage(); } - // Merge all entries into a single array $allEntries = []; foreach ($entries as $clientEntries) { @@ -381,7 +412,7 @@ class DataConsistencyChecker * @return array|null An array containing mismatched entries, or null if the entries match. */ - private function compareFileEntries($id, $data, $cassandra_file_assoc) + private function compareFileEntries(string $id, array $data, array $cassandra_file_assoc) { if (!isset($cassandra_file_assoc[$id])) { return [ @@ -412,7 +443,7 @@ class DataConsistencyChecker * @return array An array containing the lines of the file. */ - private function getFileLines($file): array + private function getFileLines(string $file): array { $file_contents = file_get_contents($file); return explode("\n", $file_contents); @@ -425,7 +456,7 @@ class DataConsistencyChecker * @return array An array of filtered and mapped entries. */ - private function filterAndMapEntries($entries): array + private function filterAndMapEntries(array $entries): array { $filtered_entries = array_filter($entries); $mapped_entries = array_map(function ($entry) { @@ -440,18 +471,27 @@ class DataConsistencyChecker * @param string $dir The directory path containing the files to be processed. * @return void */ - private function process_files_in_directory($dir): void + + private function process_files_in_directory(string $dir): void { $files = glob($dir . '/*.csv'); $physical_files = array(); $cassandra_files = array(); foreach ($files as $file) { - $filename = basename($file, '.csv'); - $csv_type = substr($filename, 0, strpos($filename, '_')); - $file_num = substr($filename, strpos($filename, '_') + 1); - if ($csv_type == 'physical') { + $filename = basename($file, static::CSV_EXTENSION); + $csv_type = substr( + $filename, + 0, + strpos($filename, '_') + ); + $file_num = substr( + $filename, + strpos($filename, '_') + 1 + ); + + if ($csv_type === 'physical') { $physical_files[$file_num] = $file; - } elseif ($csv_type == 'cassandra') { + } elseif ($csv_type === 'cassandra') { $cassandra_files[$file_num] = $file; } } @@ -465,7 +505,7 @@ class DataConsistencyChecker $physical_entries = []; foreach ($physical_files as $file_num => $physical_file) { - $cassandra_file = $dir . '/cassandra_' . $file_num . '.csv'; + $cassandra_file = $dir . '/cassandra_' . $file_num . self::CSV_EXTENSION; if (file_exists($physical_file) && file_exists($cassandra_file)) { $compared_physical[] = $this->compare_csv_files($physical_file, $cassandra_file); @@ -476,9 +516,6 @@ class DataConsistencyChecker } else { if (!file_exists($physical_file)) { $missing_physical_files[] = $physical_file; - - echo "cfl: " . var_dump($missing_physical_files) . PHP_EOL; - } if (!file_exists($cassandra_file)) { $missing_cassandra_files[] = $cassandra_file; @@ -493,8 +530,8 @@ class DataConsistencyChecker if ($line === reset($physical_file_lines)) { continue; } - $values = explode(",", $line); - if (count($values) == 3) { + $values = explode(',', $line); + if (count($values) === self::NUM_FIELDS) { $values[2] = str_replace('"', '', $values[2]); $file_assoc[$values[0]] = [$values[1], $values[2]]; } @@ -513,7 +550,7 @@ class DataConsistencyChecker } foreach ($cassandra_files as $file_num => $cassandra_file) { - $physical_file = $dir . '/physical_' . $file_num . '.csv'; + $physical_file = $dir . '/physical_' . $file_num . self::CSV_EXTENSION; if (!file_exists($physical_file)) { $cassandra_file_lines = $this->getFileLines($cassandra_file); @@ -532,12 +569,14 @@ class DataConsistencyChecker $result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR); $result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR); - $this->generateCsvReportForDbEntries($result_from_cassandra_entries, 'result_from_cassandra_entries.csv'); - $this->generateHtmlReport($result_from_cassandra_entries, 'cassandra.html'); - $this->generateCsvReportForPhysicalFiles($result_from_physical_files, 'result_from_physical_files.csv'); - $this->generateHtmlReport($result_from_physical_files, 'physical.html'); + + $this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV); + $this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT); + $this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV); + $this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT); } + /** * Builds an associative array from file lines. * @@ -545,26 +584,23 @@ class DataConsistencyChecker * @return array An associative array representing the file association. */ - private function buildFileAssociation($file_lines): array + private function buildFileAssociation(array $file_lines): array { $file_assoc = []; - foreach ($file_lines as $line) { if ($line === reset($file_lines)) { continue; } - $values = explode(",", $line); - if (count($values) == 3) { - + $values = explode(',', $line); + if (count($values) === self::NUM_FIELDS) { $file_assoc[$values[0]] = [$values[1], $values[2]]; } - if (count($values) > 3) { + if (count($values) > 3) { $file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[6]]; } } - return $file_assoc; } @@ -576,7 +612,7 @@ class DataConsistencyChecker * @return array An array containing missing entries or entries with mismatched data. */ - private function compare_csv_files($file1_path, $file2_path): array + private function compare_csv_files(string $file1_path, string $file2_path): array { $file1_data = array_map('str_getcsv', file($file1_path)); $file2_data = array_map('str_getcsv', file($file2_path)); @@ -597,8 +633,16 @@ class DataConsistencyChecker $clientid_index_1 = array_search('client_id', $file1_headers); $bucket_index_1 = array_search('bucket', $file1_headers); $attachment_id_index_1 = array_search('attachment_id', $file1_headers); - - $file1_assoc = array_reduce($file1_data, function ($result, $row) use ($id_index_1, $size_index_1, $time_index_1, $filename_index_1, $clientid_index_1, $bucket_index_1, $attachment_id_index_1) { + $file1_assoc = array_reduce( + $file1_data, function ($result, $row) use ( + $id_index_1, + $size_index_1, + $time_index_1, + $filename_index_1, + $clientid_index_1, + $bucket_index_1, + $attachment_id_index_1 + ) { $result[$row[$id_index_1]] = [ 'id' => $row[$id_index_1], 'file1' => [ @@ -613,7 +657,13 @@ class DataConsistencyChecker ]; return $result; }, []); - $file2_assoc = array_reduce($file2_data, function ($result, $row) use ($id_index_2, $size_index_2, $time_index_2, $filename_index_2) { + $file2_assoc = array_reduce( + $file2_data, function ($result, $row) use ( + $id_index_2, + $size_index_2, + $time_index_2, + $filename_index_2 + ) { $result[$row[$id_index_2]] = [ 'id' => $row[$id_index_2], 'file2' => [ @@ -648,19 +698,28 @@ class DataConsistencyChecker return $missing_entries; } - - - /** * Generates a CSV report of inconsistent files. * * @param array $inconsistentFiles An array of inconsistent files. * @param string $filename The filename to use for the report. */ + private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void { $fp = fopen($name, 'w'); - fputcsv($fp, ['File/Attachment', 'File path', 'File name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time', 'ClientId', 'Bucket', 'Id']); + fputcsv($fp, [ + self::CSV_COLUMN_FILE_ATTACHMENT, + self::CSV_COLUMN_FILE_PATH, + self::CSV_COLUMN_FILE_NAME, + self::CSV_COLUMN_THUMB1, + self::CSV_COLUMN_THUMB2, + self::CSV_COLUMN_SIZE, + self::CSV_COLUMN_CREATION_TIME, + self::CSV_COLUMN_CLIENT_ID, + self::CSV_COLUMN_BUCKET, + self::CSV_COLUMN_ID + ]); foreach ($inconsistentFiles as $row) { $check_value = $row['id']; @@ -691,7 +750,6 @@ class DataConsistencyChecker $attachmentId, ]); } - fclose($fp); chmod($name, 0666); } @@ -703,11 +761,19 @@ class DataConsistencyChecker * @param string $name The name of the CSV report file to be generated. * @return void */ + private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void { - $fp = fopen($name, 'w'); - fputcsv($fp, ['File/Attachment', 'Entry Path', 'Entry Name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time']); + fputcsv($fp, [ + self::CSV_COLUMN_FILE_ATTACHMENT, + 'Entry Path', + 'Entry Name', + self::CSV_COLUMN_THUMB1, + self::CSV_COLUMN_THUMB2, + self::CSV_COLUMN_SIZE, + self::CSV_COLUMN_CREATION_TIME + ]); foreach ($inconsistentFiles as $row) { $filePath = $this->directory . '/' . $row['file1'][0] ? $this->directory . '/' . $row['file1'][0] : $this->directory . '/' . $row['file2'][0]; @@ -742,169 +808,51 @@ class DataConsistencyChecker * @param array $inconsistentFiles An array of inconsistent files. * @param string $filename The filename to use for the report. */ - private function generateHtmlReport(array $inconsistentFiles, string $name): void - { - $file = fopen($name, 'w'); - if (!$file) { - throw new Exception('Failed to open the file for writing.'); - } - $html = ''; - - foreach ($inconsistentFiles as $row) { - - $filePath = $this->directory . '/' . $row['file1'][0]; - $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); - $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); - $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; - $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; - - // Write the properties to the HTML table - $html .= ''; - } - - $html .= '
File/AttachmentFile pathFile nameThumb 1Thumb 2SizeCreation Time
Attachment' . htmlspecialchars($filePath) . '' . htmlspecialchars($row['file1'][0]) . '' . htmlspecialchars($thumb1) . '' . htmlspecialchars($thumb2) . '' . htmlspecialchars($size) . '' . htmlspecialchars($creationTime) . '
'; - - fwrite($file, $html); - fclose($file); - } - - /** - * Compares two CSV files and creates a new CSV file containing the matching entries. - * - * @param string $firstFile The path to the first CSV file. - * @param string $secondFile The path to the second CSV file. - * @param string $finalFile The path to the final CSV file to be created. - * @return void - */ - - function compareCSVFilesTransform($firstFile, $secondFile, $finalFile): void - { - // Read the first CSV file - $firstData = array_map('str_getcsv', file($firstFile)); - $firstHeaders = array_shift($firstData); - - // Read the second CSV file - $secondData = array_map('str_getcsv', file($secondFile)); - $secondHeaders = array_shift($secondData); - - // Find the indexes of the columns to compare in both files - $firstIdIndex = array_search('id', $firstHeaders); - $firstSizeIndex = array_search('size', $firstHeaders); - $firstCreationTimeIndex = array_search('creation_time', $firstHeaders); - - $secondIdIndex = array_search('Entry Path', $secondHeaders); - $secondSizeIndex = array_search('size', $secondHeaders); - $secondCreationTimeIndex = array_search('creation_time', $secondHeaders); - - // Get the entries from the first file - $firstEntries = []; - foreach ($firstData as $row) { - $firstEntries[$row[$firstIdIndex]] = [ - 'id' => $row[$firstIdIndex], - 'size' => $row[$firstSizeIndex], - 'creation_time' => $row[$firstCreationTimeIndex] - ]; - } - - // Create the final result CSV file - $finalCsvFile = fopen($finalFile, 'w'); - fputcsv($finalCsvFile, ['id', 'size', 'creation_time']); - - // Compare the entries from the second file with the entries in the first file - foreach ($secondData as $row) { - $secondId = $row[$secondIdIndex]; - $secondSize = $row[$secondSizeIndex]; - $secondCreationTime = $row[$secondCreationTimeIndex]; - - // Check if there is a matching entry in the first file - if ( - isset($firstEntries[$secondId]) && - $firstEntries[$secondId]['size'] === $secondSize && - $firstEntries[$secondId]['creation_time'] === $secondCreationTime - ) { - fputcsv($finalCsvFile, [ - $secondId, - $secondSize, - $secondCreationTime - ]); + private function generateHtmlReport(array $inconsistentFiles, string $name): void + { + $templateFile = 'report_template.html'; + $templateContent = file_get_contents($templateFile); + + if ($templateContent === false) { + throw new Exception('Failed to read the HTML template file.'); + } + + $tableRows = ''; + foreach ($inconsistentFiles as $row) { + $filename = $row['file1'][0]; + $filePath = $this->directory . '/' . $filename; + $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); + $creationTime = isset($row['file1'][2]) ? str_replace('"', '', $row['file1'][2]) : date('Y-m-d H:i:s', filectime($filePath)); + $thumb1 = $row['file1'][0] . '-thumb1'; + $thumb2 = $row['file1'][0] . '-thumb2'; + + if (isset($row['file1'][3]) && preg_match('/^[0-9]+\./', $row['file1'][3])) { + $filename = $row['file1'][3]; + $filePath = $this->directory . '/' . $filename; + $thumb1 = ''; + $thumb2 = ''; } - } - - fclose($finalCsvFile); - } - - /** - * Deletes physical files based on the entries listed in a CSV file. - * - * @param string $csvFile The path to the CSV file containing the list of files to delete. - * @return void - */ - - private function deletePhysicalFilesFromCsv(string $csvFile): void - { - $directory = $this->directory . '/'; - $entries = []; - if (($handle = fopen($csvFile, 'r')) !== false) { - // Collect the file paths to be deleted - while (($data = fgetcsv($handle)) !== false) { - // Skip the header row - if ($data[0] === 'File/Attachment') { - continue; - } - - $filePath = $directory . $data[1]; - $entries[] = $filePath; - } - fclose($handle); - } - - // Prompt the user for confirmation before deleting the files - echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL; - echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL; - echo "Do you want to delete the physical files listed in the CSV report? (yes/no): "; - $confirmation = trim(fgets(STDIN)); - - if (strtolower($confirmation) === 'yes') { - // Create a log file to record the deleted files - $logFile = 'deleted_files.log'; - $logHandle = fopen($logFile, 'a'); - - // Delete the physical files - foreach ($entries as $filePath) { - if (file_exists($filePath)) { - // unlink($filePath); - echo "File deleted: $filePath" . PHP_EOL; - - // Write the deleted file path to the log file - fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL); - } else { - echo "File not found: $filePath" . PHP_EOL; - } - } - - fclose($logHandle); - - echo "Deletion completed. The list of deleted files is saved in '$logFile'." . PHP_EOL; - } else { - echo "Deletion of physical files aborted." . PHP_EOL; - } - } - + + if (is_string($row['id']) && strpos($row['id'], '.') !== false) { + $filename = $row['id']; + $filePath = $this->directory . '/' . $filename; + $thumb1 = ''; + $thumb2 = ''; + } + + $tableRows .= 'Attachment' . htmlspecialchars($filePath) . '' . htmlspecialchars($filename) . '' . htmlspecialchars($thumb1) . '' . htmlspecialchars($thumb2) . '' . htmlspecialchars($size) . '' . htmlspecialchars($creationTime) . ''; + } + $html = str_replace('{{table_rows}}', $tableRows, $templateContent); + $file = fopen($name, 'w'); + + if (!$file) { + throw new Exception('Failed to open the file for writing.'); + } + + fwrite($file, $html); + fclose($file); + } /** * Returns cassandra schema version @@ -912,21 +860,22 @@ class DataConsistencyChecker * * @return int */ - - private function schema_version() + + private function schema_version(): int { return static::$schemaVersion; } + /** * Returns info(id,size) for attachment file * * @param int $clientId * @param string|null $bucketId * @param string $id - * - * @return object + * @return object|null */ - private function get_info(int $clientId, ?string $bucketId, string $id): ?object + + private function get_info(int $clientId, ?string $bucketId, string $id): object { $attachment = null; $args = [ @@ -965,6 +914,7 @@ class DataConsistencyChecker * @param string $id * @return string */ + private function _set_bucket(string $id): string { return substr($id, 0, static::$bucketMagic); @@ -978,9 +928,10 @@ class DataConsistencyChecker * * @return void */ + private function _update_attachment_stats(bool $add, int $size): void { - $op = $add ? '+' : '-'; + $op = $add ? '+' : self::DELIMITER; $query = $this->cassandra->prepare('UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?'); $this->cassandra->execute($query, ['arguments' => ['client_id' => (int) static::$clientId]]); $query = $this->cassandra->prepare( @@ -996,6 +947,7 @@ class DataConsistencyChecker * * @return object|null */ + private function _get_attachment_key(string $id): ?object { $result = null; @@ -1009,7 +961,6 @@ class DataConsistencyChecker if ($data && $data->valid()) { $result = (object) $data->current(); } - ; return $result; } @@ -1018,14 +969,14 @@ class DataConsistencyChecker * * @param string $attachmentId * - * @return array + * @return object|null */ + private function _get_attachment_by_id(string $attachmentId): ?object { $attachment = null; try { $properties = '*'; - echo "attid: " . $attachmentId . PHP_EOL; $key = $this->_get_attachment_key($attachmentId); if ($key) { if ($this->schema_version() === 1) { @@ -1050,8 +1001,6 @@ class DataConsistencyChecker } } catch (Cassandra\Exception\InvalidArgumentException $e) { } - //echo "ATTA: \n"; - //var_dump($attachment); return $attachment != null && $attachment->valid() ? $this->_convert_to_object($attachment->current()) : null; } @@ -1061,16 +1010,19 @@ class DataConsistencyChecker * * @param string $id */ + private function _delete_attachment_key(string $id): void { - $result = null; - $query = $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?'); - $arguments = [ - 'client_id' => static::$clientId, - 'bucket' => $this->_set_bucket($id), - 'id' => $id, - ]; - $this->cassandra->executeAsync($query, ['arguments' => $arguments]); + $this->cassandra->executeAsync( + $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? AND client_id = ?'), + [ + 'arguments' => [ + 'client_id' => static::$clientId, + 'bucket' => $this->_set_bucket($id), + 'id' => $id + ] + ] + ); } /** @@ -1082,6 +1034,7 @@ class DataConsistencyChecker * * @return void */ + private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void { $queryArguments = [ @@ -1089,7 +1042,7 @@ class DataConsistencyChecker 'bucket' => $this->_set_bucket($data_id), 'id' => $data_id, ]; - $query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . ($add ? '+' : '-') . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?'); + $query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . ($add ? '+' : self::DELIMITER) . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?'); $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]); if (!empty($attachmentId)) { @@ -1115,6 +1068,7 @@ class DataConsistencyChecker * * @return bool */ + public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool { static::$clientId = $clientId; @@ -1135,9 +1089,6 @@ class DataConsistencyChecker } $query = $this->cassandra->prepare($q); $this->cassandra->execute($query, ['arguments' => $refData]); - - - echo "DEL REFS\n"; $refData['bucket'] = $this->_set_bucket($id); $query = $this->cassandra->prepare( 'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?' @@ -1150,7 +1101,6 @@ class DataConsistencyChecker ); $result = $this->cassandra->execute($query, ['arguments' => $refData]); - echo "DEL ATTACHMENTS\n"; if ($this->schema_version() === 1) { $delQuery = $this->cassandra->prepare( 'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?' @@ -1258,6 +1208,7 @@ class DataConsistencyChecker * file,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30,,, * cassandra,/test1/testx3,testx3,testx3-thumb1,testx3-thumb2,4343,20-02-22 13:30,1,3,3abc-def */ + public function processAttachmentDeletionCSV(string $file, string $src): void { @@ -1278,7 +1229,7 @@ class DataConsistencyChecker if ($line === reset($file_lines)) { continue; } - $values = explode(",", $line); + $values = explode(',', $line); if ($values[0] === 'File/Attachment') { continue; @@ -1293,11 +1244,13 @@ class DataConsistencyChecker $data->size = $values[5]; $data->created = $values[6]; $path = dirname($data->path); + if (count($values) >= 10) { $data->clientId = (int) $values[7]; $data->bucket = $values[8]; $data->id = $values[9]; } + if ($data->source === 'Attachment' && $src === 'cassandra') { echo "will delete " . $data->clientId . " : " . $data->bucket . " : " . $data->id . PHP_EOL; $this->deleteAttachment($data->clientId, $data->bucket, $data->id); @@ -1306,12 +1259,14 @@ class DataConsistencyChecker $filePath = $values[1]; $thumb1Path = $path . '/' . $values[3]; $thumb2Path = $path . '/' . $values[4]; + if (file_exists($thumb1Path) && file_exists($thumb2Path)) { files::delete($thumb1Path); files::delete($thumb2Path); fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL); fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL); } + if (file_exists($filePath)) { unlink($filePath); unlink($thumb1Path); @@ -1325,9 +1280,7 @@ class DataConsistencyChecker } } } - fclose($logHandle); - echo "Deletion completed. The list of attachments is saved in '$logFile'." . PHP_EOL; } } diff --git a/report_template.html b/report_template.html new file mode 100644 index 0000000..8e3d699 --- /dev/null +++ b/report_template.html @@ -0,0 +1,37 @@ + + + + + + + + + + + + + + + + + + + {{table_rows}} + +
File/AttachmentFile pathFile nameThumb 1Thumb 2SizeCreation Time
+ + From b0acef47a1e8428107f73bbe67857abced05daa5 Mon Sep 17 00:00:00 2001 From: Spase Date: Tue, 30 May 2023 18:39:44 +0200 Subject: [PATCH 4/4] Fixed a few bug reported from testing --- cassandra_consistency_script.php | 108 ++++++++++++++++++------------- 1 file changed, 64 insertions(+), 44 deletions(-) diff --git a/cassandra_consistency_script.php b/cassandra_consistency_script.php index 24fa4e0..51d7e82 100644 --- a/cassandra_consistency_script.php +++ b/cassandra_consistency_script.php @@ -58,7 +58,7 @@ class DataConsistencyChecker $cassandraConfig = $config['CASSANDRA']; $this->_cluster = Cassandra::cluster() ->withContactPoints($cassandraConfig['host']) - ->withPort($cassandraConfig['port']) + ->withPort((int)$cassandraConfig['port']) ->withCredentials( $cassandraConfig['user'], $cassandraConfig['password'] @@ -99,11 +99,15 @@ class DataConsistencyChecker $structured_directory = $options['output'] ?? $options['o'] ?? null; if (!file_exists($structured_directory)) { - if ($structured_directory === null) { + + if ($structured_directory === null && $source === null) { echo "Output directory is required. Please specify the --output option.\n"; exit; } - mkdir($structured_directory, 0777, true); + + if ($structured_directory !== null) { + mkdir($structured_directory, 0777, true); + } } $this->structured_directory = $structured_directory; @@ -118,7 +122,7 @@ class DataConsistencyChecker exit; } - if ($structured_directory === null) { + if ($structured_directory === null && $source === null) { echo "Output directory is required. Please specify the --output option.\n"; exit; } @@ -131,6 +135,7 @@ class DataConsistencyChecker $this->processAttachmentDeletionCSV($remove, $source); } else { $this->checkConsistency('attachment_file_info'); + if (is_dir($this->structured_directory)) { $this->removeDirectory($this->structured_directory); } @@ -153,16 +158,16 @@ class DataConsistencyChecker Options: -h, --help Display this help screen. - --version Set the schema version. + --version Set the schema version (default: v1). --directory Set the directory path for attachments. - --output Set the folder for temp files + --output Set the folder for temp files - note that this folder will be deleted after script completes --remove We need to choose between file and cassandra what we want to remove Example: php script_name --version schema_version --directory=/path/to/directory --output ./out/ For Delete: php script_name --remove result_from_cassandra_entries.csv --source file - to remove missing physical files - php script_name --remove result_from_physical_files.csv --source cassandra - to remove missing cassandra entries + php script_name --remove result_from_physical_files.csv --source cassandra --version 2 - to remove missing cassandra entries EOT; echo $helpMessage; } @@ -186,7 +191,7 @@ class DataConsistencyChecker ); foreach ($files as $file) { - $path = $directory . '/' . $file; + $path = $directory . DIRECTORY_SEPARATOR . $file; if (is_dir($path)) { $this->removeDirectory($path); @@ -231,8 +236,10 @@ class DataConsistencyChecker if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) { continue; } + $dashParts = explode(self::DELIMITER, $fileName, 2); $dotParts = explode('.', $fileName); + if (count($dashParts) === 2) { $clientId = $dashParts[0]; $id = $dashParts[1]; @@ -278,21 +285,24 @@ class DataConsistencyChecker * @return void */ - private function createPhysicalFileCSV(string $clientId, array $entries): void - { + private function createPhysicalFileCSV(string $clientId, array $entries): void + { $fileName = $this->structured_directory . 'physical_' . $clientId . self::CSV_EXTENSION; $csvFile = fopen($fileName, 'w'); - fputcsv($csvFile, ['id', 'size', 'creation_time']); + fputcsv($csvFile, ['id', 'size', 'creation_time', 'filename']); + foreach ($entries as $entry) { - fputcsv($csvFile, [ - $entry['file_name'], - $entry['size'], - $entry['creation_time'] - ]); + $idx = strpos($entry['file_name'],'.'); + $id = $idx === false ? $entry['file_name'] : substr($entry['file_name'],0,$idx); + fputcsv($csvFile, [ + $id, + $entry['size'], + $entry['creation_time'], + $entry['file_name'] + ]); } fclose($csvFile); - } - + } /** * Creates a CSV file containing the entries for a specific client ID. @@ -350,6 +360,11 @@ class DataConsistencyChecker $entries = []; while ($result) { foreach ($result as $row) { + + if ($row['bucket'] === 'x') { + continue; // Skip the current iteration if bucket is 'x' + } + if (preg_match('/^[0-9]+$/', $row['id'])) { $dotParts = explode(".", $row['filename'], 2); $dynamicValue = substr($dotParts[1], 0, 2); @@ -365,7 +380,11 @@ class DataConsistencyChecker 'client_id' => (string) $row['client_id'], 'attachment_id' => (string) $row['id'], ]; - $entries[$dynamicValue][$row['filename']] = $entry; + + if ($row['bucket'] !== 'x') { + $entries[$dynamicValue][$row['filename']] = $entry; + } + } else { $clientId = substr($row['id'], 0, 2); $file = $row['client_id'] . self::DELIMITER . $row['id']; @@ -384,7 +403,9 @@ class DataConsistencyChecker if (!isset($entries[$clientId])) { $entries[$clientId] = []; } - $entries[$clientId][$file] = $entry; + if ($row['bucket'] !== 'x') { + $entries[$clientId][$file] = $entry; + } } } $result = $result->nextPage(); @@ -569,12 +590,10 @@ class DataConsistencyChecker $result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR); $result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR); - $this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV); $this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT); $this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV); $this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT); - } /** @@ -616,10 +635,8 @@ class DataConsistencyChecker { $file1_data = array_map('str_getcsv', file($file1_path)); $file2_data = array_map('str_getcsv', file($file2_path)); - $file1_headers = array_shift($file1_data); $file2_headers = array_shift($file2_data); - // find indexes of columns in each file $id_index_1 = array_search('id', $file1_headers); $id_index_2 = array_search('id', $file2_headers); @@ -629,7 +646,6 @@ class DataConsistencyChecker $time_index_2 = array_search('creation_time', $file2_headers); $filename_index_1 = array_search('filename', $file1_headers); $filename_index_2 = array_search('filename', $file2_headers); - $clientid_index_1 = array_search('client_id', $file1_headers); $bucket_index_1 = array_search('bucket', $file1_headers); $attachment_id_index_1 = array_search('attachment_id', $file1_headers); @@ -675,7 +691,6 @@ class DataConsistencyChecker ]; return $result; }, []); - $missing_entries = []; foreach ($file1_assoc as $id => $data) { if (!isset($file2_assoc[$id])) { @@ -694,7 +709,6 @@ class DataConsistencyChecker } } } - return $missing_entries; } @@ -733,9 +747,9 @@ class DataConsistencyChecker $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; - $clientId = isset($row['file1'][4]) ? $row['file1'][4] : ''; - $bucket = isset($row['file1'][5]) ? $row['file1'][5] : ''; - $attachmentId = isset($row['file1'][5]) ? $row['file1'][6] : ''; + $bucket = isset($row['file1'][4]) ? $row['file1'][4] : ''; + $clientId = isset($row['file1'][5]) ? $row['file1'][5] : ''; + $attachmentId = isset($row['file1'][6]) ? $row['file1'][6] : ''; fputcsv($fp, [ 'Attachment', @@ -776,11 +790,11 @@ class DataConsistencyChecker ]); foreach ($inconsistentFiles as $row) { - $filePath = $this->directory . '/' . $row['file1'][0] ? $this->directory . '/' . $row['file1'][0] : $this->directory . '/' . $row['file2'][0]; + $filePath = $this->directory . DIRECTORY_SEPARATOR . $row['file1'][3] ? $this->directory . DIRECTORY_SEPARATOR . $row['file1'][3] : $this->directory . DIRECTORY_SEPARATOR . $row['file2'][3]; $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); - $thumb1 = $row['file1'][0] . '-thumb1'; - $thumb2 = $row['file1'][0] . '-thumb2'; + $thumb1 = $row['file1'][3] . '-thumb1'; + $thumb2 = $row['file1'][3] . '-thumb2'; if (is_string($row['id']) && strpos($row['id'], '.') !== false) { $old_attachment = explode('.', $row['id'])[0]; $thumb1 = ''; @@ -796,7 +810,6 @@ class DataConsistencyChecker $creationTime ]); } - fclose($fp); chmod($name, 0666); } @@ -821,7 +834,7 @@ class DataConsistencyChecker $tableRows = ''; foreach ($inconsistentFiles as $row) { $filename = $row['file1'][0]; - $filePath = $this->directory . '/' . $filename; + $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename; $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); $creationTime = isset($row['file1'][2]) ? str_replace('"', '', $row['file1'][2]) : date('Y-m-d H:i:s', filectime($filePath)); $thumb1 = $row['file1'][0] . '-thumb1'; @@ -829,14 +842,14 @@ class DataConsistencyChecker if (isset($row['file1'][3]) && preg_match('/^[0-9]+\./', $row['file1'][3])) { $filename = $row['file1'][3]; - $filePath = $this->directory . '/' . $filename; + $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename; $thumb1 = ''; $thumb2 = ''; } if (is_string($row['id']) && strpos($row['id'], '.') !== false) { $filename = $row['id']; - $filePath = $this->directory . '/' . $filename; + $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename; $thumb1 = ''; $thumb2 = ''; } @@ -875,7 +888,7 @@ class DataConsistencyChecker * @return object|null */ - private function get_info(int $clientId, ?string $bucketId, string $id): object + private function get_info(int $clientId, ?string $bucketId, string $id): ?object { $attachment = null; $args = [ @@ -887,7 +900,6 @@ class DataConsistencyChecker $query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?'); } else { $q = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? AND bucket = \'' . $bucketId . '\''; - echo "Q: [" . $q . "]\n"; $query = $this->cassandra->prepare($q); } $res = $this->cassandra->execute( @@ -958,6 +970,7 @@ class DataConsistencyChecker 'id' => $id, ]; $data = $this->cassandra->execute($query, ['arguments' => $arguments]); + if ($data && $data->valid()) { $result = (object) $data->current(); } @@ -1220,12 +1233,15 @@ class DataConsistencyChecker if (strtolower($confirmation) !== 'yes') { return; } + + $this->init(); $file_contents = file_get_contents($file); $file_lines = explode("\n", $file_contents); $logFile = 'deleted_files.log'; $logHandle = fopen($logFile, 'a'); foreach ($file_lines as $line) { + if ($line === reset($file_lines)) { continue; } @@ -1234,6 +1250,7 @@ class DataConsistencyChecker if ($values[0] === 'File/Attachment') { continue; } + if (count($values) >= 7) { $data = (object) array(); $data->source = $values[0]; @@ -1257,12 +1274,15 @@ class DataConsistencyChecker fwrite($logHandle, "Deleted attachment: $data->id" . PHP_EOL); } else if ($data->source === 'File' && $src === 'file') { $filePath = $values[1]; - $thumb1Path = $path . '/' . $values[3]; - $thumb2Path = $path . '/' . $values[4]; + $thumb1Path = $path . DIRECTORY_SEPARATOR . $values[3]; + $thumb2Path = $path . DIRECTORY_SEPARATOR . $values[4]; + $filePath = str_replace('"', '', $filePath); + $thumb1Path = str_replace('"', '', $thumb1Path); + $thumb2Path = str_replace('"', '', $thumb2Path); if (file_exists($thumb1Path) && file_exists($thumb2Path)) { - files::delete($thumb1Path); - files::delete($thumb2Path); + unlink($thumb1Path); + unlink($thumb2Path); fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL); fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL); } @@ -1286,4 +1306,4 @@ class DataConsistencyChecker } $checker = new DataConsistencyChecker(); -$checker->checkConsistency("attachment_file_info", true); \ No newline at end of file +$checker->checkConsistency("attachment_file_info", true);