diff --git a/cassandra_consistency_script.php b/cassandra_consistency_script.php index 24fa4e0..51d7e82 100644 --- a/cassandra_consistency_script.php +++ b/cassandra_consistency_script.php @@ -58,7 +58,7 @@ class DataConsistencyChecker $cassandraConfig = $config['CASSANDRA']; $this->_cluster = Cassandra::cluster() ->withContactPoints($cassandraConfig['host']) - ->withPort($cassandraConfig['port']) + ->withPort((int)$cassandraConfig['port']) ->withCredentials( $cassandraConfig['user'], $cassandraConfig['password'] @@ -99,11 +99,15 @@ class DataConsistencyChecker $structured_directory = $options['output'] ?? $options['o'] ?? null; if (!file_exists($structured_directory)) { - if ($structured_directory === null) { + + if ($structured_directory === null && $source === null) { echo "Output directory is required. Please specify the --output option.\n"; exit; } - mkdir($structured_directory, 0777, true); + + if ($structured_directory !== null) { + mkdir($structured_directory, 0777, true); + } } $this->structured_directory = $structured_directory; @@ -118,7 +122,7 @@ class DataConsistencyChecker exit; } - if ($structured_directory === null) { + if ($structured_directory === null && $source === null) { echo "Output directory is required. Please specify the --output option.\n"; exit; } @@ -131,6 +135,7 @@ class DataConsistencyChecker $this->processAttachmentDeletionCSV($remove, $source); } else { $this->checkConsistency('attachment_file_info'); + if (is_dir($this->structured_directory)) { $this->removeDirectory($this->structured_directory); } @@ -153,16 +158,16 @@ class DataConsistencyChecker Options: -h, --help Display this help screen. - --version Set the schema version. + --version Set the schema version (default: v1). --directory Set the directory path for attachments. - --output Set the folder for temp files + --output Set the folder for temp files - note that this folder will be deleted after script completes --remove We need to choose between file and cassandra what we want to remove Example: php script_name --version schema_version --directory=/path/to/directory --output ./out/ For Delete: php script_name --remove result_from_cassandra_entries.csv --source file - to remove missing physical files - php script_name --remove result_from_physical_files.csv --source cassandra - to remove missing cassandra entries + php script_name --remove result_from_physical_files.csv --source cassandra --version 2 - to remove missing cassandra entries EOT; echo $helpMessage; } @@ -186,7 +191,7 @@ class DataConsistencyChecker ); foreach ($files as $file) { - $path = $directory . '/' . $file; + $path = $directory . DIRECTORY_SEPARATOR . $file; if (is_dir($path)) { $this->removeDirectory($path); @@ -231,8 +236,10 @@ class DataConsistencyChecker if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) { continue; } + $dashParts = explode(self::DELIMITER, $fileName, 2); $dotParts = explode('.', $fileName); + if (count($dashParts) === 2) { $clientId = $dashParts[0]; $id = $dashParts[1]; @@ -278,21 +285,24 @@ class DataConsistencyChecker * @return void */ - private function createPhysicalFileCSV(string $clientId, array $entries): void - { + private function createPhysicalFileCSV(string $clientId, array $entries): void + { $fileName = $this->structured_directory . 'physical_' . $clientId . self::CSV_EXTENSION; $csvFile = fopen($fileName, 'w'); - fputcsv($csvFile, ['id', 'size', 'creation_time']); + fputcsv($csvFile, ['id', 'size', 'creation_time', 'filename']); + foreach ($entries as $entry) { - fputcsv($csvFile, [ - $entry['file_name'], - $entry['size'], - $entry['creation_time'] - ]); + $idx = strpos($entry['file_name'],'.'); + $id = $idx === false ? $entry['file_name'] : substr($entry['file_name'],0,$idx); + fputcsv($csvFile, [ + $id, + $entry['size'], + $entry['creation_time'], + $entry['file_name'] + ]); } fclose($csvFile); - } - + } /** * Creates a CSV file containing the entries for a specific client ID. @@ -350,6 +360,11 @@ class DataConsistencyChecker $entries = []; while ($result) { foreach ($result as $row) { + + if ($row['bucket'] === 'x') { + continue; // Skip the current iteration if bucket is 'x' + } + if (preg_match('/^[0-9]+$/', $row['id'])) { $dotParts = explode(".", $row['filename'], 2); $dynamicValue = substr($dotParts[1], 0, 2); @@ -365,7 +380,11 @@ class DataConsistencyChecker 'client_id' => (string) $row['client_id'], 'attachment_id' => (string) $row['id'], ]; - $entries[$dynamicValue][$row['filename']] = $entry; + + if ($row['bucket'] !== 'x') { + $entries[$dynamicValue][$row['filename']] = $entry; + } + } else { $clientId = substr($row['id'], 0, 2); $file = $row['client_id'] . self::DELIMITER . $row['id']; @@ -384,7 +403,9 @@ class DataConsistencyChecker if (!isset($entries[$clientId])) { $entries[$clientId] = []; } - $entries[$clientId][$file] = $entry; + if ($row['bucket'] !== 'x') { + $entries[$clientId][$file] = $entry; + } } } $result = $result->nextPage(); @@ -569,12 +590,10 @@ class DataConsistencyChecker $result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR); $result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR); - $this->generateCsvReportForDbEntries($result_from_cassandra_entries, self::CASSANDRA_RESULT_CSV); $this->generateHtmlReport($result_from_cassandra_entries, self::CASSANDRA_HTML_REPORT); $this->generateCsvReportForPhysicalFiles($result_from_physical_files, self::PHYSICAL_RESULT_CSV); $this->generateHtmlReport($result_from_physical_files, self::PHYSICAL_HTML_REPORT); - } /** @@ -616,10 +635,8 @@ class DataConsistencyChecker { $file1_data = array_map('str_getcsv', file($file1_path)); $file2_data = array_map('str_getcsv', file($file2_path)); - $file1_headers = array_shift($file1_data); $file2_headers = array_shift($file2_data); - // find indexes of columns in each file $id_index_1 = array_search('id', $file1_headers); $id_index_2 = array_search('id', $file2_headers); @@ -629,7 +646,6 @@ class DataConsistencyChecker $time_index_2 = array_search('creation_time', $file2_headers); $filename_index_1 = array_search('filename', $file1_headers); $filename_index_2 = array_search('filename', $file2_headers); - $clientid_index_1 = array_search('client_id', $file1_headers); $bucket_index_1 = array_search('bucket', $file1_headers); $attachment_id_index_1 = array_search('attachment_id', $file1_headers); @@ -675,7 +691,6 @@ class DataConsistencyChecker ]; return $result; }, []); - $missing_entries = []; foreach ($file1_assoc as $id => $data) { if (!isset($file2_assoc[$id])) { @@ -694,7 +709,6 @@ class DataConsistencyChecker } } } - return $missing_entries; } @@ -733,9 +747,9 @@ class DataConsistencyChecker $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; - $clientId = isset($row['file1'][4]) ? $row['file1'][4] : ''; - $bucket = isset($row['file1'][5]) ? $row['file1'][5] : ''; - $attachmentId = isset($row['file1'][5]) ? $row['file1'][6] : ''; + $bucket = isset($row['file1'][4]) ? $row['file1'][4] : ''; + $clientId = isset($row['file1'][5]) ? $row['file1'][5] : ''; + $attachmentId = isset($row['file1'][6]) ? $row['file1'][6] : ''; fputcsv($fp, [ 'Attachment', @@ -776,11 +790,11 @@ class DataConsistencyChecker ]); foreach ($inconsistentFiles as $row) { - $filePath = $this->directory . '/' . $row['file1'][0] ? $this->directory . '/' . $row['file1'][0] : $this->directory . '/' . $row['file2'][0]; + $filePath = $this->directory . DIRECTORY_SEPARATOR . $row['file1'][3] ? $this->directory . DIRECTORY_SEPARATOR . $row['file1'][3] : $this->directory . DIRECTORY_SEPARATOR . $row['file2'][3]; $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); - $thumb1 = $row['file1'][0] . '-thumb1'; - $thumb2 = $row['file1'][0] . '-thumb2'; + $thumb1 = $row['file1'][3] . '-thumb1'; + $thumb2 = $row['file1'][3] . '-thumb2'; if (is_string($row['id']) && strpos($row['id'], '.') !== false) { $old_attachment = explode('.', $row['id'])[0]; $thumb1 = ''; @@ -796,7 +810,6 @@ class DataConsistencyChecker $creationTime ]); } - fclose($fp); chmod($name, 0666); } @@ -821,7 +834,7 @@ class DataConsistencyChecker $tableRows = ''; foreach ($inconsistentFiles as $row) { $filename = $row['file1'][0]; - $filePath = $this->directory . '/' . $filename; + $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename; $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); $creationTime = isset($row['file1'][2]) ? str_replace('"', '', $row['file1'][2]) : date('Y-m-d H:i:s', filectime($filePath)); $thumb1 = $row['file1'][0] . '-thumb1'; @@ -829,14 +842,14 @@ class DataConsistencyChecker if (isset($row['file1'][3]) && preg_match('/^[0-9]+\./', $row['file1'][3])) { $filename = $row['file1'][3]; - $filePath = $this->directory . '/' . $filename; + $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename; $thumb1 = ''; $thumb2 = ''; } if (is_string($row['id']) && strpos($row['id'], '.') !== false) { $filename = $row['id']; - $filePath = $this->directory . '/' . $filename; + $filePath = $this->directory . DIRECTORY_SEPARATOR . $filename; $thumb1 = ''; $thumb2 = ''; } @@ -875,7 +888,7 @@ class DataConsistencyChecker * @return object|null */ - private function get_info(int $clientId, ?string $bucketId, string $id): object + private function get_info(int $clientId, ?string $bucketId, string $id): ?object { $attachment = null; $args = [ @@ -887,7 +900,6 @@ class DataConsistencyChecker $query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ?'); } else { $q = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? AND bucket = \'' . $bucketId . '\''; - echo "Q: [" . $q . "]\n"; $query = $this->cassandra->prepare($q); } $res = $this->cassandra->execute( @@ -958,6 +970,7 @@ class DataConsistencyChecker 'id' => $id, ]; $data = $this->cassandra->execute($query, ['arguments' => $arguments]); + if ($data && $data->valid()) { $result = (object) $data->current(); } @@ -1220,12 +1233,15 @@ class DataConsistencyChecker if (strtolower($confirmation) !== 'yes') { return; } + + $this->init(); $file_contents = file_get_contents($file); $file_lines = explode("\n", $file_contents); $logFile = 'deleted_files.log'; $logHandle = fopen($logFile, 'a'); foreach ($file_lines as $line) { + if ($line === reset($file_lines)) { continue; } @@ -1234,6 +1250,7 @@ class DataConsistencyChecker if ($values[0] === 'File/Attachment') { continue; } + if (count($values) >= 7) { $data = (object) array(); $data->source = $values[0]; @@ -1257,12 +1274,15 @@ class DataConsistencyChecker fwrite($logHandle, "Deleted attachment: $data->id" . PHP_EOL); } else if ($data->source === 'File' && $src === 'file') { $filePath = $values[1]; - $thumb1Path = $path . '/' . $values[3]; - $thumb2Path = $path . '/' . $values[4]; + $thumb1Path = $path . DIRECTORY_SEPARATOR . $values[3]; + $thumb2Path = $path . DIRECTORY_SEPARATOR . $values[4]; + $filePath = str_replace('"', '', $filePath); + $thumb1Path = str_replace('"', '', $thumb1Path); + $thumb2Path = str_replace('"', '', $thumb2Path); if (file_exists($thumb1Path) && file_exists($thumb2Path)) { - files::delete($thumb1Path); - files::delete($thumb2Path); + unlink($thumb1Path); + unlink($thumb2Path); fwrite($logHandle, "Deleted thumbnail: " . $thumb1Path . PHP_EOL); fwrite($logHandle, "Deleted thumbnail: " . $thumb2Path . PHP_EOL); } @@ -1286,4 +1306,4 @@ class DataConsistencyChecker } $checker = new DataConsistencyChecker(); -$checker->checkConsistency("attachment_file_info", true); \ No newline at end of file +$checker->checkConsistency("attachment_file_info", true);