From 268e8c2a4f0a3baa9ad4477eb3589eed36bc59b3 Mon Sep 17 00:00:00 2001 From: Spase Date: Fri, 19 May 2023 11:58:21 +0200 Subject: [PATCH] Initial commit --- cassandra_consistency_script.php | 1299 ++++++++++++++++++++++++++++++ 1 file changed, 1299 insertions(+) create mode 100644 cassandra_consistency_script.php diff --git a/cassandra_consistency_script.php b/cassandra_consistency_script.php new file mode 100644 index 0000000..68e0ef9 --- /dev/null +++ b/cassandra_consistency_script.php @@ -0,0 +1,1299 @@ +directory = $directory; + } + + $this->runFromCommandLine($_SERVER['argv']); + } + + + public function init(): void + { + $this->_cluster = Cassandra::cluster() + ->withContactPoints('cassandra') + ->withPort(9042) + ->build(); + if ($this->_cluster) { + try { + $this->cassandra = $this->_cluster->connect("tr_key"); + } catch (Exception $e) { + echo "err\n"; + } + } + // static::$bucketMagic = defined('CASSANDRA_BUCKET_MAGIC') ? (int) CASSANDRA_BUCKET_MAGIC : 4; + + } + + public function runFromCommandLine($arguments) + { + $shortOptions = "hd:v"; + $longOptions = ["help", "directory:", "version:", "v"]; + + $options = getopt($shortOptions, $longOptions); + + if (count($options) == 0 || isset($options['h']) || isset($options['help'])) { + $this->displayHelpMessage(); + exit; + } + + $directory = isset($options['directory']) ? $options['directory'] : (isset($options['d']) ? $options['d'] : null); + $schemaVersion = isset($options['version']) ? $options['version'] : (isset($options['v']) ? $options['v'] : null); + + if ($directory === null || $schemaVersion === null) { + echo "Missing Attachment directory or schema version.\n"; + exit; + } + if (!in_array($schemaVersion, [1, 2])) { + echo "Invalid schema version. 
Only versions 1 and 2 are supported.\n"; + exit; + } + + static::$schemaVersion = (int)$schemaVersion; + + $this->checkConsistency('attachment_file_info'); + exit; + } + + private function displayHelpMessage() + { + $helpMessage = <<init(); + $dbEntries = $this->getDbEntries($tableName); + $fileEntries = $this->getFileEntries($this->directory); + $this->process_files_in_directory($this->structured_directory); + $this->createCSVById($this->retrived_csv); + } + + private function getFileEntries($directory) + { + $files = glob($directory . '/*'); + $entries = []; + + foreach ($files as $file) { + if (is_file($file)) { + $fileName = basename($file); + + // Skip files with specific suffixes + if (strpos($fileName, '-thumb1') !== false || strpos($fileName, '-thumb2') !== false) { + continue; + } + + $dashParts = explode("-", $fileName, 2); + $dotParts = explode(".", $fileName); + + if (count($dashParts) === 2) { + $clientId = $dashParts[0]; + $id = $dashParts[1]; + $fileParts = $dashParts; + $delimiter = '-'; + } elseif (count($dotParts) >= 2) { + $clientId = $dotParts[0]; + $id = implode(".", array_slice($dotParts, 1)); + $fileParts = $dotParts; + $delimiter = '.'; + } else { + // Handle cases where the file name does not contain either a dash or a dot + continue; + } + + $filePath = $file; + $size = filesize($filePath); + + // Use file modification time (filemtime()) instead of file creation time (filectime()) + $creationTime = date('Y-m-d H:i:s', filemtime($filePath)); + + $dynamicValue = substr($id, 0, 2); + $creationTime = str_replace('"', '', $creationTime); + + $entries[$dynamicValue][] = [ + 'client_id' => $clientId, + 'file_path' => $filePath, + 'file_name' => implode($delimiter, $fileParts), + 'size' => $size, + 'creation_time' => $creationTime, + ]; + } + } + + // create CSV file for each client ID's physical file entries + foreach ($entries as $clientId => $clientEntries) { + $this->createPhysicalFileCSV($clientId, $clientEntries); + } + + return $entries; + 
} + + + + + + private function createPhysicalFileCSV($clientId, $entries) + { + $fileName = "physical_" . $clientId . ".csv"; + $csvFile = fopen($fileName, 'w'); + fputcsv($csvFile, ['id', 'size', 'creation_time']); + foreach ($entries as $entry) { + fputcsv($csvFile, [ + $entry['file_name'], + $entry['size'], + $entry['creation_time'] + ]); + } + fclose($csvFile); + } + + + + private function createDBFileCSV($clientId, $entries) + { + $fileName = "cassandra_" . (string) $clientId . ".csv"; + $csvFile = fopen($fileName, 'w'); + + $headers = ['id', 'size', 'creation_time', 'filename', 'bucket', 'client_id', 'attachment_id']; + + fputcsv($csvFile, $headers); + foreach ($entries as $key => $entry) { + $rowData = [ + $entry['id'], + $entry['size'], + $entry['creation_time'], + $entry['filename'], + $entry['bucket'], + $entry['client_id'], + $entry['attachment_id'], + ]; + + fputcsv($csvFile, $rowData); + } + + fclose($csvFile); + } + + + + private function getDbEntries($tableName) + { + + if($this->schema_version() === 1) { + $query = "SELECT client_id, id, size, filename, created_on FROM $tableName"; + } else { + $query = "SELECT client_id, id, size, filename, created_on, bucket FROM $tableName"; + } + $arguments = []; + $result = $this->cassandra->execute( + $query, + [ + 'arguments' => $arguments, + 'page_size' => static::DEFAULT_PAGE_SIZE + ] + ); + $entries = []; + while ($result) { + foreach ($result as $row) { + if (preg_match('/^[0-9]+$/', $row['id'])) { + $dotParts = explode(".", $row['filename'], 2); + $dynamicValue = substr($dotParts[1], 0, 2); + $timestamp = (int) $row['created_on']; + $date = date('Y-m-d H:i:s', $timestamp); + $creationTime = str_replace('"', '', $date); + $entry = [ + + "id" => $row['id'], + "size" => (string) $row['size'], + "creation_time" => $creationTime, + 'filename' => $row['filename'], + 'bucket' => $row['bucket'] ? 
$row['bucket'] : '', + 'client_id' => (string) $row['client_id'], + 'attachment_id' => (string) $row['id'], + ]; + $entries[$dynamicValue][$row['filename']] = $entry; + } else { + $clientId = substr($row['id'], 0, 2); + $file = $row['client_id'] . '-' . $row['id']; + $timestamp = (int) $row['created_on']; + $date = date('Y-m-d H:i:s', $timestamp); + $creationTime = str_replace('"', '', $date); + $entry = [ + "id" => $file, + "size" => (string) $row['size'], + "creation_time" => $creationTime, + 'filename' => $row['filename'], + 'bucket' => $row['bucket'] ? $row['bucket'] : '', + 'client_id' => (string) $row['client_id'], + 'attachment_id' => (string) $row['id'], + ]; + if (!isset($entries[$clientId])) { + $entries[$clientId] = []; + } + $entries[$clientId][$file] = $entry; + } + } + $result = $result->nextPage(); + } + + // Merge all entries into a single array + $allEntries = []; + foreach ($entries as $clientEntries) { + $allEntries = array_merge($allEntries, $clientEntries); + } + // Create CSV file for each client ID's physical file entries + foreach ($entries as $clientId => $clientEntries) { + $this->createDBFileCSV($clientId, $clientEntries); + } + + return $entries; + } + + private function createCSVById($csvFile) + { + $data = []; + + + $fileData = array_map('str_getcsv', file($csvFile)); + $headers = array_shift($fileData); + + + $idIndex = array_search('File path', $headers); + $old_migrated_data = array_search('migrated_files_id', $headers); + + + $query = "SELECT * FROM attachment_file_info WHERE id = ? 
ALLOW FILTERING"; + $statement = $this->cassandra->prepare($query); + + foreach ($fileData as $row) { + if ($old_migrated_data !== false && isset($row[$old_migrated_data])) { + $id = $row[$old_migrated_data]; + } else { + $id = $row[2]; + } + + $options = ['arguments' => [$id]]; + + + $result = $this->cassandra->execute($statement, $options); + + + foreach ($result as $row) { + $data[] = $row; + } + } + + + $fileName = "data_by_id.csv"; + $csvFile = fopen($fileName, 'w'); + + fputcsv($csvFile, $headers); + + foreach ($data as $entry) { + $rowData = []; + + foreach ($headers as $field) { + $value = isset($entry[$field]) ? $entry[$field] : ''; + $rowData[] = $value; + } + + fputcsv($csvFile, $rowData); + } + + fclose($csvFile); + } + + private function parseCSVFile($file) + { + $file_contents = file_get_contents($file); + $file_lines = explode("\n", $file_contents); + $file_assoc = array(); + + foreach ($file_lines as $line) { + if ($line === reset($file_lines)) { + continue; + } + $values = explode(",", $line); + if (count($values) == 3) { + $values[2] = str_replace('"', '', $values[2]); + $file_assoc[$values[0]] = [$values[1], $values[2]]; + } + } + + return $file_assoc; + } + + // Helper function to read file contents and parse CSV lines + private function compareFileEntries($id, $data, $cassandra_file_assoc) + { + if (!isset($cassandra_file_assoc[$id])) { + return [ + 'id' => $id, + 'file1' => [$id, trim($data[0], '"'), trim($data[1], '"')], + ]; + } else { + $physical_value1 = trim($data[0], '"'); + $physical_value2 = trim($data[1], '"'); + $cassandra_value1 = trim($cassandra_file_assoc[$id][0], '"'); + $cassandra_value2 = trim($cassandra_file_assoc[$id][1], '"'); + + if ($physical_value1 !== $cassandra_value1 || $physical_value2 !== $cassandra_value2) { + return [ + 'id' => $id, + 'file1' => [$id, $physical_value1, $physical_value2], + 'file2' => [$id, $cassandra_value1, $cassandra_value2], + ]; + } + } + return null; + } + + private function 
getFileLines($file) + { + $file_contents = file_get_contents($file); + return explode("\n", $file_contents); + } + + private function filterAndMapEntries($entries) + { + $filtered_entries = array_filter($entries); + $mapped_entries = array_map(function ($entry) { + return $entry[0]; + }, $filtered_entries); + return array_values($mapped_entries); + } + private function process_files_in_directory($dir) + { + $files = glob($dir . '/*.csv'); + $physical_files = array(); + $cassandra_files = array(); + foreach ($files as $file) { + $filename = basename($file, '.csv'); + $csv_type = substr($filename, 0, strpos($filename, '_')); + $file_num = substr($filename, strpos($filename, '_') + 1); + if ($csv_type == 'physical') { + $physical_files[$file_num] = $file; + } elseif ($csv_type == 'cassandra') { + $cassandra_files[$file_num] = $file; + } + } + + ksort($physical_files); + ksort($cassandra_files); + + $missing_physical_files = array(); + $missing_cassandra_entries = array(); + $cassandra_entries = []; + $physical_entries = []; + + foreach ($physical_files as $file_num => $physical_file) { + $cassandra_file = $dir . '/cassandra_' . $file_num . '.csv'; + + if (file_exists($physical_file) && file_exists($cassandra_file)) { + $compared_physical[] = $this->compare_csv_files($physical_file, $cassandra_file); + $compared_cassandra[] = $this->compare_csv_files($cassandra_file, $physical_file); + $physical_entries = $this->filterAndMapEntries($compared_cassandra); + + //echo "phe: " . var_dump($physical_entries) . PHP_EOL; + $cassandra_entries = $this->filterAndMapEntries($compared_physical); + + } else { + if (!file_exists($physical_file)) { + $missing_physical_files[] = $physical_file; + + echo "cfl: " . var_dump($missing_physical_files) . 
PHP_EOL; + + } + if (!file_exists($cassandra_file)) { + $missing_cassandra_files[] = $cassandra_file; + } + } + + if (!file_exists($cassandra_file)) { + $physical_file_lines = $this->getFileLines($physical_file); + $file_assoc = array(); + + foreach ($physical_file_lines as $line) { + if ($line === reset($physical_file_lines)) { + continue; + } + $values = explode(",", $line); + if (count($values) == 3) { + $values[2] = str_replace('"', '', $values[2]); + $file_assoc[$values[0]] = [$values[1], $values[2]]; + } + } + foreach ($file_assoc as $id => $data) { + if (!isset($cassandra_files_assoc[$id])) { + $missing_cassandra_entries[] = [ + 'id' => $id, + 'file1' => [$id, $data[0], $data[1]], + ]; + } + } + } else { + + } + } + + foreach ($cassandra_files as $file_num => $cassandra_file) { + $physical_file = $dir . '/physical_' . $file_num . '.csv'; + + if (!file_exists($physical_file)) { + $cassandra_file_lines = $this->getFileLines($cassandra_file); + // echo "cfl: " . var_dump($cassandra_file_lines) . 
PHP_EOL; + $file_assoc = $this->buildFileAssociation($cassandra_file_lines); + + foreach ($file_assoc as $id => $data) { + + if (!isset($physical_files_assoc[$id])) { + $missing_physical_files[] = [ + 'id' => $id, + 'file1' => [$id, $data[0], $data[1], $data[2], $data[3], $data[4], $data[5], $data[6]], + ]; + } + } + } + } + $result_from_cassandra_entries = array_unique(array_merge($cassandra_entries, $missing_cassandra_entries), SORT_REGULAR); + + // var_dump($missing_physical_files); + $result_from_physical_files = array_unique(array_merge($physical_entries, $missing_physical_files), SORT_REGULAR); + + $this->generateCsvReportForDbEntries($result_from_cassandra_entries, 'result_from_cassandra_entries.csv'); + $this->generateHtmlReport($result_from_cassandra_entries, 'cassandra.html'); + $this->generateCsvReportForPhysicalFiles($result_from_physical_files, 'result_from_physical_files.csv'); + $this->generateHtmlReport($result_from_physical_files, 'physical.html'); + // $this->compareCSVFilesTransform($this->structured_directory . 'cassandra_all_entries.csv', $this->structured_directory . 'result_from_cassandra_entries.csv', $this->structured_directory . 
'final_file.csv'); + // $this->deletePhysicalFilesFromCsv('result_from_cassandra_entries.csv'); + } + private function buildFileAssociation($file_lines) + { + $file_assoc = []; + + foreach ($file_lines as $line) { + if ($line === reset($file_lines)) { + continue; + } + + $values = explode(",", $line); + $values[2] = str_replace('"', '', $values[2]); + if (count($values) == 3) { + + $file_assoc[$values[0]] = [$values[1], $values[2]]; + } + if (count($values) > 3) { + + $file_assoc[$values[0]] = [$values[1], $values[2], $values[3], $values[4], $values[5], $values[0]]; + } + } + + return $file_assoc; + } + + + private function compare_csv_files($file1_path, $file2_path) + { + $file1_data = array_map('str_getcsv', file($file1_path)); + $file2_data = array_map('str_getcsv', file($file2_path)); + + $file1_headers = array_shift($file1_data); + $file2_headers = array_shift($file2_data); + + // find indexes of columns in each file + $id_index_1 = array_search('id', $file1_headers); + $id_index_2 = array_search('id', $file2_headers); + $size_index_1 = array_search('size', $file1_headers); + $size_index_2 = array_search('size', $file2_headers); + $time_index_1 = array_search('creation_time', $file1_headers); + $time_index_2 = array_search('creation_time', $file2_headers); + $filename_index_1 = array_search('filename', $file1_headers); + $filename_index_2 = array_search('filename', $file2_headers); + + $clientid_index_1 = array_search('client_id', $file1_headers); + $bucket_index_1 = array_search('bucket', $file1_headers); + $attachment_id_index_1 = array_search('attachment_id', $file1_headers); + + $file1_assoc = array_reduce($file1_data, function ($result, $row) use ($id_index_1, $size_index_1, $time_index_1, $filename_index_1, $clientid_index_1, $bucket_index_1, $attachment_id_index_1) { + $result[$row[$id_index_1]] = [ + 'id' => $row[$id_index_1], + 'file1' => [ + $row[$id_index_1], + $row[$size_index_1], + $row[$time_index_1], + isset($row[$filename_index_1]) ? 
$row[$filename_index_1] : null, + isset($row[$clientid_index_1]) ? $row[$clientid_index_1] : null, + isset($row[$bucket_index_1]) ? $row[$bucket_index_1] : null, + isset($row[$attachment_id_index_1]) ? $row[$attachment_id_index_1] : null + ], + ]; + return $result; + }, []); + $file2_assoc = array_reduce($file2_data, function ($result, $row) use ($id_index_2, $size_index_2, $time_index_2, $filename_index_2) { + $result[$row[$id_index_2]] = [ + 'id' => $row[$id_index_2], + 'file2' => [ + $row[$id_index_2], + $row[$size_index_2], + $row[$time_index_2], + isset($row[$filename_index_2]) ? $row[$filename_index_2] : null + ], + ]; + return $result; + }, []); + + $missing_entries = []; + foreach ($file1_assoc as $id => $data) { + if (!isset($file2_assoc[$id])) { + $missing_entries[] = [ + 'id' => $id, + 'file1' => $data['file1'], + ]; + } else { + $file2_data = $file2_assoc[$id]['file2']; + if ($data['file1'][1] !== $file2_data[1] || $data['file1'][2] !== $file2_data[2]) { + $missing_entries[] = [ + 'id' => $id, + 'file1' => $data['file1'], + 'file2' => $file2_data, + ]; + } + } + } + + return $missing_entries; + } + + + + + /** + * Generates a CSV report of inconsistent files. + * + * @param array $inconsistentFiles An array of inconsistent files. + * @param string $filename The filename to use for the report. + */ + private function generateCsvReportForPhysicalFiles(array $inconsistentFiles, string $name): void + { + $fp = fopen($name, 'w'); + fputcsv($fp, ['File/Attachment', 'File path', 'File name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time', 'ClientId', 'Bucket' , 'Id']); + + foreach ($inconsistentFiles as $row) { + + $file1Value = $row['file1'][0]; + $check_value = $row['id']; + //$check_value = is_numeric($row['id']) ? $row['file1'][3] : $file1Value; + $filePath = $this->directory . '/' . $check_value ? $this->directory . '/' . $check_value : $this->directory . '/' . $row['file2'][0]; + if (is_numeric($row['id'])) { + $filePath = $this->directory . '/' . 
$row['file1'][3]; + $check_value = $row['file1'][3]; + } + + $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); + $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); + $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; + $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; + $clientId = isset($row['file1'][4]) ? $row['file1'][4] : ''; + $bucket = isset($row['file1'][5]) ? $row['file1'][5] : ''; + $attachmentId = isset($row['file1'][5]) ? $row['file1'][6] : ''; + + fputcsv($fp, [ + 'Attachment', + $filePath, + $check_value, + $thumb1, + $thumb2, + $size, + $creationTime, + $clientId, + $bucket, + $attachmentId, + ]); + } + + fclose($fp); + chmod($name, 0666); + } + + + private function generateCsvReportForDbEntries(array $inconsistentFiles, string $name): void + { + + $fp = fopen($name, 'w'); + fputcsv($fp, ['File/Attachment', 'Entry Path', 'Entry Name', 'Thumb 1', 'Thumb 2', 'Size', 'Creation Time']); + + foreach ($inconsistentFiles as $row) { + + $filePath = $row['file1'][0] ? $row['file1'][0] : $row['file2'][0]; + $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); + $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); + $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; + $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; + fputcsv($fp, [ + 'Attachment', + $filePath, + $row['file1'][0], + $thumb1, + $thumb2, + $size, + $creationTime + ]); + } + + fclose($fp); + chmod($name, 0666); + } + + + /** + * Generates an HTML report of inconsistent files. + * + * @param array $inconsistentFiles An array of inconsistent files. + * @param string $filename The filename to use for the report. 
+ */ + private function generateHtmlReport(array $inconsistentFiles, string $name): void + { + $file = fopen($name, 'w'); + if (!$file) { + throw new Exception('Failed to open the file for writing.'); + } + $html = ''; + + foreach ($inconsistentFiles as $row) { + + $filePath = $this->directory . '/' . $row['file1'][0]; + $size = isset($row['file1'][1]) ? (string) $row['file1'][1] : filesize($filePath); + $creationTime = isset($row['file1'][2]) ? (string) $row['file1'][2] : date('Y-m-d H:i:s', filectime($filePath)); + $thumb1 = isset($row['thumb1']) ? $row['thumb1'] : ''; + $thumb2 = isset($row['thumb2']) ? $row['thumb2'] : ''; + + // Write the properties to the HTML table + $html .= ''; + } + + $html .= '
File/AttachmentFile pathFile nameThumb 1Thumb 2SizeCreation Time
Attachment' . htmlspecialchars($filePath) . '' . htmlspecialchars($row['file1'][0]) . '' . htmlspecialchars($thumb1) . '' . htmlspecialchars($thumb2) . '' . htmlspecialchars($size) . '' . htmlspecialchars($creationTime) . '
'; + + fwrite($file, $html); + fclose($file); + } + + function compareCSVFilesTransform($firstFile, $secondFile, $finalFile) + { + // Read the first CSV file + $firstData = array_map('str_getcsv', file($firstFile)); + $firstHeaders = array_shift($firstData); + + // Read the second CSV file + $secondData = array_map('str_getcsv', file($secondFile)); + $secondHeaders = array_shift($secondData); + + // Find the indexes of the columns to compare in both files + $firstIdIndex = array_search('id', $firstHeaders); + $firstSizeIndex = array_search('size', $firstHeaders); + $firstCreationTimeIndex = array_search('creation_time', $firstHeaders); + + $secondIdIndex = array_search('Entry Path', $secondHeaders); + $secondSizeIndex = array_search('size', $secondHeaders); + $secondCreationTimeIndex = array_search('creation_time', $secondHeaders); + + // Get the entries from the first file + $firstEntries = []; + foreach ($firstData as $row) { + $firstEntries[$row[$firstIdIndex]] = [ + 'id' => $row[$firstIdIndex], + 'size' => $row[$firstSizeIndex], + 'creation_time' => $row[$firstCreationTimeIndex] + ]; + } + + // Create the final result CSV file + $finalCsvFile = fopen($finalFile, 'w'); + fputcsv($finalCsvFile, ['id', 'size', 'creation_time']); + + // Compare the entries from the second file with the entries in the first file + foreach ($secondData as $row) { + $secondId = $row[$secondIdIndex]; + $secondSize = $row[$secondSizeIndex]; + $secondCreationTime = $row[$secondCreationTimeIndex]; + + // Check if there is a matching entry in the first file + if ( + isset($firstEntries[$secondId]) && + $firstEntries[$secondId]['size'] === $secondSize && + $firstEntries[$secondId]['creation_time'] === $secondCreationTime + ) { + fputcsv($finalCsvFile, [ + $secondId, + $secondSize, + $secondCreationTime + ]); + } + } + + fclose($finalCsvFile); + } + + + private function deletePhysicalFilesFromCsv(string $csvFile): void + { + $directory = $this->directory . 
'/'; + $entries = []; + if (($handle = fopen($csvFile, 'r')) !== false) { + // Collect the file paths to be deleted + while (($data = fgetcsv($handle)) !== false) { + // Skip the header row + if ($data[0] === 'File/Attachment') { + continue; + } + + $filePath = $directory . $data[1]; + $entries[] = $filePath; + } + fclose($handle); + } + + // Prompt the user for confirmation before deleting the files + echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL; + echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL; + echo "Do you want to delete the physical files listed in the CSV report? (yes/no): "; + $confirmation = trim(fgets(STDIN)); + + if (strtolower($confirmation) === 'yes') { + // Create a log file to record the deleted files + $logFile = 'deleted_files.log'; + $logHandle = fopen($logFile, 'a'); + + // Delete the physical files + foreach ($entries as $filePath) { + if (file_exists($filePath)) { + // unlink($filePath); + echo "File deleted: $filePath" . PHP_EOL; + + // Write the deleted file path to the log file + fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL); + } else { + echo "File not found: $filePath" . PHP_EOL; + } + } + + fclose($logHandle); + + echo "Deletion completed. The list of deleted files is saved in '$logFile'." . PHP_EOL; + } else { + echo "Deletion of physical files aborted." . 
PHP_EOL; + } + } + + + private static function _is_uuid(string $id): bool + { + $regex = '/^[a-z0-9]{8}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{4}-[a-z0-9]{12}$/'; + return preg_match($regex, $id); + } + + /** + * Check if the attachment string is a valid migrated attachment + * + * @param string $attachmentId + * + * @return bool + */ + private static function _isValidMigratedAttachment(string $attachmentId = ''): bool + { + return str::sub($attachmentId, 0, 2) === OLD_ATTACHMENTS_PREFIX; + } + + + + /* ATTACHMENT DELETION ===================================================================================================================== */ + + /** + * Returns cassandra schema version + * + * + * @return int + */ + + private function schema_version() + { + return static::$schemaVersion; + } + /** + * Returns info(id,size) for attachment file + * + * @param int $clientId + * @param string|null $bucketId + * @param string $id + * + * @return object + */ + private function get_info(int $clientId, ?string $bucketId, string $id): ?object + { + $attachment = null; + $args = [ + 'client_id' => $clientId, + 'id' => $id + ]; + + if ($this->schema_version() == 1) { + $query = $this->cassandra->prepare('SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? ALLOW FILTERING'); + } else { + $q = 'SELECT * FROM attachment_file_info WHERE id = ? AND client_id = ? AND bucket = \'' . $bucketId . '\''; + echo "Q: [" . $q . 
"]\n"; + $query = $this->cassandra->prepare($q); + } + $res = $this->cassandra->execute( + $query, + [ + 'arguments' => $args + ] + ); + if ($res && $res->valid()) { + $tmp = $res->current(); + + $attachment = (object) $tmp; + $attachment->id = (string) $tmp['id']; + if (array_key_exists('size', $tmp)) { + $attachment->size = (int) $tmp['size']; + } + } + return $attachment; + } + + /** + * prepares 'bucket' field for partitioning + * + * @param string $id + * @return string + */ + private function _set_bucket(string $id): string + { + return substr($id, 0, static::$bucketMagic); + } + + /** + * updates attachment count and size + * + * @param boolean $add + * @param integer $size + * + * @return void + */ + private function _update_attachment_stats(bool $add, int $size): void + { + $op = $add ? '+' : '-'; + $query = $this->cassandra->prepare('UPDATE attachment_stats SET count = count ' . $op . ' 1 where client_id = ?'); + $this->cassandra->execute($query, ['arguments' => ['client_id' => (int) static::$clientId]]); + $query = $this->cassandra->prepare( + 'UPDATE attachment_stats SET size = size ' . $op . ' ' . $size . ' where client_id = ?' + ); + $this->cassandra->execute($query, ['arguments' => ['client_id' => static::$clientId]]); + } + + /** + * returns full attachment table key for given id + * + * @param string $id + * + * @return object|null + */ + private function _get_attachment_key(string $id): ?object + { + //echo "GET KEY!\n"; + $result = null; + $query = $this->cassandra->prepare('SELECT * from attachment_ids where id = ? AND bucket = ? AND client_id = ?'); + $arguments = [ + 'client_id' => static::$clientId, + 'bucket' => $this->_set_bucket($id), + 'id' => $id, + ]; + //echo "args: " . var_dump($arguments, true) . 
PHP_EOL; + $data = $this->cassandra->execute($query, ['arguments' => $arguments]); + echo "GK2!!!!"; + //var_dump($data); + if ($data && $data->valid()) { + $result = (object) $data->current(); + } + //var_dump($result); + return $result; + } + + /** + * returns single attachment data for provided ID + * + * @param string $attachmentId + * + * @return array + */ + private function _get_attachment_by_id(string $attachmentId): ?object + { + $attachment = null; + try { + $properties = '*'; + echo "attid: ". $attachmentId . PHP_EOL; + $key = $this->_get_attachment_key($attachmentId); + if ($key) { + if ($this->schema_version() === 1) { + $query = $this->cassandra->prepare('SELECT ' . $properties . ' FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ? ALLOW FILTERING'); + $arguments = [ + 'client_id' => static::$clientId, + 'id' => $key->id, + 'project_id' => $key->project_id, + 'entity_type' => $key->entity_type, + ]; + } else { + $query = $this->cassandra->prepare('SELECT ' . $properties . ' FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ? AND entity_id = ?'); + $arguments = [ + 'client_id' => static::$clientId, + 'id' => $key->id, + 'project_id' => $key->project_id, + 'entity_id' => $key->entity_id, + 'entity_type' => $key->entity_type, + ]; + } + $attachment = $this->cassandra->execute($query, ['arguments' => $arguments]); + } + } catch (Cassandra\Exception\InvalidArgumentException $e) { + } + //echo "ATTA: \n"; + //var_dump($attachment); + + return $attachment != null && $attachment->valid() ? $this->_convert_to_object($attachment->current()) : null; + } + + /** + * deletes attachment_ids entry + * + * @param string $id + */ + private function _delete_attachment_key(string $id): void + { + $result = null; + $query = $this->cassandra->prepare('DELETE FROM attachment_ids WHERE id = ? AND bucket = ? 
AND client_id = ?'); + $arguments = [ + 'client_id' => static::$clientId, + 'bucket' => $this->_set_bucket($id), + 'id' => $id, + ]; + $this->cassandra->executeAsync($query, ['arguments' => $arguments]); + } + + /** + * updates attachment data references + * + * @param string $data_id + * @param bool $add + * @param string $attachmentId + * + * @return void + */ + private function _update_file_refs(string $data_id, bool $add, string $attachmentId = ''): void + { + $queryArguments = [ + 'client_id' => static::$clientId, + 'bucket' => $this->_set_bucket($data_id), + 'id' => $data_id, + ]; + $query = $this->cassandra->prepare('UPDATE attachment_file_refs SET ref_count = ref_count ' . ($add ? '+' : '-') . ' 1 WHERE bucket = ? AND id = ? AND client_id = ?'); + $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]); + + if (!empty($attachmentId)) { + $queryArguments['attachment_id'] = $attachmentId; + if ($add) { + $query = $this->cassandra->prepare('INSERT INTO attachment_file_ids (client_id,bucket,id,attachment_id) VALUES(?,?,?,?)'); + } else { + $query = $this->cassandra->prepare('DELETE FROM attachment_file_ids WHERE client_id = ? AND bucket = ? AND attachment_id = ? AND id = ?'); + } + $this->cassandra->executeAsync($query, ['arguments' => $queryArguments]); + + } + } + + + /** + * Deletes attachment from Cassandra + * + * + * @param int $clientId + * @param string|null $bucketId + * @param string $id + * + * @return bool + */ + public function deleteAttachment(int $clientId, ?string $bucketId, string $id): bool + { + static::$clientId = $clientId; + $refData = [ + 'client_id' => $clientId, + 'id' => $id, + ]; + $fileInfo = $this->get_info($clientId, $bucketId, $id); + //var_dump($fileInfo); + if ($fileInfo) { + $this->_update_attachment_stats(false, $fileInfo->size); + } else { + return false; + } + + $q = 'DELETE FROM attachment_file_info WHERE id = ? AND client_id = ? '; + if ($this->schema_version() === 2) { + $q = $q . ' AND bucket = \'' . 
$bucketId . '\''; + } + $query = $this->cassandra->prepare($q); + $this->cassandra->execute($query, ['arguments' => $refData]); + + + echo "DEL REFS\n"; + $refData['bucket'] = $this->_set_bucket($id); + $query = $this->cassandra->prepare( + 'DELETE FROM attachment_file_refs WHERE bucket = ? AND id = ? AND client_id = ?' + ); + $result = $this->cassandra->execute($query, ['arguments' => $refData]); + + // get all attachments with deleted file and remove them + $query = $this->cassandra->prepare( + 'SELECT attachment_id FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?' + ); + $result = $this->cassandra->execute($query, ['arguments' => $refData]); + + echo "DEL ATTACHMENTS\n"; + if ($this->schema_version() === 1) { + $delQuery = $this->cassandra->prepare( + 'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?' + ); + } else { + $delQuery = $this->cassandra->prepare( + 'DELETE FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ? AND entity_id = ?' + ); + } + while ($result && $result->valid()) { + $attachmentId = $result->current()['attachment_id']; + $key = $this->_get_attachment_key((string) $attachmentId); + if ($key == null) { + $result->next(); + continue; + } + if ($this->schema_version() === 1) { + $selectQuery = $this->cassandra->prepare( + 'SELECT entity_id FROM attachments WHERE id = ? AND client_id = ? AND project_id = ? AND entity_type = ?' 
+ ); + $attachment = $this->cassandra->execute($selectQuery, [ + 'arguments' => [ + 'client_id' => static::$clientId, + 'project_id' => $key->project_id, + 'entity_type' => $key->entity_type, + 'id' => $attachmentId, + ] + ]); + $entity_id = $attachment->current()['entity_id']; + $delArgs = [ + 'client_id' => static::$clientId, + 'project_id' => $key->project_id, + 'entity_type' => $key->entity_type, + 'id' => $attachmentId, + ]; + } else { + $entity_id = $key->entity_id; + $delArgs = [ + 'client_id' => static::$clientId, + 'project_id' => $key->project_id, + 'entity_type' => $key->entity_type, + 'entity_id' => $entity_id, + 'id' => $attachmentId, + ]; + } + $this->_delete_attachment_key((string) $attachmentId); + $this->cassandra->execute($delQuery, ['arguments' => $delArgs]); + $result->next(); + } + $query = $this->cassandra->prepare( + 'DELETE FROM attachment_file_ids WHERE bucket = ? AND id = ? AND client_id = ?' + ); + $this->cassandra->execute($query, ['arguments' => $refData]); + $result = false; + $attachment = $this->_get_attachment_by_id($id); + if ($attachment) { + if ($this->schema_version() === 1) { + $query = $this->cassandra->prepare("DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_type = ? AND client_id = ?"); + $arguments = [ + 'arguments' => [ + 'client_id' => static::$clientId, + 'id' => $id, + 'project_id' => $attachment->project_id, + 'entity_type' => $attachment->entity_type + ], + ]; + } else { + $query = $this->cassandra->prepare("DELETE FROM attachments WHERE id = ? AND project_id = ? AND entity_id = ? AND entity_type = ? 
AND client_id = ?");
                $arguments = [
                    'arguments' => [
                        'client_id' => static::$clientId,
                        'id' => $id,
                        'project_id' => $attachment->project_id,
                        'entity_type' => $attachment->entity_type,
                        'entity_id' => $attachment->entity_id
                    ],
                ];
            }
            $queryResult = $this->cassandra->execute($query, $arguments) != null;

            if ($queryResult) {
                $result = true;
                $this->_update_file_refs($attachment->data_id, false, $attachment->id);
                $this->_delete_attachment_key($id);
            }
        }

        return $result;
    }


    /**
     * Walks a CSV deletion report and reports/logs every row whose source
     * column matches $src.
     *
     * The user is first asked to confirm on STDIN; anything other than "yes"
     * (case-insensitive), including a closed STDIN, aborts without touching
     * anything. Matching rows are echoed and appended to 'deleted_files.log'
     * in the current working directory.
     *
     * NOTE(review): this method is effectively a dry run. The unlink() calls
     * for physical files were already commented out and no Cassandra delete
     * is issued from here, even though the emitted messages say "deleted".
     * The previous code also derived both thumbnail paths from the main path
     * column (index 1); if real deletion is enabled later, columns 3 and 4
     * carry the thumbnail names.
     *
     * Row layout (plain comma split, quoted fields are not supported):
     *   0 source, 1 path, 2 name, 3 thumb1, 4 thumb2, 5 size, 6 created,
     *   and for Cassandra rows additionally 7 client id, 8 bucket, 9 id.
     * Rows with fewer than 7 columns and header rows whose first column is
     * 'File/Attachment' are ignored. A header is optional: the first line is
     * processed like any other row.
     *
     * @param string $file path of the CSV report to read
     * @param string $src  ('cassandra'/'file') rows with any other source are skipped
     *
     * @return void
     *
     * example file:
     *
     * cassandra,/test1/testx,testx,testx-thumb1,testx-thumb2,4343,20-02-22 13:30,1,a,abc-def
     * cassandra,/test1/testx1,testx1,testx1-thumb1,testx1-thumb2,4343,20-02-22 13:30,1,1,1abc-def
     * file,/tmp/testx3f,testx3f,testx3f-thumb1,testx3f-thumb2,4343,20-02-22 13:30,,,
     * cassandra,/test1/testx3,testx3,testx3-thumb1,testx3-thumb2,4343,20-02-22 13:30,1,3,3abc-def
     */
    public function processAttachmentDeletionCSV(string $file, string $src) : void
    {
        echo "Before proceeding with the deletion, make sure you have a backup of your data." . PHP_EOL;
        echo "You can revert back to the backup in case of accidental data loss." . PHP_EOL;
        echo "Do you want to delete the physical files listed in the CSV report? (yes/no): ";

        // fgets() yields false on EOF / closed STDIN; treat that as a refusal.
        $confirmation = fgets(STDIN);
        if ($confirmation === false || strtolower(trim($confirmation)) !== 'yes') {
            return;
        }

        // Fail loudly on an unreadable report instead of "completing" on empty input.
        $file_contents = (is_file($file) && is_readable($file)) ? file_get_contents($file) : false;
        if ($file_contents === false) {
            echo "Unable to read CSV report: $file" . PHP_EOL;
            return;
        }

        $logFile = 'deleted_files.log';
        $logHandle = fopen($logFile, 'a');
        if ($logHandle === false) {
            echo "Unable to open log file: $logFile" . PHP_EOL;
            return;
        }

        $directory = $this->directory . '/';

        try {
            foreach (explode("\n", $file_contents) as $line) {
                // Drop a trailing CR so CRLF files do not pollute the last column.
                $line = rtrim($line, "\r");
                $values = explode(",", $line);

                // Too short to be a data row (this also covers blank lines), or a header row.
                if (count($values) < 7 || $values[0] === 'File/Attachment') {
                    continue;
                }
                // Only rows of the requested source are handled in this run.
                if ($values[0] !== $src) {
                    continue;
                }

                if ($src === 'cassandra') {
                    // The Cassandra identifiers live in columns 7-9; without them
                    // there is nothing meaningful to report.
                    if (count($values) < 10) {
                        echo "Skipping incomplete cassandra row: $line" . PHP_EOL;
                        continue;
                    }
                    $clientId = (int) $values[7];
                    $bucket = $values[8];
                    $id = $values[9];

                    echo "will delete " . $clientId . " : " . $bucket . " : " . $id . PHP_EOL;
                    fwrite($logHandle, "Deleted attachment: $id" . PHP_EOL);
                    continue;
                }

                // Any other matching source is treated as a physical file
                // located under the configured attachment directory.
                $filePath = $directory . $values[1];
                if (file_exists($filePath)) {
                    // Deletion itself is intentionally not performed here (see docblock).
                    echo "File deleted: $filePath" . PHP_EOL;
                    // Write the deleted file path to the log file
                    fwrite($logHandle, "Deleted file: $filePath" . PHP_EOL);
                } else {
                    echo "File not found: $filePath" . PHP_EOL;
                }
            }
        } finally {
            // Always release the log handle, even if processing a row throws.
            fclose($logHandle);
        }

        echo "Deletion completed. The list of attachments is saved in '$logFile'." . PHP_EOL;
    }
}

// Script entry point.
// NOTE(review): the constructor shown at the top of this file calls
// runFromCommandLine(), which exits on every path, so the explicit
// checkConsistency() call below looks unreachable - confirm before relying on it.
#$options = getopt('', ['directory:']);
$checker = new DataConsistencyChecker();
$checker->checkConsistency("attachment_file_info", true);
#$checker::$cassandraHost = 'localhost';
#$checker->init($options);
#$checker->deleteAttachment("1", "f", "ff29ead0-8696-4ef1-8120-538d6dd7efd1");
#$checker->processAttachmentDeletionCSV("todelete.csv", "file");